public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH V1] accel/amdxdna: Add hardware scheduler time quantum support
@ 2026-04-14 16:56 Lizhi Hou
  2026-04-14 16:58 ` Mario Limonciello
  0 siblings, 1 reply; 10+ messages in thread
From: Lizhi Hou @ 2026-04-14 16:56 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
	maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan, Lizhi Hou

From: Max Zhen <max.zhen@amd.com>

Add support for configuring the hardware scheduler time quantum to
improve fairness across concurrent contexts.

The scheduler enforces a fixed time slice per context, preventing
long-running workloads from monopolizing the device and allowing
other contexts to make forward progress.

The default time quantum is 30ms and can be configured via the
time_quantum_ms module parameter.

Signed-off-by: Max Zhen <max.zhen@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_message.c  | 44 +++++++++++++++++++++++++++
 drivers/accel/amdxdna/aie2_msg_priv.h | 16 ++++++++++
 drivers/accel/amdxdna/aie2_pci.c      | 16 ++++++++++
 drivers/accel/amdxdna/aie2_pci.h      |  2 ++
 drivers/accel/amdxdna/npu4_regs.c     |  3 +-
 5 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index e52dc7ea9fc7..976ad6281078 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -1200,3 +1200,47 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
 	aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
 	return ret;
 }
+
+static int
+aie2_runtime_update_ctx_prop(struct amdxdna_dev_hdl *ndev,
+			     struct amdxdna_hwctx *ctx, u32 type, u32 value)
+{
+	DECLARE_AIE_MSG(update_property, MSG_OP_UPDATE_PROPERTY);
+	struct amdxdna_dev *xdna = ndev->aie.xdna;
+	int ret;
+
+	if (!AIE_FEATURE_ON(&ndev->aie, AIE2_UPDATE_PROPERTY))
+		return -EOPNOTSUPP;
+
+	if (ctx)
+		req.context_id = ctx->fw_ctx_id;
+	else
+		req.context_id = AIE2_UPDATE_PROPERTY_ALL_CTX;
+
+	req.time_quota_us = value;
+	req.type = type;
+
+	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+	if (ret) {
+		XDNA_ERR(xdna, "%s update property failed, type %d ret %d",
+			 ctx ? ctx->name : "ctx.all", type, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us)
+{
+	struct amdxdna_dev *xdna = ndev->aie.xdna;
+	int ret;
+
+	ret = aie2_runtime_update_ctx_prop(ndev, NULL, UPDATE_PROPERTY_TIME_QUOTA, us);
+	if (ret == -EOPNOTSUPP) {
+		XDNA_DBG(xdna, "update time quota not support, skipped");
+		ret = 0;
+	} else if (!ret) {
+		XDNA_DBG(xdna, "Ctx exec time quantum updated to %u us", us);
+	}
+	return ret;
+}
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index f18e89a39e35..fc2e99510980 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -31,6 +31,7 @@ enum aie2_msg_opcode {
 	MSG_OP_SET_RUNTIME_CONFIG          = 0x10A,
 	MSG_OP_GET_RUNTIME_CONFIG          = 0x10B,
 	MSG_OP_REGISTER_ASYNC_EVENT_MSG    = 0x10C,
+	MSG_OP_UPDATE_PROPERTY             = 0x113,
 	MSG_OP_GET_APP_HEALTH              = 0x114,
 	MSG_OP_MAX_DRV_OPCODE,
 	MSG_OP_GET_PROTOCOL_VERSION        = 0x301,
@@ -503,4 +504,19 @@ struct get_app_health_resp {
 	__u32 required_buffer_size;
 	__u32 reserved[7];
 } __packed;
+
+struct update_property_req {
+#define UPDATE_PROPERTY_TIME_QUOTA 0
+	__u32 type;
+#define AIE2_UPDATE_PROPERTY_ALL_CTX	0xFF
+	__u8 context_id;
+	__u8 reserved[7];
+	__u32 time_quota_us;
+	__u32 reserved1;
+} __packed;
+
+struct update_property_resp {
+	enum aie2_msg_status status;
+} __packed;
+
 #endif /* _AIE2_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index c9c23c889c78..7ed99ea471a9 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -33,6 +33,11 @@ static int aie2_max_col = XRS_MAX_COL;
 module_param(aie2_max_col, uint, 0600);
 MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
 
+#define MAX_TIME_QUANTUM_MS 2000 /* milliseconds */
+static uint time_quantum_ms = 30; /* milliseconds */
+module_param(time_quantum_ms, uint, 0400);
+MODULE_PARM_DESC(time_quantum_ms, "Execution time quantum. Default 30 ms, MAX 2000 ms");
+
 static char *npu_fw[] = {
 	"npu_7.sbin",
 	"npu.sbin"
@@ -186,6 +191,17 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
 		return ret;
 	}
 
+	if (time_quantum_ms > MAX_TIME_QUANTUM_MS) {
+		XDNA_ERR(ndev->aie.xdna, "Bad time quantum %u", time_quantum_ms);
+		return -EINVAL;
+	}
+
+	ret = aie2_update_prop_time_quota(ndev, time_quantum_ms * 1000);
+	if (ret) {
+		XDNA_ERR(ndev->aie.xdna, "Failed to update execution time quantum");
+		return ret;
+	}
+
 	ret = aie2_xdna_reset(ndev);
 	if (ret) {
 		XDNA_ERR(ndev->aie.xdna, "Reset firmware failed");
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index f83deca2b51a..69b53c7bcb86 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -222,6 +222,7 @@ enum aie2_fw_feature {
 	AIE2_PREEMPT,
 	AIE2_TEMPORAL_ONLY,
 	AIE2_APP_HEALTH,
+	AIE2_UPDATE_PROPERTY,
 	AIE2_FEATURE_MAX
 };
 
@@ -308,6 +309,7 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
 		 int (*notify_cb)(void *, void __iomem *, size_t));
 int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
 			 int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us);
 void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
 			    dma_addr_t *dma_addr);
 void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index a3b6df56abd0..6ebf75ad5fb4 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -93,9 +93,10 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
 
 const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] = {
 	{ .major = 6, .min_minor = 12 },
-	{ .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor = 15 },
 	{ .features = BIT_U64(AIE2_PREEMPT), .major = 6, .min_minor = 12 },
 	{ .features = BIT_U64(AIE2_TEMPORAL_ONLY), .major = 6, .min_minor = 12 },
+	{ .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor = 15 },
+	{ .features = BIT_U64(AIE2_UPDATE_PROPERTY), .major = 6, .min_minor = 15 },
 	{ .features = BIT_U64(AIE2_APP_HEALTH), .major = 6, .min_minor = 18 },
 	{ .features = AIE2_ALL_FEATURES, .major = 7 },
 	{ 0 }
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Add hardware scheduler time quantum support
  2026-04-14 16:56 [PATCH V1] accel/amdxdna: Add hardware scheduler time quantum support Lizhi Hou
@ 2026-04-14 16:58 ` Mario Limonciello
  2026-04-14 17:16   ` Lizhi Hou
  0 siblings, 1 reply; 10+ messages in thread
From: Mario Limonciello @ 2026-04-14 16:58 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan



On 4/14/26 11:56, Lizhi Hou wrote:
> From: Max Zhen <max.zhen@amd.com>
> 
> Add support for configuring the hardware scheduler time quantum to
> improve fairness across concurrent contexts.
> 
> The scheduler enforces a fixed time slice per context, preventing
> long-running workloads from monopolizing the device and allowing
> other contexts to make forward progress.
> 
> The default time quantum is 30ms and can be configured via the
> time_quantum_ms module parameter.

Can you talk more about how you want to use it?  Adding new module
parameters is generally frowned upon; exposing something through
debugfs at runtime is preferred instead.

I.e., if you can export it as a debugfs file that, when written to,
either updates the quantum directly or updates it and restarts the
driver, that would be preferable.

> 
> Signed-off-by: Max Zhen <max.zhen@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
>   drivers/accel/amdxdna/aie2_message.c  | 44 +++++++++++++++++++++++++++
>   drivers/accel/amdxdna/aie2_msg_priv.h | 16 ++++++++++
>   drivers/accel/amdxdna/aie2_pci.c      | 16 ++++++++++
>   drivers/accel/amdxdna/aie2_pci.h      |  2 ++
>   drivers/accel/amdxdna/npu4_regs.c     |  3 +-
>   5 files changed, 80 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index e52dc7ea9fc7..976ad6281078 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -1200,3 +1200,47 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
>   	aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
>   	return ret;
>   }
> +
> +static int
> +aie2_runtime_update_ctx_prop(struct amdxdna_dev_hdl *ndev,
> +			     struct amdxdna_hwctx *ctx, u32 type, u32 value)
> +{
> +	DECLARE_AIE_MSG(update_property, MSG_OP_UPDATE_PROPERTY);
> +	struct amdxdna_dev *xdna = ndev->aie.xdna;
> +	int ret;
> +
> +	if (!AIE_FEATURE_ON(&ndev->aie, AIE2_UPDATE_PROPERTY))
> +		return -EOPNOTSUPP;
> +
> +	if (ctx)
> +		req.context_id = ctx->fw_ctx_id;
> +	else
> +		req.context_id = AIE2_UPDATE_PROPERTY_ALL_CTX;
> +
> +	req.time_quota_us = value;
> +	req.type = type;
> +
> +	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
> +	if (ret) {
> +		XDNA_ERR(xdna, "%s update property failed, type %d ret %d",
> +			 ctx ? ctx->name : "ctx.all", type, ret);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us)
> +{
> +	struct amdxdna_dev *xdna = ndev->aie.xdna;
> +	int ret;
> +
> +	ret = aie2_runtime_update_ctx_prop(ndev, NULL, UPDATE_PROPERTY_TIME_QUOTA, us);
> +	if (ret == -EOPNOTSUPP) {
> +		XDNA_DBG(xdna, "update time quota not support, skipped");
> +		ret = 0;
> +	} else if (!ret) {
> +		XDNA_DBG(xdna, "Ctx exec time quantum updated to %u us", us);
> +	}
> +	return ret;
> +}
> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
> index f18e89a39e35..fc2e99510980 100644
> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
> @@ -31,6 +31,7 @@ enum aie2_msg_opcode {
>   	MSG_OP_SET_RUNTIME_CONFIG          = 0x10A,
>   	MSG_OP_GET_RUNTIME_CONFIG          = 0x10B,
>   	MSG_OP_REGISTER_ASYNC_EVENT_MSG    = 0x10C,
> +	MSG_OP_UPDATE_PROPERTY             = 0x113,
>   	MSG_OP_GET_APP_HEALTH              = 0x114,
>   	MSG_OP_MAX_DRV_OPCODE,
>   	MSG_OP_GET_PROTOCOL_VERSION        = 0x301,
> @@ -503,4 +504,19 @@ struct get_app_health_resp {
>   	__u32 required_buffer_size;
>   	__u32 reserved[7];
>   } __packed;
> +
> +struct update_property_req {
> +#define UPDATE_PROPERTY_TIME_QUOTA 0
> +	__u32 type;
> +#define AIE2_UPDATE_PROPERTY_ALL_CTX	0xFF
> +	__u8 context_id;
> +	__u8 reserved[7];
> +	__u32 time_quota_us;
> +	__u32 reserved1;
> +} __packed;
> +
> +struct update_property_resp {
> +	enum aie2_msg_status status;
> +} __packed;
> +
>   #endif /* _AIE2_MSG_PRIV_H_ */
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index c9c23c889c78..7ed99ea471a9 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -33,6 +33,11 @@ static int aie2_max_col = XRS_MAX_COL;
>   module_param(aie2_max_col, uint, 0600);
>   MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
>   
> +#define MAX_TIME_QUANTUM_MS 2000 /* milliseconds */
> +static uint time_quantum_ms = 30; /* milliseconds */
> +module_param(time_quantum_ms, uint, 0400);
> +MODULE_PARM_DESC(time_quantum_ms, "Execution time quantum. Default 30 ms, MAX 2000 ms");
> +
>   static char *npu_fw[] = {
>   	"npu_7.sbin",
>   	"npu.sbin"
> @@ -186,6 +191,17 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
>   		return ret;
>   	}
>   
> +	if (time_quantum_ms > MAX_TIME_QUANTUM_MS) {
> +		XDNA_ERR(ndev->aie.xdna, "Bad time quantum %u", time_quantum_ms);
> +		return -EINVAL;
> +	}
> +
> +	ret = aie2_update_prop_time_quota(ndev, time_quantum_ms * 1000);
> +	if (ret) {
> +		XDNA_ERR(ndev->aie.xdna, "Failed to update execution time quantum");
> +		return ret;
> +	}
> +
>   	ret = aie2_xdna_reset(ndev);
>   	if (ret) {
>   		XDNA_ERR(ndev->aie.xdna, "Reset firmware failed");
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index f83deca2b51a..69b53c7bcb86 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -222,6 +222,7 @@ enum aie2_fw_feature {
>   	AIE2_PREEMPT,
>   	AIE2_TEMPORAL_ONLY,
>   	AIE2_APP_HEALTH,
> +	AIE2_UPDATE_PROPERTY,
>   	AIE2_FEATURE_MAX
>   };
>   
> @@ -308,6 +309,7 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
>   		 int (*notify_cb)(void *, void __iomem *, size_t));
>   int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
>   			 int (*notify_cb)(void *, void __iomem *, size_t));
> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us);
>   void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
>   			    dma_addr_t *dma_addr);
>   void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
> index a3b6df56abd0..6ebf75ad5fb4 100644
> --- a/drivers/accel/amdxdna/npu4_regs.c
> +++ b/drivers/accel/amdxdna/npu4_regs.c
> @@ -93,9 +93,10 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>   
>   const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] = {
>   	{ .major = 6, .min_minor = 12 },
> -	{ .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor = 15 },
>   	{ .features = BIT_U64(AIE2_PREEMPT), .major = 6, .min_minor = 12 },
>   	{ .features = BIT_U64(AIE2_TEMPORAL_ONLY), .major = 6, .min_minor = 12 },
> +	{ .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor = 15 },
> +	{ .features = BIT_U64(AIE2_UPDATE_PROPERTY), .major = 6, .min_minor = 15 },
>   	{ .features = BIT_U64(AIE2_APP_HEALTH), .major = 6, .min_minor = 18 },
>   	{ .features = AIE2_ALL_FEATURES, .major = 7 },
>   	{ 0 }


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Add hardware scheduler time quantum support
  2026-04-14 16:58 ` Mario Limonciello
@ 2026-04-14 17:16   ` Lizhi Hou
  2026-04-14 17:17     ` Mario Limonciello
  0 siblings, 1 reply; 10+ messages in thread
From: Lizhi Hou @ 2026-04-14 17:16 UTC (permalink / raw)
  To: Mario Limonciello, ogabbay, quic_jhugo, dri-devel,
	maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan


On 4/14/26 09:58, Mario Limonciello wrote:
>
>
> On 4/14/26 11:56, Lizhi Hou wrote:
>> From: Max Zhen <max.zhen@amd.com>
>>
>> Add support for configuring the hardware scheduler time quantum to
>> improve fairness across concurrent contexts.
>>
>> The scheduler enforces a fixed time slice per context, preventing
>> long-running workloads from monopolizing the device and allowing
>> other contexts to make forward progress.
>>
>> The default time quantum is 30ms and can be configured via the
>> time_quantum_ms module parameter.
>
> Can you talk more about how you want to use it?  Adding new module 
> parameters is generally frowned upon in lieu of doing something with 
> debugfs at runtime.

This is a static setting which is not supposed to change at runtime, so
a module parameter is used.

Lizhi

>
> IE if you can export it as a debugfs file that when you write to it 
> updates the quantum or updates it and restarts the driver this might 
> be more preferable.
>
>>
>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>>   drivers/accel/amdxdna/aie2_message.c  | 44 +++++++++++++++++++++++++++
>>   drivers/accel/amdxdna/aie2_msg_priv.h | 16 ++++++++++
>>   drivers/accel/amdxdna/aie2_pci.c      | 16 ++++++++++
>>   drivers/accel/amdxdna/aie2_pci.h      |  2 ++
>>   drivers/accel/amdxdna/npu4_regs.c     |  3 +-
>>   5 files changed, 80 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_message.c 
>> b/drivers/accel/amdxdna/aie2_message.c
>> index e52dc7ea9fc7..976ad6281078 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -1200,3 +1200,47 @@ int aie2_query_app_health(struct 
>> amdxdna_dev_hdl *ndev, u32 context_id,
>>       aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
>>       return ret;
>>   }
>> +
>> +static int
>> +aie2_runtime_update_ctx_prop(struct amdxdna_dev_hdl *ndev,
>> +                 struct amdxdna_hwctx *ctx, u32 type, u32 value)
>> +{
>> +    DECLARE_AIE_MSG(update_property, MSG_OP_UPDATE_PROPERTY);
>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>> +    int ret;
>> +
>> +    if (!AIE_FEATURE_ON(&ndev->aie, AIE2_UPDATE_PROPERTY))
>> +        return -EOPNOTSUPP;
>> +
>> +    if (ctx)
>> +        req.context_id = ctx->fw_ctx_id;
>> +    else
>> +        req.context_id = AIE2_UPDATE_PROPERTY_ALL_CTX;
>> +
>> +    req.time_quota_us = value;
>> +    req.type = type;
>> +
>> +    ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>> +    if (ret) {
>> +        XDNA_ERR(xdna, "%s update property failed, type %d ret %d",
>> +             ctx ? ctx->name : "ctx.all", type, ret);
>> +        return ret;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us)
>> +{
>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>> +    int ret;
>> +
>> +    ret = aie2_runtime_update_ctx_prop(ndev, NULL, 
>> UPDATE_PROPERTY_TIME_QUOTA, us);
>> +    if (ret == -EOPNOTSUPP) {
>> +        XDNA_DBG(xdna, "update time quota not support, skipped");
>> +        ret = 0;
>> +    } else if (!ret) {
>> +        XDNA_DBG(xdna, "Ctx exec time quantum updated to %u us", us);
>> +    }
>> +    return ret;
>> +}
>> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h 
>> b/drivers/accel/amdxdna/aie2_msg_priv.h
>> index f18e89a39e35..fc2e99510980 100644
>> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
>> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
>> @@ -31,6 +31,7 @@ enum aie2_msg_opcode {
>>       MSG_OP_SET_RUNTIME_CONFIG          = 0x10A,
>>       MSG_OP_GET_RUNTIME_CONFIG          = 0x10B,
>>       MSG_OP_REGISTER_ASYNC_EVENT_MSG    = 0x10C,
>> +    MSG_OP_UPDATE_PROPERTY             = 0x113,
>>       MSG_OP_GET_APP_HEALTH              = 0x114,
>>       MSG_OP_MAX_DRV_OPCODE,
>>       MSG_OP_GET_PROTOCOL_VERSION        = 0x301,
>> @@ -503,4 +504,19 @@ struct get_app_health_resp {
>>       __u32 required_buffer_size;
>>       __u32 reserved[7];
>>   } __packed;
>> +
>> +struct update_property_req {
>> +#define UPDATE_PROPERTY_TIME_QUOTA 0
>> +    __u32 type;
>> +#define AIE2_UPDATE_PROPERTY_ALL_CTX    0xFF
>> +    __u8 context_id;
>> +    __u8 reserved[7];
>> +    __u32 time_quota_us;
>> +    __u32 reserved1;
>> +} __packed;
>> +
>> +struct update_property_resp {
>> +    enum aie2_msg_status status;
>> +} __packed;
>> +
>>   #endif /* _AIE2_MSG_PRIV_H_ */
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c 
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index c9c23c889c78..7ed99ea471a9 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -33,6 +33,11 @@ static int aie2_max_col = XRS_MAX_COL;
>>   module_param(aie2_max_col, uint, 0600);
>>   MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
>>   +#define MAX_TIME_QUANTUM_MS 2000 /* milliseconds */
>> +static uint time_quantum_ms = 30; /* milliseconds */
>> +module_param(time_quantum_ms, uint, 0400);
>> +MODULE_PARM_DESC(time_quantum_ms, "Execution time quantum. Default 
>> 30 ms, MAX 2000 ms");
>> +
>>   static char *npu_fw[] = {
>>       "npu_7.sbin",
>>       "npu.sbin"
>> @@ -186,6 +191,17 @@ static int aie2_mgmt_fw_init(struct 
>> amdxdna_dev_hdl *ndev)
>>           return ret;
>>       }
>>   +    if (time_quantum_ms > MAX_TIME_QUANTUM_MS) {
>> +        XDNA_ERR(ndev->aie.xdna, "Bad time quantum %u", 
>> time_quantum_ms);
>> +        return -EINVAL;
>> +    }
>> +
>> +    ret = aie2_update_prop_time_quota(ndev, time_quantum_ms * 1000);
>> +    if (ret) {
>> +        XDNA_ERR(ndev->aie.xdna, "Failed to update execution time 
>> quantum");
>> +        return ret;
>> +    }
>> +
>>       ret = aie2_xdna_reset(ndev);
>>       if (ret) {
>>           XDNA_ERR(ndev->aie.xdna, "Reset firmware failed");
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h 
>> b/drivers/accel/amdxdna/aie2_pci.h
>> index f83deca2b51a..69b53c7bcb86 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -222,6 +222,7 @@ enum aie2_fw_feature {
>>       AIE2_PREEMPT,
>>       AIE2_TEMPORAL_ONLY,
>>       AIE2_APP_HEALTH,
>> +    AIE2_UPDATE_PROPERTY,
>>       AIE2_FEATURE_MAX
>>   };
>>   @@ -308,6 +309,7 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, 
>> struct amdxdna_sched_job *job,
>>            int (*notify_cb)(void *, void __iomem *, size_t));
>>   int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct 
>> amdxdna_sched_job *job,
>>                int (*notify_cb)(void *, void __iomem *, size_t));
>> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us);
>>   void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
>>                   dma_addr_t *dma_addr);
>>   void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
>> diff --git a/drivers/accel/amdxdna/npu4_regs.c 
>> b/drivers/accel/amdxdna/npu4_regs.c
>> index a3b6df56abd0..6ebf75ad5fb4 100644
>> --- a/drivers/accel/amdxdna/npu4_regs.c
>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>> @@ -93,9 +93,10 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>>     const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] = {
>>       { .major = 6, .min_minor = 12 },
>> -    { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor 
>> = 15 },
>>       { .features = BIT_U64(AIE2_PREEMPT), .major = 6, .min_minor = 
>> 12 },
>>       { .features = BIT_U64(AIE2_TEMPORAL_ONLY), .major = 6, 
>> .min_minor = 12 },
>> +    { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor 
>> = 15 },
>> +    { .features = BIT_U64(AIE2_UPDATE_PROPERTY), .major = 6, 
>> .min_minor = 15 },
>>       { .features = BIT_U64(AIE2_APP_HEALTH), .major = 6, .min_minor 
>> = 18 },
>>       { .features = AIE2_ALL_FEATURES, .major = 7 },
>>       { 0 }
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Add hardware scheduler time quantum support
  2026-04-14 17:16   ` Lizhi Hou
@ 2026-04-14 17:17     ` Mario Limonciello
  2026-04-14 17:28       ` Lizhi Hou
  0 siblings, 1 reply; 10+ messages in thread
From: Mario Limonciello @ 2026-04-14 17:17 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan



On 4/14/26 12:16, Lizhi Hou wrote:
> 
> On 4/14/26 09:58, Mario Limonciello wrote:
>>
>>
>> On 4/14/26 11:56, Lizhi Hou wrote:
>>> From: Max Zhen <max.zhen@amd.com>
>>>
>>> Add support for configuring the hardware scheduler time quantum to
>>> improve fairness across concurrent contexts.
>>>
>>> The scheduler enforces a fixed time slice per context, preventing
>>> long-running workloads from monopolizing the device and allowing
>>> other contexts to make forward progress.
>>>
>>> The default time quantum is 30ms and can be configured via the
>>> time_quantum_ms module parameter.
>>
>> Can you talk more about how you want to use it?  Adding new module 
>> parameters is generally frowned upon in lieu of doing something with 
>> debugfs at runtime.
> 
> This is a static setting which is not supposed to change at runtime. So 
> module parameter is used.

But what happens if a user loads the driver with the default setting,
then unloads it and reloads it with a different setting as a module
option?

Does this flow fall apart because the driver initially programmed 30ms?

> 
> Lizhi
> 
>>
>> IE if you can export it as a debugfs file that when you write to it 
>> updates the quantum or updates it and restarts the driver this might 
>> be more preferable.
>>
>>>
>>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>> ---
>>>   drivers/accel/amdxdna/aie2_message.c  | 44 +++++++++++++++++++++++++++
>>>   drivers/accel/amdxdna/aie2_msg_priv.h | 16 ++++++++++
>>>   drivers/accel/amdxdna/aie2_pci.c      | 16 ++++++++++
>>>   drivers/accel/amdxdna/aie2_pci.h      |  2 ++
>>>   drivers/accel/amdxdna/npu4_regs.c     |  3 +-
>>>   5 files changed, 80 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/ 
>>> amdxdna/aie2_message.c
>>> index e52dc7ea9fc7..976ad6281078 100644
>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>> @@ -1200,3 +1200,47 @@ int aie2_query_app_health(struct 
>>> amdxdna_dev_hdl *ndev, u32 context_id,
>>>       aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
>>>       return ret;
>>>   }
>>> +
>>> +static int
>>> +aie2_runtime_update_ctx_prop(struct amdxdna_dev_hdl *ndev,
>>> +                 struct amdxdna_hwctx *ctx, u32 type, u32 value)
>>> +{
>>> +    DECLARE_AIE_MSG(update_property, MSG_OP_UPDATE_PROPERTY);
>>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>>> +    int ret;
>>> +
>>> +    if (!AIE_FEATURE_ON(&ndev->aie, AIE2_UPDATE_PROPERTY))
>>> +        return -EOPNOTSUPP;
>>> +
>>> +    if (ctx)
>>> +        req.context_id = ctx->fw_ctx_id;
>>> +    else
>>> +        req.context_id = AIE2_UPDATE_PROPERTY_ALL_CTX;
>>> +
>>> +    req.time_quota_us = value;
>>> +    req.type = type;
>>> +
>>> +    ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>>> +    if (ret) {
>>> +        XDNA_ERR(xdna, "%s update property failed, type %d ret %d",
>>> +             ctx ? ctx->name : "ctx.all", type, ret);
>>> +        return ret;
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us)
>>> +{
>>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>>> +    int ret;
>>> +
>>> +    ret = aie2_runtime_update_ctx_prop(ndev, NULL, 
>>> UPDATE_PROPERTY_TIME_QUOTA, us);
>>> +    if (ret == -EOPNOTSUPP) {
>>> +        XDNA_DBG(xdna, "update time quota not support, skipped");
>>> +        ret = 0;
>>> +    } else if (!ret) {
>>> +        XDNA_DBG(xdna, "Ctx exec time quantum updated to %u us", us);
>>> +    }
>>> +    return ret;
>>> +}
>>> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/ 
>>> amdxdna/aie2_msg_priv.h
>>> index f18e89a39e35..fc2e99510980 100644
>>> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
>>> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
>>> @@ -31,6 +31,7 @@ enum aie2_msg_opcode {
>>>       MSG_OP_SET_RUNTIME_CONFIG          = 0x10A,
>>>       MSG_OP_GET_RUNTIME_CONFIG          = 0x10B,
>>>       MSG_OP_REGISTER_ASYNC_EVENT_MSG    = 0x10C,
>>> +    MSG_OP_UPDATE_PROPERTY             = 0x113,
>>>       MSG_OP_GET_APP_HEALTH              = 0x114,
>>>       MSG_OP_MAX_DRV_OPCODE,
>>>       MSG_OP_GET_PROTOCOL_VERSION        = 0x301,
>>> @@ -503,4 +504,19 @@ struct get_app_health_resp {
>>>       __u32 required_buffer_size;
>>>       __u32 reserved[7];
>>>   } __packed;
>>> +
>>> +struct update_property_req {
>>> +#define UPDATE_PROPERTY_TIME_QUOTA 0
>>> +    __u32 type;
>>> +#define AIE2_UPDATE_PROPERTY_ALL_CTX    0xFF
>>> +    __u8 context_id;
>>> +    __u8 reserved[7];
>>> +    __u32 time_quota_us;
>>> +    __u32 reserved1;
>>> +} __packed;
>>> +
>>> +struct update_property_resp {
>>> +    enum aie2_msg_status status;
>>> +} __packed;
>>> +
>>>   #endif /* _AIE2_MSG_PRIV_H_ */
>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/ 
>>> amdxdna/aie2_pci.c
>>> index c9c23c889c78..7ed99ea471a9 100644
>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>> @@ -33,6 +33,11 @@ static int aie2_max_col = XRS_MAX_COL;
>>>   module_param(aie2_max_col, uint, 0600);
>>>   MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
>>>   +#define MAX_TIME_QUANTUM_MS 2000 /* milliseconds */
>>> +static uint time_quantum_ms = 30; /* milliseconds */
>>> +module_param(time_quantum_ms, uint, 0400);
>>> +MODULE_PARM_DESC(time_quantum_ms, "Execution time quantum. Default 
>>> 30 ms, MAX 2000 ms");
>>> +
>>>   static char *npu_fw[] = {
>>>       "npu_7.sbin",
>>>       "npu.sbin"
>>> @@ -186,6 +191,17 @@ static int aie2_mgmt_fw_init(struct 
>>> amdxdna_dev_hdl *ndev)
>>>           return ret;
>>>       }
>>>   +    if (time_quantum_ms > MAX_TIME_QUANTUM_MS) {
>>> +        XDNA_ERR(ndev->aie.xdna, "Bad time quantum %u", 
>>> time_quantum_ms);
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    ret = aie2_update_prop_time_quota(ndev, time_quantum_ms * 1000);
>>> +    if (ret) {
>>> +        XDNA_ERR(ndev->aie.xdna, "Failed to update execution time 
>>> quantum");
>>> +        return ret;
>>> +    }
>>> +
>>>       ret = aie2_xdna_reset(ndev);
>>>       if (ret) {
>>>           XDNA_ERR(ndev->aie.xdna, "Reset firmware failed");
>>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/ 
>>> amdxdna/aie2_pci.h
>>> index f83deca2b51a..69b53c7bcb86 100644
>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>> @@ -222,6 +222,7 @@ enum aie2_fw_feature {
>>>       AIE2_PREEMPT,
>>>       AIE2_TEMPORAL_ONLY,
>>>       AIE2_APP_HEALTH,
>>> +    AIE2_UPDATE_PROPERTY,
>>>       AIE2_FEATURE_MAX
>>>   };
>>>   @@ -308,6 +309,7 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, 
>>> struct amdxdna_sched_job *job,
>>>            int (*notify_cb)(void *, void __iomem *, size_t));
>>>   int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct 
>>> amdxdna_sched_job *job,
>>>                int (*notify_cb)(void *, void __iomem *, size_t));
>>> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us);
>>>   void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
>>>                   dma_addr_t *dma_addr);
>>>   void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
>>> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/ 
>>> amdxdna/npu4_regs.c
>>> index a3b6df56abd0..6ebf75ad5fb4 100644
>>> --- a/drivers/accel/amdxdna/npu4_regs.c
>>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>>> @@ -93,9 +93,10 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>>>     const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] = {
>>>       { .major = 6, .min_minor = 12 },
>>> -    { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor 
>>> = 15 },
>>>       { .features = BIT_U64(AIE2_PREEMPT), .major = 6, .min_minor = 
>>> 12 },
>>>       { .features = BIT_U64(AIE2_TEMPORAL_ONLY), .major = 
>>> 6, .min_minor = 12 },
>>> +    { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor 
>>> = 15 },
>>> +    { .features = BIT_U64(AIE2_UPDATE_PROPERTY), .major = 
>>> 6, .min_minor = 15 },
>>>       { .features = BIT_U64(AIE2_APP_HEALTH), .major = 6, .min_minor 
>>> = 18 },
>>>       { .features = AIE2_ALL_FEATURES, .major = 7 },
>>>       { 0 }
>>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Add hardware scheduler time quantum support
  2026-04-14 17:17     ` Mario Limonciello
@ 2026-04-14 17:28       ` Lizhi Hou
  2026-04-14 17:32         ` Mario Limonciello
  0 siblings, 1 reply; 10+ messages in thread
From: Lizhi Hou @ 2026-04-14 17:28 UTC (permalink / raw)
  To: Mario Limonciello, ogabbay, quic_jhugo, dri-devel,
	maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan


On 4/14/26 10:17, Mario Limonciello wrote:
>
>
> On 4/14/26 12:16, Lizhi Hou wrote:
>>
>> On 4/14/26 09:58, Mario Limonciello wrote:
>>>
>>>
>>> On 4/14/26 11:56, Lizhi Hou wrote:
>>>> From: Max Zhen <max.zhen@amd.com>
>>>>
>>>> Add support for configuring the hardware scheduler time quantum to
>>>> improve fairness across concurrent contexts.
>>>>
>>>> The scheduler enforces a fixed time slice per context, preventing
>>>> long-running workloads from monopolizing the device and allowing
>>>> other contexts to make forward progress.
>>>>
>>>> The default time quantum is 30ms and can be configured via the
>>>> time_quantum_ms module parameter.
>>>
>>> Can you talk more about how you want to use it?  Adding new module 
>>> parameters is generally frowned upon in lieu of doing something with 
>>> debugfs at runtime.
>>
>> This is a static setting which is not supposed to change at runtime. 
>> So module parameter is used.
>
> But so what happens if user loads driver with default setting and then 
> unloads driver and loads with a different setting as module option?
>
> Does this flow fall apart because the driver initially programmed 30ms?

Reloading with a new setting will overwrite the default setting. After the 
module is loaded, the setting is not supposed to change until the module is unloaded.


Lizhi

>
>>
>> Lizhi
>>
>>>
>>> IE if you can export it as a debugfs file that when you write to it 
>>> updates the quantum or updates it and restarts the driver this might 
>>> be more preferable.
>>>
>>>>
>>>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>>> ---
>>>>   drivers/accel/amdxdna/aie2_message.c  | 44 
>>>> +++++++++++++++++++++++++++
>>>>   drivers/accel/amdxdna/aie2_msg_priv.h | 16 ++++++++++
>>>>   drivers/accel/amdxdna/aie2_pci.c      | 16 ++++++++++
>>>>   drivers/accel/amdxdna/aie2_pci.h      |  2 ++
>>>>   drivers/accel/amdxdna/npu4_regs.c     |  3 +-
>>>>   5 files changed, 80 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/ 
>>>> amdxdna/aie2_message.c
>>>> index e52dc7ea9fc7..976ad6281078 100644
>>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>>> @@ -1200,3 +1200,47 @@ int aie2_query_app_health(struct 
>>>> amdxdna_dev_hdl *ndev, u32 context_id,
>>>>       aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
>>>>       return ret;
>>>>   }
>>>> +
>>>> +static int
>>>> +aie2_runtime_update_ctx_prop(struct amdxdna_dev_hdl *ndev,
>>>> +                 struct amdxdna_hwctx *ctx, u32 type, u32 value)
>>>> +{
>>>> +    DECLARE_AIE_MSG(update_property, MSG_OP_UPDATE_PROPERTY);
>>>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>>>> +    int ret;
>>>> +
>>>> +    if (!AIE_FEATURE_ON(&ndev->aie, AIE2_UPDATE_PROPERTY))
>>>> +        return -EOPNOTSUPP;
>>>> +
>>>> +    if (ctx)
>>>> +        req.context_id = ctx->fw_ctx_id;
>>>> +    else
>>>> +        req.context_id = AIE2_UPDATE_PROPERTY_ALL_CTX;
>>>> +
>>>> +    req.time_quota_us = value;
>>>> +    req.type = type;
>>>> +
>>>> +    ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>>>> +    if (ret) {
>>>> +        XDNA_ERR(xdna, "%s update property failed, type %d ret %d",
>>>> +             ctx ? ctx->name : "ctx.all", type, ret);
>>>> +        return ret;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us)
>>>> +{
>>>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>>>> +    int ret;
>>>> +
>>>> +    ret = aie2_runtime_update_ctx_prop(ndev, NULL, 
>>>> UPDATE_PROPERTY_TIME_QUOTA, us);
>>>> +    if (ret == -EOPNOTSUPP) {
>>>> +        XDNA_DBG(xdna, "update time quota not support, skipped");
>>>> +        ret = 0;
>>>> +    } else if (!ret) {
>>>> +        XDNA_DBG(xdna, "Ctx exec time quantum updated to %u us", us);
>>>> +    }
>>>> +    return ret;
>>>> +}
>>>> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/ 
>>>> amdxdna/aie2_msg_priv.h
>>>> index f18e89a39e35..fc2e99510980 100644
>>>> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
>>>> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
>>>> @@ -31,6 +31,7 @@ enum aie2_msg_opcode {
>>>>       MSG_OP_SET_RUNTIME_CONFIG          = 0x10A,
>>>>       MSG_OP_GET_RUNTIME_CONFIG          = 0x10B,
>>>>       MSG_OP_REGISTER_ASYNC_EVENT_MSG    = 0x10C,
>>>> +    MSG_OP_UPDATE_PROPERTY             = 0x113,
>>>>       MSG_OP_GET_APP_HEALTH              = 0x114,
>>>>       MSG_OP_MAX_DRV_OPCODE,
>>>>       MSG_OP_GET_PROTOCOL_VERSION        = 0x301,
>>>> @@ -503,4 +504,19 @@ struct get_app_health_resp {
>>>>       __u32 required_buffer_size;
>>>>       __u32 reserved[7];
>>>>   } __packed;
>>>> +
>>>> +struct update_property_req {
>>>> +#define UPDATE_PROPERTY_TIME_QUOTA 0
>>>> +    __u32 type;
>>>> +#define AIE2_UPDATE_PROPERTY_ALL_CTX    0xFF
>>>> +    __u8 context_id;
>>>> +    __u8 reserved[7];
>>>> +    __u32 time_quota_us;
>>>> +    __u32 reserved1;
>>>> +} __packed;
>>>> +
>>>> +struct update_property_resp {
>>>> +    enum aie2_msg_status status;
>>>> +} __packed;
>>>> +
>>>>   #endif /* _AIE2_MSG_PRIV_H_ */
>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/ 
>>>> amdxdna/aie2_pci.c
>>>> index c9c23c889c78..7ed99ea471a9 100644
>>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>>> @@ -33,6 +33,11 @@ static int aie2_max_col = XRS_MAX_COL;
>>>>   module_param(aie2_max_col, uint, 0600);
>>>>   MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
>>>>   +#define MAX_TIME_QUANTUM_MS 2000 /* milliseconds */
>>>> +static uint time_quantum_ms = 30; /* milliseconds */
>>>> +module_param(time_quantum_ms, uint, 0400);
>>>> +MODULE_PARM_DESC(time_quantum_ms, "Execution time quantum. Default 
>>>> 30 ms, MAX 2000 ms");
>>>> +
>>>>   static char *npu_fw[] = {
>>>>       "npu_7.sbin",
>>>>       "npu.sbin"
>>>> @@ -186,6 +191,17 @@ static int aie2_mgmt_fw_init(struct 
>>>> amdxdna_dev_hdl *ndev)
>>>>           return ret;
>>>>       }
>>>>   +    if (time_quantum_ms > MAX_TIME_QUANTUM_MS) {
>>>> +        XDNA_ERR(ndev->aie.xdna, "Bad time quantum %u", 
>>>> time_quantum_ms);
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>> +    ret = aie2_update_prop_time_quota(ndev, time_quantum_ms * 1000);
>>>> +    if (ret) {
>>>> +        XDNA_ERR(ndev->aie.xdna, "Failed to update execution time 
>>>> quantum");
>>>> +        return ret;
>>>> +    }
>>>> +
>>>>       ret = aie2_xdna_reset(ndev);
>>>>       if (ret) {
>>>>           XDNA_ERR(ndev->aie.xdna, "Reset firmware failed");
>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/ 
>>>> amdxdna/aie2_pci.h
>>>> index f83deca2b51a..69b53c7bcb86 100644
>>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>>> @@ -222,6 +222,7 @@ enum aie2_fw_feature {
>>>>       AIE2_PREEMPT,
>>>>       AIE2_TEMPORAL_ONLY,
>>>>       AIE2_APP_HEALTH,
>>>> +    AIE2_UPDATE_PROPERTY,
>>>>       AIE2_FEATURE_MAX
>>>>   };
>>>>   @@ -308,6 +309,7 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, 
>>>> struct amdxdna_sched_job *job,
>>>>            int (*notify_cb)(void *, void __iomem *, size_t));
>>>>   int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct 
>>>> amdxdna_sched_job *job,
>>>>                int (*notify_cb)(void *, void __iomem *, size_t));
>>>> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 
>>>> us);
>>>>   void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
>>>>                   dma_addr_t *dma_addr);
>>>>   void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
>>>> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/ 
>>>> amdxdna/npu4_regs.c
>>>> index a3b6df56abd0..6ebf75ad5fb4 100644
>>>> --- a/drivers/accel/amdxdna/npu4_regs.c
>>>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>>>> @@ -93,9 +93,10 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>>>>     const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] = {
>>>>       { .major = 6, .min_minor = 12 },
>>>> -    { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, 
>>>> .min_minor = 15 },
>>>>       { .features = BIT_U64(AIE2_PREEMPT), .major = 6, .min_minor = 
>>>> 12 },
>>>>       { .features = BIT_U64(AIE2_TEMPORAL_ONLY), .major = 6, 
>>>> .min_minor = 12 },
>>>> +    { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, 
>>>> .min_minor = 15 },
>>>> +    { .features = BIT_U64(AIE2_UPDATE_PROPERTY), .major = 6, 
>>>> .min_minor = 15 },
>>>>       { .features = BIT_U64(AIE2_APP_HEALTH), .major = 6, 
>>>> .min_minor = 18 },
>>>>       { .features = AIE2_ALL_FEATURES, .major = 7 },
>>>>       { 0 }
>>>
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Add hardware scheduler time quantum support
  2026-04-14 17:28       ` Lizhi Hou
@ 2026-04-14 17:32         ` Mario Limonciello
  2026-04-14 17:52           ` Zhen, Max
                             ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Mario Limonciello @ 2026-04-14 17:32 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, maciej.falkowski,
	gregkh
  Cc: Max Zhen, linux-kernel, sonal.santan



On 4/14/26 12:28, Lizhi Hou wrote:
> 
> On 4/14/26 10:17, Mario Limonciello wrote:
>>
>>
>> On 4/14/26 12:16, Lizhi Hou wrote:
>>>
>>> On 4/14/26 09:58, Mario Limonciello wrote:
>>>>
>>>>
>>>> On 4/14/26 11:56, Lizhi Hou wrote:
>>>>> From: Max Zhen <max.zhen@amd.com>
>>>>>
>>>>> Add support for configuring the hardware scheduler time quantum to
>>>>> improve fairness across concurrent contexts.
>>>>>
>>>>> The scheduler enforces a fixed time slice per context, preventing
>>>>> long-running workloads from monopolizing the device and allowing
>>>>> other contexts to make forward progress.
>>>>>
>>>>> The default time quantum is 30ms and can be configured via the
>>>>> time_quantum_ms module parameter.
>>>>
>>>> Can you talk more about how you want to use it?  Adding new module 
>>>> parameters is generally frowned upon in lieu of doing something with 
>>>> debugfs at runtime.
>>>
>>> This is a static setting which is not supposed to change at runtime. 
>>> So module parameter is used.
>>
>> But so what happens if user loads driver with default setting and then 
>> unloads driver and loads with a different setting as module option?
>>
>> Does this flow fall apart because the driver initially programmed 30ms?
> 
> Reloading with new setting will overwrite the default setting. After the 
> module is loaded, it is not supposed to change before unloading the module.
> 

+ Greg

Greg,

How do you feel about a module parameter for this purpose?  Any other 
suggestions if you don't like it?

I was thinking a debugfs file still makes sense: either the debugfs 
file can do the unbind/rebind internally, or the user of the debugfs file can do 
the unbind/bind sequence in sysfs after touching the debugfs file.

Here is full thread in case you don't have:

https://lore.kernel.org/dri-devel/20260414165602.788811-1-lizhi.hou@amd.com/

> 
> Lizhi
> 
>>
>>>
>>> Lizhi
>>>
>>>>
>>>> IE if you can export it as a debugfs file that when you write to it 
>>>> updates the quantum or updates it and restarts the driver this might 
>>>> be more preferable.
>>>>
>>>>>
>>>>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>>>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>>>> ---
>>>>>   drivers/accel/amdxdna/aie2_message.c  | 44 ++++++++++++++++++++++ 
>>>>> +++++
>>>>>   drivers/accel/amdxdna/aie2_msg_priv.h | 16 ++++++++++
>>>>>   drivers/accel/amdxdna/aie2_pci.c      | 16 ++++++++++
>>>>>   drivers/accel/amdxdna/aie2_pci.h      |  2 ++
>>>>>   drivers/accel/amdxdna/npu4_regs.c     |  3 +-
>>>>>   5 files changed, 80 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/ 
>>>>> amdxdna/aie2_message.c
>>>>> index e52dc7ea9fc7..976ad6281078 100644
>>>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>>>> @@ -1200,3 +1200,47 @@ int aie2_query_app_health(struct 
>>>>> amdxdna_dev_hdl *ndev, u32 context_id,
>>>>>       aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
>>>>>       return ret;
>>>>>   }
>>>>> +
>>>>> +static int
>>>>> +aie2_runtime_update_ctx_prop(struct amdxdna_dev_hdl *ndev,
>>>>> +                 struct amdxdna_hwctx *ctx, u32 type, u32 value)
>>>>> +{
>>>>> +    DECLARE_AIE_MSG(update_property, MSG_OP_UPDATE_PROPERTY);
>>>>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>>>>> +    int ret;
>>>>> +
>>>>> +    if (!AIE_FEATURE_ON(&ndev->aie, AIE2_UPDATE_PROPERTY))
>>>>> +        return -EOPNOTSUPP;
>>>>> +
>>>>> +    if (ctx)
>>>>> +        req.context_id = ctx->fw_ctx_id;
>>>>> +    else
>>>>> +        req.context_id = AIE2_UPDATE_PROPERTY_ALL_CTX;
>>>>> +
>>>>> +    req.time_quota_us = value;
>>>>> +    req.type = type;
>>>>> +
>>>>> +    ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>>>>> +    if (ret) {
>>>>> +        XDNA_ERR(xdna, "%s update property failed, type %d ret %d",
>>>>> +             ctx ? ctx->name : "ctx.all", type, ret);
>>>>> +        return ret;
>>>>> +    }
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us)
>>>>> +{
>>>>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>>>>> +    int ret;
>>>>> +
>>>>> +    ret = aie2_runtime_update_ctx_prop(ndev, NULL, 
>>>>> UPDATE_PROPERTY_TIME_QUOTA, us);
>>>>> +    if (ret == -EOPNOTSUPP) {
>>>>> +        XDNA_DBG(xdna, "update time quota not support, skipped");
>>>>> +        ret = 0;
>>>>> +    } else if (!ret) {
>>>>> +        XDNA_DBG(xdna, "Ctx exec time quantum updated to %u us", us);
>>>>> +    }
>>>>> +    return ret;
>>>>> +}
>>>>> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/ 
>>>>> amdxdna/aie2_msg_priv.h
>>>>> index f18e89a39e35..fc2e99510980 100644
>>>>> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
>>>>> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
>>>>> @@ -31,6 +31,7 @@ enum aie2_msg_opcode {
>>>>>       MSG_OP_SET_RUNTIME_CONFIG          = 0x10A,
>>>>>       MSG_OP_GET_RUNTIME_CONFIG          = 0x10B,
>>>>>       MSG_OP_REGISTER_ASYNC_EVENT_MSG    = 0x10C,
>>>>> +    MSG_OP_UPDATE_PROPERTY             = 0x113,
>>>>>       MSG_OP_GET_APP_HEALTH              = 0x114,
>>>>>       MSG_OP_MAX_DRV_OPCODE,
>>>>>       MSG_OP_GET_PROTOCOL_VERSION        = 0x301,
>>>>> @@ -503,4 +504,19 @@ struct get_app_health_resp {
>>>>>       __u32 required_buffer_size;
>>>>>       __u32 reserved[7];
>>>>>   } __packed;
>>>>> +
>>>>> +struct update_property_req {
>>>>> +#define UPDATE_PROPERTY_TIME_QUOTA 0
>>>>> +    __u32 type;
>>>>> +#define AIE2_UPDATE_PROPERTY_ALL_CTX    0xFF
>>>>> +    __u8 context_id;
>>>>> +    __u8 reserved[7];
>>>>> +    __u32 time_quota_us;
>>>>> +    __u32 reserved1;
>>>>> +} __packed;
>>>>> +
>>>>> +struct update_property_resp {
>>>>> +    enum aie2_msg_status status;
>>>>> +} __packed;
>>>>> +
>>>>>   #endif /* _AIE2_MSG_PRIV_H_ */
>>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/ 
>>>>> amdxdna/aie2_pci.c
>>>>> index c9c23c889c78..7ed99ea471a9 100644
>>>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>>>> @@ -33,6 +33,11 @@ static int aie2_max_col = XRS_MAX_COL;
>>>>>   module_param(aie2_max_col, uint, 0600);
>>>>>   MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
>>>>>   +#define MAX_TIME_QUANTUM_MS 2000 /* milliseconds */
>>>>> +static uint time_quantum_ms = 30; /* milliseconds */
>>>>> +module_param(time_quantum_ms, uint, 0400);
>>>>> +MODULE_PARM_DESC(time_quantum_ms, "Execution time quantum. Default 
>>>>> 30 ms, MAX 2000 ms");
>>>>> +
>>>>>   static char *npu_fw[] = {
>>>>>       "npu_7.sbin",
>>>>>       "npu.sbin"
>>>>> @@ -186,6 +191,17 @@ static int aie2_mgmt_fw_init(struct 
>>>>> amdxdna_dev_hdl *ndev)
>>>>>           return ret;
>>>>>       }
>>>>>   +    if (time_quantum_ms > MAX_TIME_QUANTUM_MS) {
>>>>> +        XDNA_ERR(ndev->aie.xdna, "Bad time quantum %u", 
>>>>> time_quantum_ms);
>>>>> +        return -EINVAL;
>>>>> +    }
>>>>> +
>>>>> +    ret = aie2_update_prop_time_quota(ndev, time_quantum_ms * 1000);
>>>>> +    if (ret) {
>>>>> +        XDNA_ERR(ndev->aie.xdna, "Failed to update execution time 
>>>>> quantum");
>>>>> +        return ret;
>>>>> +    }
>>>>> +
>>>>>       ret = aie2_xdna_reset(ndev);
>>>>>       if (ret) {
>>>>>           XDNA_ERR(ndev->aie.xdna, "Reset firmware failed");
>>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/ 
>>>>> amdxdna/aie2_pci.h
>>>>> index f83deca2b51a..69b53c7bcb86 100644
>>>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>>>> @@ -222,6 +222,7 @@ enum aie2_fw_feature {
>>>>>       AIE2_PREEMPT,
>>>>>       AIE2_TEMPORAL_ONLY,
>>>>>       AIE2_APP_HEALTH,
>>>>> +    AIE2_UPDATE_PROPERTY,
>>>>>       AIE2_FEATURE_MAX
>>>>>   };
>>>>>   @@ -308,6 +309,7 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, 
>>>>> struct amdxdna_sched_job *job,
>>>>>            int (*notify_cb)(void *, void __iomem *, size_t));
>>>>>   int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct 
>>>>> amdxdna_sched_job *job,
>>>>>                int (*notify_cb)(void *, void __iomem *, size_t));
>>>>> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 
>>>>> us);
>>>>>   void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
>>>>>                   dma_addr_t *dma_addr);
>>>>>   void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
>>>>> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/ 
>>>>> amdxdna/npu4_regs.c
>>>>> index a3b6df56abd0..6ebf75ad5fb4 100644
>>>>> --- a/drivers/accel/amdxdna/npu4_regs.c
>>>>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>>>>> @@ -93,9 +93,10 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>>>>>     const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] = {
>>>>>       { .major = 6, .min_minor = 12 },
>>>>> -    { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 
>>>>> 6, .min_minor = 15 },
>>>>>       { .features = BIT_U64(AIE2_PREEMPT), .major = 6, .min_minor = 
>>>>> 12 },
>>>>>       { .features = BIT_U64(AIE2_TEMPORAL_ONLY), .major = 
>>>>> 6, .min_minor = 12 },
>>>>> +    { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 
>>>>> 6, .min_minor = 15 },
>>>>> +    { .features = BIT_U64(AIE2_UPDATE_PROPERTY), .major = 
>>>>> 6, .min_minor = 15 },
>>>>>       { .features = BIT_U64(AIE2_APP_HEALTH), .major = 
>>>>> 6, .min_minor = 18 },
>>>>>       { .features = AIE2_ALL_FEATURES, .major = 7 },
>>>>>       { 0 }
>>>>
>>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH V1] accel/amdxdna: Add hardware scheduler time quantum support
  2026-04-14 17:32         ` Mario Limonciello
@ 2026-04-14 17:52           ` Zhen, Max
  2026-04-15 16:49           ` Lizhi Hou
  2026-04-18  7:35           ` Greg KH
  2 siblings, 0 replies; 10+ messages in thread
From: Zhen, Max @ 2026-04-14 17:52 UTC (permalink / raw)
  To: Limonciello, Mario, Hou, Lizhi, ogabbay@kernel.org,
	quic_jhugo@quicinc.com, dri-devel@lists.freedesktop.org,
	maciej.falkowski@linux.intel.com, gregkh@linuxfoundation.org
  Cc: linux-kernel@vger.kernel.org, Santan, Sonal

[AMD Official Use Only - AMD Internal Distribution Only]

Hi Mario,

I'm not quite sure what the concern is here. This setting can only be set before any application starts using the device, hence we set it during probing of the driver and implemented it as a module parameter. We can't allow the user to change the setting after the driver is fully initialized, so we can't really do it through debugfs. It is not as if we have a choice and can do it either way.

Thanks,
Max

-----Original Message-----
From: Limonciello, Mario <Mario.Limonciello@amd.com>
Sent: Tuesday, April 14, 2026 10:32
To: Hou, Lizhi <lizhi.hou@amd.com>; ogabbay@kernel.org; quic_jhugo@quicinc.com; dri-devel@lists.freedesktop.org; maciej.falkowski@linux.intel.com; gregkh@linuxfoundation.org
Cc: Zhen, Max <Max.Zhen@amd.com>; linux-kernel@vger.kernel.org; Santan, Sonal <sonal.santan@amd.com>
Subject: Re: [PATCH V1] accel/amdxdna: Add hardware scheduler time quantum support



On 4/14/26 12:28, Lizhi Hou wrote:
>
> On 4/14/26 10:17, Mario Limonciello wrote:
>>
>>
>> On 4/14/26 12:16, Lizhi Hou wrote:
>>>
>>> On 4/14/26 09:58, Mario Limonciello wrote:
>>>>
>>>>
>>>> On 4/14/26 11:56, Lizhi Hou wrote:
>>>>> From: Max Zhen <max.zhen@amd.com>
>>>>>
>>>>> Add support for configuring the hardware scheduler time quantum to
>>>>> improve fairness across concurrent contexts.
>>>>>
>>>>> The scheduler enforces a fixed time slice per context, preventing
>>>>> long-running workloads from monopolizing the device and allowing
>>>>> other contexts to make forward progress.
>>>>>
>>>>> The default time quantum is 30ms and can be configured via the
>>>>> time_quantum_ms module parameter.
>>>>
>>>> Can you talk more about how you want to use it?  Adding new module
>>>> parameters is generally frowned upon in lieu of doing something
>>>> with debugfs at runtime.
>>>
>>> This is a static setting which is not supposed to change at runtime.
>>> So module parameter is used.
>>
>> But so what happens if user loads driver with default setting and
>> then unloads driver and loads with a different setting as module option?
>>
>> Does this flow fall apart because the driver initially programmed 30ms?
>
> Reloading with new setting will overwrite the default setting. After
> the module is loaded, it is not supposed to change before unloading the module.
>

+ Greg

Greg,

How do you feel about a module parameter for this purpose?  Any other suggestions if you don't like it?

I was thinking a debugfs file still makes sense, but either the debugfs file can do unbind/rebind internally or user using debugfs file can do the unbind/bind sequence in sysfs after touching the debugfs file.

Here is full thread in case you don't have:

https://lore.kernel.org/dri-devel/20260414165602.788811-1-lizhi.hou@amd.com/

>
> Lizhi
>
>>
>>>
>>> Lizhi
>>>
>>>>
>>>> IE if you can export it as a debugfs file that when you write to it
>>>> updates the quantum or updates it and restarts the driver this
>>>> might be more preferable.
>>>>
>>>>>
>>>>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>>>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>>>> ---
>>>>>   drivers/accel/amdxdna/aie2_message.c  | 44
>>>>> ++++++++++++++++++++++
>>>>> +++++
>>>>>   drivers/accel/amdxdna/aie2_msg_priv.h | 16 ++++++++++
>>>>>   drivers/accel/amdxdna/aie2_pci.c      | 16 ++++++++++
>>>>>   drivers/accel/amdxdna/aie2_pci.h      |  2 ++
>>>>>   drivers/accel/amdxdna/npu4_regs.c     |  3 +-
>>>>>   5 files changed, 80 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/
>>>>> amdxdna/aie2_message.c index e52dc7ea9fc7..976ad6281078 100644
>>>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>>>> @@ -1200,3 +1200,47 @@ int aie2_query_app_health(struct
>>>>> amdxdna_dev_hdl *ndev, u32 context_id,
>>>>>       aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
>>>>>       return ret;
>>>>>   }
>>>>> +
>>>>> +static int
>>>>> +aie2_runtime_update_ctx_prop(struct amdxdna_dev_hdl *ndev,
>>>>> +                 struct amdxdna_hwctx *ctx, u32 type, u32 value)
>>>>> +{
>>>>> +    DECLARE_AIE_MSG(update_property, MSG_OP_UPDATE_PROPERTY);
>>>>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>>>>> +    int ret;
>>>>> +
>>>>> +    if (!AIE_FEATURE_ON(&ndev->aie, AIE2_UPDATE_PROPERTY))
>>>>> +        return -EOPNOTSUPP;
>>>>> +
>>>>> +    if (ctx)
>>>>> +        req.context_id = ctx->fw_ctx_id;
>>>>> +    else
>>>>> +        req.context_id = AIE2_UPDATE_PROPERTY_ALL_CTX;
>>>>> +
>>>>> +    req.time_quota_us = value;
>>>>> +    req.type = type;
>>>>> +
>>>>> +    ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>>>>> +    if (ret) {
>>>>> +        XDNA_ERR(xdna, "%s update property failed, type %d ret
>>>>> +%d",
>>>>> +             ctx ? ctx->name : "ctx.all", type, ret);
>>>>> +        return ret;
>>>>> +    }
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32
>>>>> +us) {
>>>>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>>>>> +    int ret;
>>>>> +
>>>>> +    ret = aie2_runtime_update_ctx_prop(ndev, NULL,
>>>>> UPDATE_PROPERTY_TIME_QUOTA, us);
>>>>> +    if (ret == -EOPNOTSUPP) {
>>>>> +        XDNA_DBG(xdna, "update time quota not support, skipped");
>>>>> +        ret = 0;
>>>>> +    } else if (!ret) {
>>>>> +        XDNA_DBG(xdna, "Ctx exec time quantum updated to %u us",
>>>>> +us);
>>>>> +    }
>>>>> +    return ret;
>>>>> +}
>>>>> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h
>>>>> b/drivers/accel/ amdxdna/aie2_msg_priv.h index
>>>>> f18e89a39e35..fc2e99510980 100644
>>>>> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
>>>>> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
>>>>> @@ -31,6 +31,7 @@ enum aie2_msg_opcode {
>>>>>       MSG_OP_SET_RUNTIME_CONFIG          = 0x10A,
>>>>>       MSG_OP_GET_RUNTIME_CONFIG          = 0x10B,
>>>>>       MSG_OP_REGISTER_ASYNC_EVENT_MSG    = 0x10C,
>>>>> +    MSG_OP_UPDATE_PROPERTY             = 0x113,
>>>>>       MSG_OP_GET_APP_HEALTH              = 0x114,
>>>>>       MSG_OP_MAX_DRV_OPCODE,
>>>>>       MSG_OP_GET_PROTOCOL_VERSION        = 0x301, @@ -503,4
>>>>> +504,19 @@ struct get_app_health_resp {
>>>>>       __u32 required_buffer_size;
>>>>>       __u32 reserved[7];
>>>>>   } __packed;
>>>>> +
>>>>> +struct update_property_req {
>>>>> +#define UPDATE_PROPERTY_TIME_QUOTA 0
>>>>> +    __u32 type;
>>>>> +#define AIE2_UPDATE_PROPERTY_ALL_CTX    0xFF
>>>>> +    __u8 context_id;
>>>>> +    __u8 reserved[7];
>>>>> +    __u32 time_quota_us;
>>>>> +    __u32 reserved1;
>>>>> +} __packed;
>>>>> +
>>>>> +struct update_property_resp {
>>>>> +    enum aie2_msg_status status;
>>>>> +} __packed;
>>>>> +
>>>>>   #endif /* _AIE2_MSG_PRIV_H_ */
>>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/
>>>>> amdxdna/aie2_pci.c index c9c23c889c78..7ed99ea471a9 100644
>>>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>>>> @@ -33,6 +33,11 @@ static int aie2_max_col = XRS_MAX_COL;
>>>>>   module_param(aie2_max_col, uint, 0600);
>>>>>   MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
>>>>>   +#define MAX_TIME_QUANTUM_MS 2000 /* milliseconds */
>>>>> +static uint time_quantum_ms = 30; /* milliseconds */
>>>>> +module_param(time_quantum_ms, uint, 0400);
>>>>> +MODULE_PARM_DESC(time_quantum_ms, "Execution time quantum.
>>>>> +Default
>>>>> 30 ms, MAX 2000 ms");
>>>>> +
>>>>>   static char *npu_fw[] = {
>>>>>       "npu_7.sbin",
>>>>>       "npu.sbin"
>>>>> @@ -186,6 +191,17 @@ static int aie2_mgmt_fw_init(struct
>>>>> amdxdna_dev_hdl *ndev)
>>>>>           return ret;
>>>>>       }
>>>>>   +    if (time_quantum_ms > MAX_TIME_QUANTUM_MS) {
>>>>> +        XDNA_ERR(ndev->aie.xdna, "Bad time quantum %u",
>>>>> time_quantum_ms);
>>>>> +        return -EINVAL;
>>>>> +    }
>>>>> +
>>>>> +    ret = aie2_update_prop_time_quota(ndev, time_quantum_ms *
>>>>> +1000);
>>>>> +    if (ret) {
>>>>> +        XDNA_ERR(ndev->aie.xdna, "Failed to update execution time
>>>>> quantum");
>>>>> +        return ret;
>>>>> +    }
>>>>> +
>>>>>       ret = aie2_xdna_reset(ndev);
>>>>>       if (ret) {
>>>>>           XDNA_ERR(ndev->aie.xdna, "Reset firmware failed"); diff
>>>>> --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/
>>>>> amdxdna/aie2_pci.h index f83deca2b51a..69b53c7bcb86 100644
>>>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>>>> @@ -222,6 +222,7 @@ enum aie2_fw_feature {
>>>>>       AIE2_PREEMPT,
>>>>>       AIE2_TEMPORAL_ONLY,
>>>>>       AIE2_APP_HEALTH,
>>>>> +    AIE2_UPDATE_PROPERTY,
>>>>>       AIE2_FEATURE_MAX
>>>>>   };
>>>>>   @@ -308,6 +309,7 @@ int aie2_sync_bo(struct amdxdna_hwctx
>>>>> *hwctx, struct amdxdna_sched_job *job,
>>>>>            int (*notify_cb)(void *, void __iomem *, size_t));
>>>>>   int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct
>>>>> amdxdna_sched_job *job,
>>>>>                int (*notify_cb)(void *, void __iomem *, size_t));
>>>>> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32
>>>>> us);
>>>>>   void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32
>>>>> *size,
>>>>>                   dma_addr_t *dma_addr);
>>>>>   void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t
>>>>> size, diff --git a/drivers/accel/amdxdna/npu4_regs.c
>>>>> b/drivers/accel/ amdxdna/npu4_regs.c index
>>>>> a3b6df56abd0..6ebf75ad5fb4 100644
>>>>> --- a/drivers/accel/amdxdna/npu4_regs.c
>>>>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>>>>> @@ -93,9 +93,10 @@ const struct dpm_clk_freq npu4_dpm_clk_table[]
>>>>> = {
>>>>>     const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] =
>>>>> {
>>>>>       { .major = 6, .min_minor = 12 },
>>>>> -    { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6,
>>>>> .min_minor = 15 },
>>>>>       { .features = BIT_U64(AIE2_PREEMPT), .major = 6, .min_minor
>>>>> =
>>>>> 12 },
>>>>>       { .features = BIT_U64(AIE2_TEMPORAL_ONLY), .major = 6,
>>>>> .min_minor = 12 },
>>>>> +    { .features = BIT_U64(AIE2_NPU_COMMAND), .major =
>>>>> 6, .min_minor = 15 },
>>>>> +    { .features = BIT_U64(AIE2_UPDATE_PROPERTY), .major =
>>>>> 6, .min_minor = 15 },
>>>>>       { .features = BIT_U64(AIE2_APP_HEALTH), .major = 6,
>>>>> .min_minor = 18 },
>>>>>       { .features = AIE2_ALL_FEATURES, .major = 7 },
>>>>>       { 0 }
>>>>
>>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Add hardware scheduler time quantum support
  2026-04-14 17:32         ` Mario Limonciello
  2026-04-14 17:52           ` Zhen, Max
@ 2026-04-15 16:49           ` Lizhi Hou
  2026-04-18  7:35           ` Greg KH
  2 siblings, 0 replies; 10+ messages in thread
From: Lizhi Hou @ 2026-04-15 16:49 UTC (permalink / raw)
  To: Mario Limonciello, ogabbay, quic_jhugo, dri-devel,
	maciej.falkowski, gregkh
  Cc: Max Zhen, linux-kernel, sonal.santan


On 4/14/26 10:32, Mario Limonciello wrote:
>
>
> On 4/14/26 12:28, Lizhi Hou wrote:
>>
>> On 4/14/26 10:17, Mario Limonciello wrote:
>>>
>>>
>>> On 4/14/26 12:16, Lizhi Hou wrote:
>>>>
>>>> On 4/14/26 09:58, Mario Limonciello wrote:
>>>>>
>>>>>
>>>>> On 4/14/26 11:56, Lizhi Hou wrote:
>>>>>> From: Max Zhen <max.zhen@amd.com>
>>>>>>
>>>>>> Add support for configuring the hardware scheduler time quantum to
>>>>>> improve fairness across concurrent contexts.
>>>>>>
>>>>>> The scheduler enforces a fixed time slice per context, preventing
>>>>>> long-running workloads from monopolizing the device and allowing
>>>>>> other contexts to make forward progress.
>>>>>>
>>>>>> The default time quantum is 30ms and can be configured via the
>>>>>> time_quantum_ms module parameter.
>>>>>
>>>>> Can you talk more about how you want to use it?  Adding new module 
>>>>> parameters is generally frowned upon in lieu of doing something 
>>>>> with debugfs at runtime.
>>>>
>>>> This is a static setting which is not supposed to change at 
>>>> runtime. So module parameter is used.
>>>
>>> But so what happens if user loads driver with default setting and 
>>> then unloads driver and loads with a different setting as module 
>>> option?
>>>
>>> Does this flow fall apart because the driver initially programmed 30ms?
>>
>> Reloading with new setting will overwrite the default setting. After 
>> the module is loaded, it is not supposed to change before unloading 
>> the module.
>>
>
> + Greg
>
> Greg,
>
> How do you feel about a module parameter for this purpose?  Any other 
> suggestions if you don't like it?
>
> I was thinking a debugfs file still makes sense, but either the 
> debugfs file can do unbind/rebind internally or user using debugfs 
> file can do the unbind/bind sequence in sysfs after touching the 
> debugfs file.
>
> Here is full thread in case you don't have:
>
> https://lore.kernel.org/dri-devel/20260414165602.788811-1-lizhi.hou@amd.com/ 
>

After discussion, I will remove the module parameter and always use
the default of 30ms for now.


Lizhi

>
>>
>> Lizhi
>>
>>>
>>>>
>>>> Lizhi
>>>>
>>>>>
>>>>> IE if you can export it as a debugfs file that when you write to 
>>>>> it updates the quantum or updates it and restarts the driver this 
>>>>> might be more preferable.
>>>>>
>>>>>>
>>>>>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>>>>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>>>>> ---
>>>>>>   drivers/accel/amdxdna/aie2_message.c  | 44 
>>>>>> ++++++++++++++++++++++ +++++
>>>>>>   drivers/accel/amdxdna/aie2_msg_priv.h | 16 ++++++++++
>>>>>>   drivers/accel/amdxdna/aie2_pci.c      | 16 ++++++++++
>>>>>>   drivers/accel/amdxdna/aie2_pci.h      |  2 ++
>>>>>>   drivers/accel/amdxdna/npu4_regs.c     |  3 +-
>>>>>>   5 files changed, 80 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/drivers/accel/amdxdna/aie2_message.c 
>>>>>> b/drivers/accel/ amdxdna/aie2_message.c
>>>>>> index e52dc7ea9fc7..976ad6281078 100644
>>>>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>>>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>>>>> @@ -1200,3 +1200,47 @@ int aie2_query_app_health(struct 
>>>>>> amdxdna_dev_hdl *ndev, u32 context_id,
>>>>>>       aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
>>>>>>       return ret;
>>>>>>   }
>>>>>> +
>>>>>> +static int
>>>>>> +aie2_runtime_update_ctx_prop(struct amdxdna_dev_hdl *ndev,
>>>>>> +                 struct amdxdna_hwctx *ctx, u32 type, u32 value)
>>>>>> +{
>>>>>> +    DECLARE_AIE_MSG(update_property, MSG_OP_UPDATE_PROPERTY);
>>>>>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    if (!AIE_FEATURE_ON(&ndev->aie, AIE2_UPDATE_PROPERTY))
>>>>>> +        return -EOPNOTSUPP;
>>>>>> +
>>>>>> +    if (ctx)
>>>>>> +        req.context_id = ctx->fw_ctx_id;
>>>>>> +    else
>>>>>> +        req.context_id = AIE2_UPDATE_PROPERTY_ALL_CTX;
>>>>>> +
>>>>>> +    req.time_quota_us = value;
>>>>>> +    req.type = type;
>>>>>> +
>>>>>> +    ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>>>>>> +    if (ret) {
>>>>>> +        XDNA_ERR(xdna, "%s update property failed, type %d ret %d",
>>>>>> +             ctx ? ctx->name : "ctx.all", type, ret);
>>>>>> +        return ret;
>>>>>> +    }
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, 
>>>>>> u32 us)
>>>>>> +{
>>>>>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    ret = aie2_runtime_update_ctx_prop(ndev, NULL, 
>>>>>> UPDATE_PROPERTY_TIME_QUOTA, us);
>>>>>> +    if (ret == -EOPNOTSUPP) {
>>>>>> +        XDNA_DBG(xdna, "update time quota not support, skipped");
>>>>>> +        ret = 0;
>>>>>> +    } else if (!ret) {
>>>>>> +        XDNA_DBG(xdna, "Ctx exec time quantum updated to %u us", 
>>>>>> us);
>>>>>> +    }
>>>>>> +    return ret;
>>>>>> +}
>>>>>> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h 
>>>>>> b/drivers/accel/ amdxdna/aie2_msg_priv.h
>>>>>> index f18e89a39e35..fc2e99510980 100644
>>>>>> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
>>>>>> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
>>>>>> @@ -31,6 +31,7 @@ enum aie2_msg_opcode {
>>>>>>       MSG_OP_SET_RUNTIME_CONFIG          = 0x10A,
>>>>>>       MSG_OP_GET_RUNTIME_CONFIG          = 0x10B,
>>>>>>       MSG_OP_REGISTER_ASYNC_EVENT_MSG    = 0x10C,
>>>>>> +    MSG_OP_UPDATE_PROPERTY             = 0x113,
>>>>>>       MSG_OP_GET_APP_HEALTH              = 0x114,
>>>>>>       MSG_OP_MAX_DRV_OPCODE,
>>>>>>       MSG_OP_GET_PROTOCOL_VERSION        = 0x301,
>>>>>> @@ -503,4 +504,19 @@ struct get_app_health_resp {
>>>>>>       __u32 required_buffer_size;
>>>>>>       __u32 reserved[7];
>>>>>>   } __packed;
>>>>>> +
>>>>>> +struct update_property_req {
>>>>>> +#define UPDATE_PROPERTY_TIME_QUOTA 0
>>>>>> +    __u32 type;
>>>>>> +#define AIE2_UPDATE_PROPERTY_ALL_CTX    0xFF
>>>>>> +    __u8 context_id;
>>>>>> +    __u8 reserved[7];
>>>>>> +    __u32 time_quota_us;
>>>>>> +    __u32 reserved1;
>>>>>> +} __packed;
>>>>>> +
>>>>>> +struct update_property_resp {
>>>>>> +    enum aie2_msg_status status;
>>>>>> +} __packed;
>>>>>> +
>>>>>>   #endif /* _AIE2_MSG_PRIV_H_ */
>>>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/ 
>>>>>> amdxdna/aie2_pci.c
>>>>>> index c9c23c889c78..7ed99ea471a9 100644
>>>>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>>>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>>>>> @@ -33,6 +33,11 @@ static int aie2_max_col = XRS_MAX_COL;
>>>>>>   module_param(aie2_max_col, uint, 0600);
>>>>>>   MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
>>>>>>   +#define MAX_TIME_QUANTUM_MS 2000 /* milliseconds */
>>>>>> +static uint time_quantum_ms = 30; /* milliseconds */
>>>>>> +module_param(time_quantum_ms, uint, 0400);
>>>>>> +MODULE_PARM_DESC(time_quantum_ms, "Execution time quantum. 
>>>>>> Default 30 ms, MAX 2000 ms");
>>>>>> +
>>>>>>   static char *npu_fw[] = {
>>>>>>       "npu_7.sbin",
>>>>>>       "npu.sbin"
>>>>>> @@ -186,6 +191,17 @@ static int aie2_mgmt_fw_init(struct 
>>>>>> amdxdna_dev_hdl *ndev)
>>>>>>           return ret;
>>>>>>       }
>>>>>>   +    if (time_quantum_ms > MAX_TIME_QUANTUM_MS) {
>>>>>> +        XDNA_ERR(ndev->aie.xdna, "Bad time quantum %u", 
>>>>>> time_quantum_ms);
>>>>>> +        return -EINVAL;
>>>>>> +    }
>>>>>> +
>>>>>> +    ret = aie2_update_prop_time_quota(ndev, time_quantum_ms * 
>>>>>> 1000);
>>>>>> +    if (ret) {
>>>>>> +        XDNA_ERR(ndev->aie.xdna, "Failed to update execution 
>>>>>> time quantum");
>>>>>> +        return ret;
>>>>>> +    }
>>>>>> +
>>>>>>       ret = aie2_xdna_reset(ndev);
>>>>>>       if (ret) {
>>>>>>           XDNA_ERR(ndev->aie.xdna, "Reset firmware failed");
>>>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/ 
>>>>>> amdxdna/aie2_pci.h
>>>>>> index f83deca2b51a..69b53c7bcb86 100644
>>>>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>>>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>>>>> @@ -222,6 +222,7 @@ enum aie2_fw_feature {
>>>>>>       AIE2_PREEMPT,
>>>>>>       AIE2_TEMPORAL_ONLY,
>>>>>>       AIE2_APP_HEALTH,
>>>>>> +    AIE2_UPDATE_PROPERTY,
>>>>>>       AIE2_FEATURE_MAX
>>>>>>   };
>>>>>>   @@ -308,6 +309,7 @@ int aie2_sync_bo(struct amdxdna_hwctx 
>>>>>> *hwctx, struct amdxdna_sched_job *job,
>>>>>>            int (*notify_cb)(void *, void __iomem *, size_t));
>>>>>>   int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct 
>>>>>> amdxdna_sched_job *job,
>>>>>>                int (*notify_cb)(void *, void __iomem *, size_t));
>>>>>> +int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, 
>>>>>> u32 us);
>>>>>>   void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 
>>>>>> *size,
>>>>>>                   dma_addr_t *dma_addr);
>>>>>>   void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t 
>>>>>> size,
>>>>>> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/ 
>>>>>> amdxdna/npu4_regs.c
>>>>>> index a3b6df56abd0..6ebf75ad5fb4 100644
>>>>>> --- a/drivers/accel/amdxdna/npu4_regs.c
>>>>>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>>>>>> @@ -93,9 +93,10 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] 
>>>>>> = {
>>>>>>     const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] = {
>>>>>>       { .major = 6, .min_minor = 12 },
>>>>>> -    { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, 
>>>>>> .min_minor = 15 },
>>>>>>       { .features = BIT_U64(AIE2_PREEMPT), .major = 6, .min_minor 
>>>>>> = 12 },
>>>>>>       { .features = BIT_U64(AIE2_TEMPORAL_ONLY), .major = 6, 
>>>>>> .min_minor = 12 },
>>>>>> +    { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, 
>>>>>> .min_minor = 15 },
>>>>>> +    { .features = BIT_U64(AIE2_UPDATE_PROPERTY), .major = 6, 
>>>>>> .min_minor = 15 },
>>>>>>       { .features = BIT_U64(AIE2_APP_HEALTH), .major = 6, 
>>>>>> .min_minor = 18 },
>>>>>>       { .features = AIE2_ALL_FEATURES, .major = 7 },
>>>>>>       { 0 }
>>>>>
>>>
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Add hardware scheduler time quantum support
  2026-04-14 17:32         ` Mario Limonciello
  2026-04-14 17:52           ` Zhen, Max
  2026-04-15 16:49           ` Lizhi Hou
@ 2026-04-18  7:35           ` Greg KH
  2026-04-18 19:38             ` Mario Limonciello
  2 siblings, 1 reply; 10+ messages in thread
From: Greg KH @ 2026-04-18  7:35 UTC (permalink / raw)
  To: Mario Limonciello
  Cc: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, maciej.falkowski,
	Max Zhen, linux-kernel, sonal.santan

On Tue, Apr 14, 2026 at 12:32:20PM -0500, Mario Limonciello wrote:
> 
> 
> On 4/14/26 12:28, Lizhi Hou wrote:
> > 
> > On 4/14/26 10:17, Mario Limonciello wrote:
> > > 
> > > 
> > > On 4/14/26 12:16, Lizhi Hou wrote:
> > > > 
> > > > On 4/14/26 09:58, Mario Limonciello wrote:
> > > > > 
> > > > > 
> > > > > On 4/14/26 11:56, Lizhi Hou wrote:
> > > > > > From: Max Zhen <max.zhen@amd.com>
> > > > > > 
> > > > > > Add support for configuring the hardware scheduler time quantum to
> > > > > > improve fairness across concurrent contexts.
> > > > > > 
> > > > > > The scheduler enforces a fixed time slice per context, preventing
> > > > > > long-running workloads from monopolizing the device and allowing
> > > > > > other contexts to make forward progress.
> > > > > > 
> > > > > > The default time quantum is 30ms and can be configured via the
> > > > > > time_quantum_ms module parameter.
> > > > > 
> > > > > Can you talk more about how you want to use it?  Adding new
> > > > > module parameters is generally frowned upon in lieu of doing
> > > > > something with debugfs at runtime.
> > > > 
> > > > This is a static setting which is not supposed to change at
> > > > runtime. So module parameter is used.
> > > 
> > > But so what happens if user loads driver with default setting and
> > > then unloads driver and loads with a different setting as module
> > > option?
> > > 
> > > Does this flow fall apart because the driver initially programmed 30ms?
> > 
> > Reloading with new setting will overwrite the default setting. After the
> > module is loaded, it is not supposed to change before unloading the
> > module.
> > 
> 
> + Greg
> 
> Greg,
> 
> How do you feel about a module parameter for this purpose?  Any other
> suggestions if you don't like it?

module parameters should almost never never never be added to the
kernel, ESPECIALLY if it is for a device-specific thing (like in a
driver like this.)  Please don't do that.

> I was thinking a debugfs file still makes sense, but either the debugfs file
> can do unbind/rebind internally or user using debugfs file can do the
> unbind/bind sequence in sysfs after touching the debugfs file.

debugfs is for debugging, don't require it for functionality that a
user/admin actually wants to do for a device as many distros and systems
disable it entirely due to all of the security holes it exposes to
admins.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Add hardware scheduler time quantum support
  2026-04-18  7:35           ` Greg KH
@ 2026-04-18 19:38             ` Mario Limonciello
  0 siblings, 0 replies; 10+ messages in thread
From: Mario Limonciello @ 2026-04-18 19:38 UTC (permalink / raw)
  To: Greg KH
  Cc: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, maciej.falkowski,
	Max Zhen, linux-kernel, sonal.santan



On 4/18/26 02:35, Greg KH wrote:
> On Tue, Apr 14, 2026 at 12:32:20PM -0500, Mario Limonciello wrote:
>>
>>
>> On 4/14/26 12:28, Lizhi Hou wrote:
>>>
>>> On 4/14/26 10:17, Mario Limonciello wrote:
>>>>
>>>>
>>>> On 4/14/26 12:16, Lizhi Hou wrote:
>>>>>
>>>>> On 4/14/26 09:58, Mario Limonciello wrote:
>>>>>>
>>>>>>
>>>>>> On 4/14/26 11:56, Lizhi Hou wrote:
>>>>>>> From: Max Zhen <max.zhen@amd.com>
>>>>>>>
>>>>>>> Add support for configuring the hardware scheduler time quantum to
>>>>>>> improve fairness across concurrent contexts.
>>>>>>>
>>>>>>> The scheduler enforces a fixed time slice per context, preventing
>>>>>>> long-running workloads from monopolizing the device and allowing
>>>>>>> other contexts to make forward progress.
>>>>>>>
>>>>>>> The default time quantum is 30ms and can be configured via the
>>>>>>> time_quantum_ms module parameter.
>>>>>>
>>>>>> Can you talk more about how you want to use it?  Adding new
>>>>>> module parameters is generally frowned upon in lieu of doing
>>>>>> something with debugfs at runtime.
>>>>>
>>>>> This is a static setting which is not supposed to change at
>>>>> runtime. So module parameter is used.
>>>>
>>>> But so what happens if user loads driver with default setting and
>>>> then unloads driver and loads with a different setting as module
>>>> option?
>>>>
>>>> Does this flow fall apart because the driver initially programmed 30ms?
>>>
>>> Reloading with new setting will overwrite the default setting. After the
>>> module is loaded, it is not supposed to change before unloading the
>>> module.
>>>
>>
>> + Greg
>>
>> Greg,
>>
>> How do you feel about a module parameter for this purpose?  Any other
>> suggestions if you don't like it?
> 
> module parameters should almost never never never be added to the
> kernel, ESPECIALLY if it is for a device-specific thing (like in a
> driver like this.)  Please don't do that.
> 
>> I was thinking a debugfs file still makes sense, but either the debugfs file
>> can do unbind/rebind internally or user using debugfs file can do the
>> unbind/bind sequence in sysfs after touching the debugfs file.
> 
> debugfs is for debugging, don't require it for functionality that a
> user/admin actually wants to do for a device as many distros and systems
> disable it entirely due to all of the security holes it exposes to
> admins.

It was for tuning purposes.  Max and the team decided to make it a
compile-time macro.  Any tuning can be done by developers recompiling.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2026-04-18 19:38 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-14 16:56 [PATCH V1] accel/amdxdna: Add hardware scheduler time quantum support Lizhi Hou
2026-04-14 16:58 ` Mario Limonciello
2026-04-14 17:16   ` Lizhi Hou
2026-04-14 17:17     ` Mario Limonciello
2026-04-14 17:28       ` Lizhi Hou
2026-04-14 17:32         ` Mario Limonciello
2026-04-14 17:52           ` Zhen, Max
2026-04-15 16:49           ` Lizhi Hou
2026-04-18  7:35           ` Greg KH
2026-04-18 19:38             ` Mario Limonciello

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.