* [PATCH V2] accel/amdxdna: Fix incorrect command state for timed out job
@ 2025-10-29 19:34 Lizhi Hou
2025-10-29 19:38 ` Mario Limonciello
0 siblings, 1 reply; 3+ messages in thread
From: Lizhi Hou @ 2025-10-29 19:34 UTC (permalink / raw)
To: ogabbay, quic_jhugo, maciej.falkowski, dri-devel
Cc: Lizhi Hou, linux-kernel, max.zhen, sonal.santan,
mario.limonciello
When a command times out, mark it as ERT_CMD_STATE_TIMEOUT. Any other
commands that are canceled due to this timeout should be marked as
ERT_CMD_STATE_ABORT.
Fixes: aac243092b70 ("accel/amdxdna: Add command execution")
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/aie2_ctx.c | 15 +++++++++++++--
drivers/accel/amdxdna/amdxdna_ctx.h | 1 +
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index c6c473c78352..289a2aaf4cae 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -204,10 +204,13 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
cmd_abo = job->cmd_bo;
- if (unlikely(!data))
+ if (unlikely(job->job_timeout)) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
+ ret = -EINVAL;
goto out;
+ }
- if (unlikely(size != sizeof(u32))) {
+ if (unlikely(!data) || unlikely(size != sizeof(u32))) {
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
ret = -EINVAL;
goto out;
@@ -258,6 +261,13 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
int ret = 0;
cmd_abo = job->cmd_bo;
+
+ if (unlikely(job->job_timeout)) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
+ ret = -EINVAL;
+ goto out;
+ }
+
if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
ret = -EINVAL;
@@ -370,6 +380,7 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
xdna = hwctx->client->xdna;
trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
+ job->job_timeout = true;
mutex_lock(&xdna->dev_lock);
aie2_hwctx_stop(xdna, hwctx, sched_job);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index cbe60efbe60b..919c654dfea6 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -116,6 +116,7 @@ struct amdxdna_sched_job {
/* user can wait on this fence */
struct dma_fence *out_fence;
bool job_done;
+ bool job_timeout;
u64 seq;
struct amdxdna_drv_cmd *drv_cmd;
struct amdxdna_gem_obj *cmd_bo;
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH V2] accel/amdxdna: Fix incorrect command state for timed out job
2025-10-29 19:34 [PATCH V2] accel/amdxdna: Fix incorrect command state for timed out job Lizhi Hou
@ 2025-10-29 19:38 ` Mario Limonciello
2025-10-31 16:06 ` Lizhi Hou
0 siblings, 1 reply; 3+ messages in thread
From: Mario Limonciello @ 2025-10-29 19:38 UTC (permalink / raw)
To: Lizhi Hou, ogabbay, quic_jhugo, maciej.falkowski, dri-devel
Cc: linux-kernel, max.zhen, sonal.santan
On 10/29/25 2:34 PM, Lizhi Hou wrote:
> When a command times out, mark it as ERT_CMD_STATE_TIMEOUT. Any other
> commands that are canceled due to this timeout should be marked as
> ERT_CMD_STATE_ABORT.
>
> Fixes: aac243092b70 ("accel/amdxdna: Add command execution")
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
> drivers/accel/amdxdna/aie2_ctx.c | 15 +++++++++++++--
> drivers/accel/amdxdna/amdxdna_ctx.h | 1 +
> 2 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
> index c6c473c78352..289a2aaf4cae 100644
> --- a/drivers/accel/amdxdna/aie2_ctx.c
> +++ b/drivers/accel/amdxdna/aie2_ctx.c
> @@ -204,10 +204,13 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
>
> cmd_abo = job->cmd_bo;
>
> - if (unlikely(!data))
> + if (unlikely(job->job_timeout)) {
> + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
> + ret = -EINVAL;
> goto out;
> + }
>
> - if (unlikely(size != sizeof(u32))) {
> + if (unlikely(!data) || unlikely(size != sizeof(u32))) {
> amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
> ret = -EINVAL;
> goto out;
> @@ -258,6 +261,13 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
> int ret = 0;
>
> cmd_abo = job->cmd_bo;
> +
> + if (unlikely(job->job_timeout)) {
> + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
> + ret = -EINVAL;
> + goto out;
> + }
> +
> if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
> amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
> ret = -EINVAL;
> @@ -370,6 +380,7 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
>
> xdna = hwctx->client->xdna;
> trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
> + job->job_timeout = true;
> mutex_lock(&xdna->dev_lock);
> aie2_hwctx_stop(xdna, hwctx, sched_job);
>
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
> index cbe60efbe60b..919c654dfea6 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
> @@ -116,6 +116,7 @@ struct amdxdna_sched_job {
> /* user can wait on this fence */
> struct dma_fence *out_fence;
> bool job_done;
> + bool job_timeout;
> u64 seq;
> struct amdxdna_drv_cmd *drv_cmd;
> struct amdxdna_gem_obj *cmd_bo;
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH V2] accel/amdxdna: Fix incorrect command state for timed out job
2025-10-29 19:38 ` Mario Limonciello
@ 2025-10-31 16:06 ` Lizhi Hou
0 siblings, 0 replies; 3+ messages in thread
From: Lizhi Hou @ 2025-10-31 16:06 UTC (permalink / raw)
To: Mario Limonciello, ogabbay, quic_jhugo, maciej.falkowski,
dri-devel
Cc: linux-kernel, max.zhen, sonal.santan
Applied to drm-misc-next.
On 10/29/25 12:38, Mario Limonciello wrote:
> On 10/29/25 2:34 PM, Lizhi Hou wrote:
>> When a command times out, mark it as ERT_CMD_STATE_TIMEOUT. Any other
>> commands that are canceled due to this timeout should be marked as
>> ERT_CMD_STATE_ABORT.
>>
>> Fixes: aac243092b70 ("accel/amdxdna: Add command execution")
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>> ---
>> drivers/accel/amdxdna/aie2_ctx.c | 15 +++++++++++++--
>> drivers/accel/amdxdna/amdxdna_ctx.h | 1 +
>> 2 files changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c
>> b/drivers/accel/amdxdna/aie2_ctx.c
>> index c6c473c78352..289a2aaf4cae 100644
>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>> @@ -204,10 +204,13 @@ aie2_sched_resp_handler(void *handle, void
>> __iomem *data, size_t size)
>> cmd_abo = job->cmd_bo;
>> - if (unlikely(!data))
>> + if (unlikely(job->job_timeout)) {
>> + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
>> + ret = -EINVAL;
>> goto out;
>> + }
>> - if (unlikely(size != sizeof(u32))) {
>> + if (unlikely(!data) || unlikely(size != sizeof(u32))) {
>> amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
>> ret = -EINVAL;
>> goto out;
>> @@ -258,6 +261,13 @@ aie2_sched_cmdlist_resp_handler(void *handle,
>> void __iomem *data, size_t size)
>> int ret = 0;
>> cmd_abo = job->cmd_bo;
>> +
>> + if (unlikely(job->job_timeout)) {
>> + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
>> + ret = -EINVAL;
>> + goto out;
>> + }
>> +
>> if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
>> amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
>> ret = -EINVAL;
>> @@ -370,6 +380,7 @@ aie2_sched_job_timedout(struct drm_sched_job
>> *sched_job)
>> xdna = hwctx->client->xdna;
>> trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
>> + job->job_timeout = true;
>> mutex_lock(&xdna->dev_lock);
>> aie2_hwctx_stop(xdna, hwctx, sched_job);
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h
>> b/drivers/accel/amdxdna/amdxdna_ctx.h
>> index cbe60efbe60b..919c654dfea6 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>> @@ -116,6 +116,7 @@ struct amdxdna_sched_job {
>> /* user can wait on this fence */
>> struct dma_fence *out_fence;
>> bool job_done;
>> + bool job_timeout;
>> u64 seq;
>> struct amdxdna_drv_cmd *drv_cmd;
>> struct amdxdna_gem_obj *cmd_bo;
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-10-31 16:07 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-10-29 19:34 [PATCH V2] accel/amdxdna: Fix incorrect command state for timed out job Lizhi Hou
2025-10-29 19:38 ` Mario Limonciello
2025-10-31 16:06 ` Lizhi Hou
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).