* [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker
@ 2026-04-21 18:15 Lizhi Hou
2026-04-21 19:18 ` Mario Limonciello
0 siblings, 1 reply; 5+ messages in thread
From: Lizhi Hou @ 2026-04-21 18:15 UTC (permalink / raw)
To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
maciej.falkowski
Cc: Max Zhen, linux-kernel, sonal.santan, Lizhi Hou
From: Max Zhen <max.zhen@amd.com>
Add more trace coverage to amdxdna job handling and mailbox receive
processing to make driver execution easier to debug.
Extend the xdna_job trace event to record the command opcode in
addition to the job sequence number. Use the enhanced tracepoint in
the job run, sent-to-device, fence-signaling, and job-free paths so
that trace output can be correlated with the command being executed.
Jobs that carry no driver command report the new DEFAULT_IO opcode (0).
The xdna_job trace in the job timeout handler is removed.
Also add debug-point tracing when a command is received through the
submit ioctl path, and add a trace event when the mailbox RX worker
runs.
These changes improve visibility into job lifetime transitions and
mailbox activity, which helps debug command flow and scheduler issues.
Signed-off-by: Max Zhen <max.zhen@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/aie2_ctx.c | 14 ++++++---
drivers/accel/amdxdna/amdxdna_ctx.c | 3 +-
drivers/accel/amdxdna/amdxdna_ctx.h | 1 +
drivers/accel/amdxdna/amdxdna_mailbox.c | 1 +
include/trace/events/amdxdna.h | 42 ++++++++++++++++---------
5 files changed, 42 insertions(+), 19 deletions(-)
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index d37123d925b6..3b0feba448c4 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref)
struct amdxdna_sched_job *job;
job = container_of(ref, struct amdxdna_sched_job, refcnt);
+
amdxdna_sched_job_cleanup(job);
atomic64_inc(&job->hwctx->job_free_cnt);
wake_up(&job->hwctx->priv->job_free_wq);
@@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
{
struct dma_fence *fence = job->fence;
- trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
+ trace_xdna_job(&job->base, job->hwctx->name, "signaling fence",
+ job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
aie2_tdr_signal(job->hwctx->client->xdna);
job->hwctx->priv->completed++;
@@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
struct dma_fence *fence;
int ret;
+ trace_xdna_job(sched_job, hwctx->name, "job run",
+ job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
+
if (!hwctx->priv->mbox_chann)
return NULL;
@@ -409,7 +414,8 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
} else {
aie2_tdr_signal(hwctx->client->xdna);
}
- trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
+ trace_xdna_job(sched_job, hwctx->name, "sent to device",
+ job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
return fence;
}
@@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct drm_sched_job *sched_job)
struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
struct amdxdna_hwctx *hwctx = job->hwctx;
- trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
+ trace_xdna_job(sched_job, hwctx->name, "job free",
+ job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
if (!job->job_done)
up(&hwctx->priv->job_sem);
@@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
int ret;
xdna = hwctx->client->xdna;
- trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
guard(mutex)(&xdna->dev_lock);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index ff6c3e8e5a15..2c2c21992c87 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
goto unlock_srcu;
}
-
job->hwctx = hwctx;
job->mm = current->mm;
@@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_
if (args->ext || args->ext_flags)
return -EINVAL;
+ trace_amdxdna_debug_point(current->comm, args->type, "job received");
+
switch (args->type) {
case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
return amdxdna_drm_submit_execbuf(client, args);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index a8557d7e8923..355798687376 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -119,6 +119,7 @@ struct amdxdna_hwctx {
container_of(j, struct amdxdna_sched_job, base)
enum amdxdna_job_opcode {
+ DEFAULT_IO,
SYNC_DEBUG_BO,
ATTACH_DEBUG_BO,
DETACH_DEBUG_BO,
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index 37771bdb24a1..cc8865f4e79c 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct work_struct *rx_work)
int ret;
mb_chann = container_of(rx_work, struct mailbox_channel, rx_work);
+ trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);
if (READ_ONCE(mb_chann->bad_state)) {
MB_ERR(mb_chann, "Channel in bad state, work aborted");
diff --git a/include/trace/events/amdxdna.h b/include/trace/events/amdxdna.h
index c6cb2da7b706..71da24267e52 100644
--- a/include/trace/events/amdxdna.h
+++ b/include/trace/events/amdxdna.h
@@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point,
);
TRACE_EVENT(xdna_job,
- TP_PROTO(struct drm_sched_job *sched_job, const char *name, const char *str, u64 seq),
+ TP_PROTO(struct drm_sched_job *sched_job, const char *name,
+ const char *str, u64 seq, u32 op),
- TP_ARGS(sched_job, name, str, seq),
+ TP_ARGS(sched_job, name, str, seq, op),
TP_STRUCT__entry(__string(name, name)
__string(str, str)
__field(u64, fence_context)
__field(u64, fence_seqno)
- __field(u64, seq)),
+ __field(u64, seq)
+ __field(u32, op)),
TP_fast_assign(__assign_str(name);
__assign_str(str);
__entry->fence_context = sched_job->s_fence->finished.context;
__entry->fence_seqno = sched_job->s_fence->finished.seqno;
- __entry->seq = seq;),
+ __entry->seq = seq;
+ __entry->op = op;),
- TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld %s",
+ TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu %s, op=%u",
__entry->fence_context, __entry->fence_seqno,
__get_str(name), __entry->seq,
- __get_str(str))
+ __get_str(str),
+ __entry->op)
);
DECLARE_EVENT_CLASS(xdna_mbox_msg,
@@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head,
TP_ARGS(name, chann_id, opcode, id)
);
-TRACE_EVENT(mbox_irq_handle,
- TP_PROTO(char *name, int irq),
+DECLARE_EVENT_CLASS(xdna_mbox_name_id,
+ TP_PROTO(char *name, int irq),
- TP_ARGS(name, irq),
+ TP_ARGS(name, irq),
- TP_STRUCT__entry(__string(name, name)
- __field(int, irq)),
+ TP_STRUCT__entry(__string(name, name)
+ __field(int, irq)),
- TP_fast_assign(__assign_str(name);
- __entry->irq = irq;),
+ TP_fast_assign(__assign_str(name);
+ __entry->irq = irq;),
+
+ TP_printk("%s.%d", __get_str(name), __entry->irq)
+);
+
+DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
+ TP_PROTO(char *name, int irq),
+ TP_ARGS(name, irq)
+);
- TP_printk("%s.%d", __get_str(name), __entry->irq)
+DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
+ TP_PROTO(char *name, int irq),
+ TP_ARGS(name, irq)
);
#endif /* !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ) */
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker
2026-04-21 18:15 [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker Lizhi Hou
@ 2026-04-21 19:18 ` Mario Limonciello
2026-04-21 19:39 ` Lizhi Hou
0 siblings, 1 reply; 5+ messages in thread
From: Mario Limonciello @ 2026-04-21 19:18 UTC (permalink / raw)
To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, maciej.falkowski
Cc: Max Zhen, linux-kernel, sonal.santan
On 4/21/26 13:15, Lizhi Hou wrote:
> From: Max Zhen <max.zhen@amd.com>
>
> Add more trace coverage to amdxdna job handling and mailbox receive
> processing to make driver execution easier to debug.
>
> Extend the xdna_job trace event to record the command opcode in
> addition to the job sequence number. Use the enhanced tracepoint in
> the job run, sent-to-device, signaled-fence, and job-free paths so
> that trace output can be correlated with the command being executed.
>
> Also add debug-point tracing when a command is received through the
> submit ioctl path, and add a trace event when the mailbox RX worker
> runs.
>
> These changes improve visibility into job lifetime transitions and
> mailbox activity, which helps debug command flow and scheduler issues.
>
> Signed-off-by: Max Zhen <max.zhen@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
> drivers/accel/amdxdna/aie2_ctx.c | 14 ++++++---
> drivers/accel/amdxdna/amdxdna_ctx.c | 3 +-
> drivers/accel/amdxdna/amdxdna_ctx.h | 1 +
> drivers/accel/amdxdna/amdxdna_mailbox.c | 1 +
> include/trace/events/amdxdna.h | 42 ++++++++++++++++---------
> 5 files changed, 42 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
> index d37123d925b6..3b0feba448c4 100644
> --- a/drivers/accel/amdxdna/aie2_ctx.c
> +++ b/drivers/accel/amdxdna/aie2_ctx.c
> @@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref)
> struct amdxdna_sched_job *job;
>
> job = container_of(ref, struct amdxdna_sched_job, refcnt);
> +
> amdxdna_sched_job_cleanup(job);
> atomic64_inc(&job->hwctx->job_free_cnt);
> wake_up(&job->hwctx->priv->job_free_wq);
> @@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
> {
> struct dma_fence *fence = job->fence;
>
> - trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
> + trace_xdna_job(&job->base, job->hwctx->name, "signaling fence",
> + job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
>
> aie2_tdr_signal(job->hwctx->client->xdna);
> job->hwctx->priv->completed++;
> @@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
> struct dma_fence *fence;
> int ret;
>
> + trace_xdna_job(sched_job, hwctx->name, "job run",
> + job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
> +
> if (!hwctx->priv->mbox_chann)
> return NULL;
>
> @@ -409,7 +414,8 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
> } else {
> aie2_tdr_signal(hwctx->client->xdna);
> }
> - trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
> + trace_xdna_job(sched_job, hwctx->name, "sent to device",
> + job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
>
> return fence;
> }
> @@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct drm_sched_job *sched_job)
> struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
> struct amdxdna_hwctx *hwctx = job->hwctx;
>
> - trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
> + trace_xdna_job(sched_job, hwctx->name, "job free",
> + job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
> if (!job->job_done)
> up(&hwctx->priv->job_sem);
>
> @@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
> int ret;
>
> xdna = hwctx->client->xdna;
> - trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
>
> guard(mutex)(&xdna->dev_lock);
>
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
> index ff6c3e8e5a15..2c2c21992c87 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
> @@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
> goto unlock_srcu;
> }
>
> -
> job->hwctx = hwctx;
> job->mm = current->mm;
>
> @@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_
> if (args->ext || args->ext_flags)
> return -EINVAL;
>
> + trace_amdxdna_debug_point(current->comm, args->type, "job received");
> +
> switch (args->type) {
> case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
> return amdxdna_drm_submit_execbuf(client, args);
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
> index a8557d7e8923..355798687376 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
> @@ -119,6 +119,7 @@ struct amdxdna_hwctx {
> container_of(j, struct amdxdna_sched_job, base)
>
> enum amdxdna_job_opcode {
> + DEFAULT_IO,
Do you really want this at the beginning of the list? Doesn't that
break uses of amdxdna_drv_cmd that has the previous indexing?
> SYNC_DEBUG_BO,
> ATTACH_DEBUG_BO,
> DETACH_DEBUG_BO,
> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
> index 37771bdb24a1..cc8865f4e79c 100644
> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
> @@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct work_struct *rx_work)
> int ret;
>
> mb_chann = container_of(rx_work, struct mailbox_channel, rx_work);
> + trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);
>
> if (READ_ONCE(mb_chann->bad_state)) {
> MB_ERR(mb_chann, "Channel in bad state, work aborted");
> diff --git a/include/trace/events/amdxdna.h b/include/trace/events/amdxdna.h
> index c6cb2da7b706..71da24267e52 100644
> --- a/include/trace/events/amdxdna.h
> +++ b/include/trace/events/amdxdna.h
> @@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point,
> );
>
> TRACE_EVENT(xdna_job,
> - TP_PROTO(struct drm_sched_job *sched_job, const char *name, const char *str, u64 seq),
> + TP_PROTO(struct drm_sched_job *sched_job, const char *name,
> + const char *str, u64 seq, u32 op),
>
> - TP_ARGS(sched_job, name, str, seq),
> + TP_ARGS(sched_job, name, str, seq, op),
>
> TP_STRUCT__entry(__string(name, name)
> __string(str, str)
> __field(u64, fence_context)
> __field(u64, fence_seqno)
> - __field(u64, seq)),
> + __field(u64, seq)
> + __field(u32, op)),
>
> TP_fast_assign(__assign_str(name);
> __assign_str(str);
> __entry->fence_context = sched_job->s_fence->finished.context;
> __entry->fence_seqno = sched_job->s_fence->finished.seqno;
> - __entry->seq = seq;),
> + __entry->seq = seq;
> + __entry->op = op;),
>
> - TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld %s",
> + TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu %s, op=%u",
> __entry->fence_context, __entry->fence_seqno,
> __get_str(name), __entry->seq,
> - __get_str(str))
> + __get_str(str),
> + __entry->op)
> );
>
> DECLARE_EVENT_CLASS(xdna_mbox_msg,
> @@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head,
> TP_ARGS(name, chann_id, opcode, id)
> );
>
> -TRACE_EVENT(mbox_irq_handle,
> - TP_PROTO(char *name, int irq),
> +DECLARE_EVENT_CLASS(xdna_mbox_name_id,
> + TP_PROTO(char *name, int irq),
>
> - TP_ARGS(name, irq),
> + TP_ARGS(name, irq),
>
> - TP_STRUCT__entry(__string(name, name)
> - __field(int, irq)),
> + TP_STRUCT__entry(__string(name, name)
> + __field(int, irq)),
>
> - TP_fast_assign(__assign_str(name);
> - __entry->irq = irq;),
> + TP_fast_assign(__assign_str(name);
> + __entry->irq = irq;),
> +
> + TP_printk("%s.%d", __get_str(name), __entry->irq)
> +);
> +
> +DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
> + TP_PROTO(char *name, int irq),
> + TP_ARGS(name, irq)
> +);
>
> - TP_printk("%s.%d", __get_str(name), __entry->irq)
> +DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
> + TP_PROTO(char *name, int irq),
> + TP_ARGS(name, irq)
> );
>
> #endif /* !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ) */
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker
2026-04-21 19:18 ` Mario Limonciello
@ 2026-04-21 19:39 ` Lizhi Hou
2026-04-21 19:45 ` Mario Limonciello
0 siblings, 1 reply; 5+ messages in thread
From: Lizhi Hou @ 2026-04-21 19:39 UTC (permalink / raw)
To: Mario Limonciello, ogabbay, quic_jhugo, dri-devel,
maciej.falkowski
Cc: Max Zhen, linux-kernel, sonal.santan
On 4/21/26 12:18, Mario Limonciello wrote:
>
>
> On 4/21/26 13:15, Lizhi Hou wrote:
>> From: Max Zhen <max.zhen@amd.com>
>>
>> Add more trace coverage to amdxdna job handling and mailbox receive
>> processing to make driver execution easier to debug.
>>
>> Extend the xdna_job trace event to record the command opcode in
>> addition to the job sequence number. Use the enhanced tracepoint in
>> the job run, sent-to-device, signaled-fence, and job-free paths so
>> that trace output can be correlated with the command being executed.
>>
>> Also add debug-point tracing when a command is received through the
>> submit ioctl path, and add a trace event when the mailbox RX worker
>> runs.
>>
>> These changes improve visibility into job lifetime transitions and
>> mailbox activity, which helps debug command flow and scheduler issues.
>>
>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>> drivers/accel/amdxdna/aie2_ctx.c | 14 ++++++---
>> drivers/accel/amdxdna/amdxdna_ctx.c | 3 +-
>> drivers/accel/amdxdna/amdxdna_ctx.h | 1 +
>> drivers/accel/amdxdna/amdxdna_mailbox.c | 1 +
>> include/trace/events/amdxdna.h | 42 ++++++++++++++++---------
>> 5 files changed, 42 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c
>> b/drivers/accel/amdxdna/aie2_ctx.c
>> index d37123d925b6..3b0feba448c4 100644
>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>> @@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref)
>> struct amdxdna_sched_job *job;
>> job = container_of(ref, struct amdxdna_sched_job, refcnt);
>> +
>> amdxdna_sched_job_cleanup(job);
>> atomic64_inc(&job->hwctx->job_free_cnt);
>> wake_up(&job->hwctx->priv->job_free_wq);
>> @@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
>> {
>> struct dma_fence *fence = job->fence;
>> - trace_xdna_job(&job->base, job->hwctx->name, "signaled fence",
>> job->seq);
>> + trace_xdna_job(&job->base, job->hwctx->name, "signaling fence",
>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>> DEFAULT_IO);
>> aie2_tdr_signal(job->hwctx->client->xdna);
>> job->hwctx->priv->completed++;
>> @@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
>> struct dma_fence *fence;
>> int ret;
>> + trace_xdna_job(sched_job, hwctx->name, "job run",
>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>> DEFAULT_IO);
>> +
>> if (!hwctx->priv->mbox_chann)
>> return NULL;
>> @@ -409,7 +414,8 @@ aie2_sched_job_run(struct drm_sched_job
>> *sched_job)
>> } else {
>> aie2_tdr_signal(hwctx->client->xdna);
>> }
>> - trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
>> + trace_xdna_job(sched_job, hwctx->name, "sent to device",
>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>> DEFAULT_IO);
>> return fence;
>> }
>> @@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct
>> drm_sched_job *sched_job)
>> struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
>> struct amdxdna_hwctx *hwctx = job->hwctx;
>> - trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
>> + trace_xdna_job(sched_job, hwctx->name, "job free",
>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>> DEFAULT_IO);
>> if (!job->job_done)
>> up(&hwctx->priv->job_sem);
>> @@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job
>> *sched_job)
>> int ret;
>> xdna = hwctx->client->xdna;
>> - trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
>> guard(mutex)(&xdna->dev_lock);
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c
>> b/drivers/accel/amdxdna/amdxdna_ctx.c
>> index ff6c3e8e5a15..2c2c21992c87 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>> @@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client
>> *client,
>> goto unlock_srcu;
>> }
>> -
>> job->hwctx = hwctx;
>> job->mm = current->mm;
>> @@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct
>> drm_device *dev, void *data, struct drm_
>> if (args->ext || args->ext_flags)
>> return -EINVAL;
>> + trace_amdxdna_debug_point(current->comm, args->type, "job
>> received");
>> +
>> switch (args->type) {
>> case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
>> return amdxdna_drm_submit_execbuf(client, args);
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h
>> b/drivers/accel/amdxdna/amdxdna_ctx.h
>> index a8557d7e8923..355798687376 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>> @@ -119,6 +119,7 @@ struct amdxdna_hwctx {
>> container_of(j, struct amdxdna_sched_job, base)
>> enum amdxdna_job_opcode {
>> + DEFAULT_IO,
>
> Do you really want this at the beginning of the list? Doesn't that
> break uses of amdxdna_drv_cmd that has the previous indexing?
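The *_DEBUG_BO opcodes are for driver-internal use only, so changing
their numbering does not affect anything outside the driver. DEFAULT_IO
is placed first so that it is 0, which aligns with our current trace
scripts.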
Lizhi
>
>> SYNC_DEBUG_BO,
>> ATTACH_DEBUG_BO,
>> DETACH_DEBUG_BO,
>> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c
>> b/drivers/accel/amdxdna/amdxdna_mailbox.c
>> index 37771bdb24a1..cc8865f4e79c 100644
>> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
>> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
>> @@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct work_struct
>> *rx_work)
>> int ret;
>> mb_chann = container_of(rx_work, struct mailbox_channel,
>> rx_work);
>> + trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);
>> if (READ_ONCE(mb_chann->bad_state)) {
>> MB_ERR(mb_chann, "Channel in bad state, work aborted");
>> diff --git a/include/trace/events/amdxdna.h
>> b/include/trace/events/amdxdna.h
>> index c6cb2da7b706..71da24267e52 100644
>> --- a/include/trace/events/amdxdna.h
>> +++ b/include/trace/events/amdxdna.h
>> @@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point,
>> );
>> TRACE_EVENT(xdna_job,
>> - TP_PROTO(struct drm_sched_job *sched_job, const char *name,
>> const char *str, u64 seq),
>> + TP_PROTO(struct drm_sched_job *sched_job, const char *name,
>> + const char *str, u64 seq, u32 op),
>> - TP_ARGS(sched_job, name, str, seq),
>> + TP_ARGS(sched_job, name, str, seq, op),
>> TP_STRUCT__entry(__string(name, name)
>> __string(str, str)
>> __field(u64, fence_context)
>> __field(u64, fence_seqno)
>> - __field(u64, seq)),
>> + __field(u64, seq)
>> + __field(u32, op)),
>> TP_fast_assign(__assign_str(name);
>> __assign_str(str);
>> __entry->fence_context =
>> sched_job->s_fence->finished.context;
>> __entry->fence_seqno =
>> sched_job->s_fence->finished.seqno;
>> - __entry->seq = seq;),
>> + __entry->seq = seq;
>> + __entry->op = op;),
>> - TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld
>> %s",
>> + TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu
>> %s, op=%u",
>> __entry->fence_context, __entry->fence_seqno,
>> __get_str(name), __entry->seq,
>> - __get_str(str))
>> + __get_str(str),
>> + __entry->op)
>> );
>> DECLARE_EVENT_CLASS(xdna_mbox_msg,
>> @@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head,
>> TP_ARGS(name, chann_id, opcode, id)
>> );
>> -TRACE_EVENT(mbox_irq_handle,
>> - TP_PROTO(char *name, int irq),
>> +DECLARE_EVENT_CLASS(xdna_mbox_name_id,
>> + TP_PROTO(char *name, int irq),
>> - TP_ARGS(name, irq),
>> + TP_ARGS(name, irq),
>> - TP_STRUCT__entry(__string(name, name)
>> - __field(int, irq)),
>> + TP_STRUCT__entry(__string(name, name)
>> + __field(int, irq)),
>> - TP_fast_assign(__assign_str(name);
>> - __entry->irq = irq;),
>> + TP_fast_assign(__assign_str(name);
>> + __entry->irq = irq;),
>> +
>> + TP_printk("%s.%d", __get_str(name), __entry->irq)
>> +);
>> +
>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
>> + TP_PROTO(char *name, int irq),
>> + TP_ARGS(name, irq)
>> +);
>> - TP_printk("%s.%d", __get_str(name), __entry->irq)
>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
>> + TP_PROTO(char *name, int irq),
>> + TP_ARGS(name, irq)
>> );
>> #endif /* !defined(_TRACE_AMDXDNA_H) ||
>> defined(TRACE_HEADER_MULTI_READ) */
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker
2026-04-21 19:39 ` Lizhi Hou
@ 2026-04-21 19:45 ` Mario Limonciello
2026-04-22 15:38 ` Lizhi Hou
0 siblings, 1 reply; 5+ messages in thread
From: Mario Limonciello @ 2026-04-21 19:45 UTC (permalink / raw)
To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, maciej.falkowski
Cc: Max Zhen, linux-kernel, sonal.santan
On 4/21/26 14:39, Lizhi Hou wrote:
>
> On 4/21/26 12:18, Mario Limonciello wrote:
>>
>>
>> On 4/21/26 13:15, Lizhi Hou wrote:
>>> From: Max Zhen <max.zhen@amd.com>
>>>
>>> Add more trace coverage to amdxdna job handling and mailbox receive
>>> processing to make driver execution easier to debug.
>>>
>>> Extend the xdna_job trace event to record the command opcode in
>>> addition to the job sequence number. Use the enhanced tracepoint in
>>> the job run, sent-to-device, signaled-fence, and job-free paths so
>>> that trace output can be correlated with the command being executed.
>>>
>>> Also add debug-point tracing when a command is received through the
>>> submit ioctl path, and add a trace event when the mailbox RX worker
>>> runs.
>>>
>>> These changes improve visibility into job lifetime transitions and
>>> mailbox activity, which helps debug command flow and scheduler issues.
>>>
>>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>>> ---
>>> drivers/accel/amdxdna/aie2_ctx.c | 14 ++++++---
>>> drivers/accel/amdxdna/amdxdna_ctx.c | 3 +-
>>> drivers/accel/amdxdna/amdxdna_ctx.h | 1 +
>>> drivers/accel/amdxdna/amdxdna_mailbox.c | 1 +
>>> include/trace/events/amdxdna.h | 42 ++++++++++++++++---------
>>> 5 files changed, 42 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/
>>> amdxdna/aie2_ctx.c
>>> index d37123d925b6..3b0feba448c4 100644
>>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>>> @@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref)
>>> struct amdxdna_sched_job *job;
>>> job = container_of(ref, struct amdxdna_sched_job, refcnt);
>>> +
>>> amdxdna_sched_job_cleanup(job);
>>> atomic64_inc(&job->hwctx->job_free_cnt);
>>> wake_up(&job->hwctx->priv->job_free_wq);
>>> @@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
>>> {
>>> struct dma_fence *fence = job->fence;
>>> - trace_xdna_job(&job->base, job->hwctx->name, "signaled fence",
>>> job->seq);
>>> + trace_xdna_job(&job->base, job->hwctx->name, "signaling fence",
>>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>>> DEFAULT_IO);
>>> aie2_tdr_signal(job->hwctx->client->xdna);
>>> job->hwctx->priv->completed++;
>>> @@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
>>> struct dma_fence *fence;
>>> int ret;
>>> + trace_xdna_job(sched_job, hwctx->name, "job run",
>>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>>> DEFAULT_IO);
>>> +
>>> if (!hwctx->priv->mbox_chann)
>>> return NULL;
>>> @@ -409,7 +414,8 @@ aie2_sched_job_run(struct drm_sched_job
>>> *sched_job)
>>> } else {
>>> aie2_tdr_signal(hwctx->client->xdna);
>>> }
>>> - trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
>>> + trace_xdna_job(sched_job, hwctx->name, "sent to device",
>>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>>> DEFAULT_IO);
>>> return fence;
>>> }
>>> @@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct
>>> drm_sched_job *sched_job)
>>> struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
>>> struct amdxdna_hwctx *hwctx = job->hwctx;
>>> - trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
>>> + trace_xdna_job(sched_job, hwctx->name, "job free",
>>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>>> DEFAULT_IO);
>>> if (!job->job_done)
>>> up(&hwctx->priv->job_sem);
>>> @@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job
>>> *sched_job)
>>> int ret;
>>> xdna = hwctx->client->xdna;
>>> - trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
>>> guard(mutex)(&xdna->dev_lock);
>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/
>>> amdxdna/amdxdna_ctx.c
>>> index ff6c3e8e5a15..2c2c21992c87 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>>> @@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client
>>> *client,
>>> goto unlock_srcu;
>>> }
>>> -
>>> job->hwctx = hwctx;
>>> job->mm = current->mm;
>>> @@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct
>>> drm_device *dev, void *data, struct drm_
>>> if (args->ext || args->ext_flags)
>>> return -EINVAL;
>>> + trace_amdxdna_debug_point(current->comm, args->type, "job
>>> received");
>>> +
>>> switch (args->type) {
>>> case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
>>> return amdxdna_drm_submit_execbuf(client, args);
>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/
>>> amdxdna/amdxdna_ctx.h
>>> index a8557d7e8923..355798687376 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>>> @@ -119,6 +119,7 @@ struct amdxdna_hwctx {
>>> container_of(j, struct amdxdna_sched_job, base)
>>> enum amdxdna_job_opcode {
>>> + DEFAULT_IO,
>>
>> Do you really want this at the beginning of the list? Doesn't that
>> break uses of amdxdna_drv_cmd that has the previous indexing?
>
> *_DEBUG_BO is driver internal use only. Using 0 here to align with our
> current trace scripts.
>
> Lizhi
>
>>
>>> SYNC_DEBUG_BO,
>>> ATTACH_DEBUG_BO,
>>> DETACH_DEBUG_BO,
>>> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/
>>> amdxdna/amdxdna_mailbox.c
>>> index 37771bdb24a1..cc8865f4e79c 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
>>> @@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct work_struct
>>> *rx_work)
>>> int ret;
>>> mb_chann = container_of(rx_work, struct mailbox_channel,
>>> rx_work);
>>> + trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);
>>> if (READ_ONCE(mb_chann->bad_state)) {
>>> MB_ERR(mb_chann, "Channel in bad state, work aborted");
>>> diff --git a/include/trace/events/amdxdna.h b/include/trace/events/
>>> amdxdna.h
>>> index c6cb2da7b706..71da24267e52 100644
>>> --- a/include/trace/events/amdxdna.h
>>> +++ b/include/trace/events/amdxdna.h
>>> @@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point,
>>> );
>>> TRACE_EVENT(xdna_job,
>>> - TP_PROTO(struct drm_sched_job *sched_job, const char *name,
>>> const char *str, u64 seq),
>>> + TP_PROTO(struct drm_sched_job *sched_job, const char *name,
>>> + const char *str, u64 seq, u32 op),
>>> - TP_ARGS(sched_job, name, str, seq),
>>> + TP_ARGS(sched_job, name, str, seq, op),
>>> TP_STRUCT__entry(__string(name, name)
>>> __string(str, str)
>>> __field(u64, fence_context)
>>> __field(u64, fence_seqno)
>>> - __field(u64, seq)),
>>> + __field(u64, seq)
>>> + __field(u32, op)),
>>> TP_fast_assign(__assign_str(name);
>>> __assign_str(str);
>>> __entry->fence_context = sched_job->s_fence-
>>> >finished.context;
>>> __entry->fence_seqno = sched_job->s_fence-
>>> >finished.seqno;
>>> - __entry->seq = seq;),
>>> + __entry->seq = seq;
>>> + __entry->op = op;),
>>> - TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld
>>> %s",
>>> + TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu
>>> %s, op=%u",
>>> __entry->fence_context, __entry->fence_seqno,
>>> __get_str(name), __entry->seq,
>>> - __get_str(str))
>>> + __get_str(str),
>>> + __entry->op)
>>> );
>>> DECLARE_EVENT_CLASS(xdna_mbox_msg,
>>> @@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head,
>>> TP_ARGS(name, chann_id, opcode, id)
>>> );
>>> -TRACE_EVENT(mbox_irq_handle,
>>> - TP_PROTO(char *name, int irq),
>>> +DECLARE_EVENT_CLASS(xdna_mbox_name_id,
>>> + TP_PROTO(char *name, int irq),
>>> - TP_ARGS(name, irq),
>>> + TP_ARGS(name, irq),
>>> - TP_STRUCT__entry(__string(name, name)
>>> - __field(int, irq)),
>>> + TP_STRUCT__entry(__string(name, name)
>>> + __field(int, irq)),
>>> - TP_fast_assign(__assign_str(name);
>>> - __entry->irq = irq;),
>>> + TP_fast_assign(__assign_str(name);
>>> + __entry->irq = irq;),
>>> +
>>> + TP_printk("%s.%d", __get_str(name), __entry->irq)
>>> +);
>>> +
>>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
>>> + TP_PROTO(char *name, int irq),
>>> + TP_ARGS(name, irq)
>>> +);
>>> - TP_printk("%s.%d", __get_str(name), __entry->irq)
>>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
>>> + TP_PROTO(char *name, int irq),
>>> + TP_ARGS(name, irq)
>>> );
>>> #endif /* !defined(_TRACE_AMDXDNA_H) ||
>>> defined(TRACE_HEADER_MULTI_READ) */
>>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker
2026-04-21 19:45 ` Mario Limonciello
@ 2026-04-22 15:38 ` Lizhi Hou
0 siblings, 0 replies; 5+ messages in thread
From: Lizhi Hou @ 2026-04-22 15:38 UTC (permalink / raw)
To: Mario Limonciello, ogabbay, quic_jhugo, dri-devel,
maciej.falkowski
Cc: Max Zhen, linux-kernel, sonal.santan
Applied to drm-misc-next
On 4/21/26 12:45, Mario Limonciello wrote:
>
>
> On 4/21/26 14:39, Lizhi Hou wrote:
>>
>> On 4/21/26 12:18, Mario Limonciello wrote:
>>>
>>>
>>> On 4/21/26 13:15, Lizhi Hou wrote:
>>>> From: Max Zhen <max.zhen@amd.com>
>>>>
>>>> Add more trace coverage to amdxdna job handling and mailbox receive
>>>> processing to make driver execution easier to debug.
>>>>
>>>> Extend the xdna_job trace event to record the command opcode in
>>>> addition to the job sequence number. Use the enhanced tracepoint in
>>>> the job run, sent-to-device, signaled-fence, and job-free paths so
>>>> that trace output can be correlated with the command being executed.
>>>>
>>>> Also add debug-point tracing when a command is received through the
>>>> submit ioctl path, and add a trace event when the mailbox RX worker
>>>> runs.
>>>>
>>>> These changes improve visibility into job lifetime transitions and
>>>> mailbox activity, which helps debug command flow and scheduler issues.
>>>>
>>>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>>>> ---
>>>> drivers/accel/amdxdna/aie2_ctx.c | 14 ++++++---
>>>> drivers/accel/amdxdna/amdxdna_ctx.c | 3 +-
>>>> drivers/accel/amdxdna/amdxdna_ctx.h | 1 +
>>>> drivers/accel/amdxdna/amdxdna_mailbox.c | 1 +
>>>> include/trace/events/amdxdna.h | 42
>>>> ++++++++++++++++---------
>>>> 5 files changed, 42 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/
>>>> amdxdna/aie2_ctx.c
>>>> index d37123d925b6..3b0feba448c4 100644
>>>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>>>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>>>> @@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref)
>>>> struct amdxdna_sched_job *job;
>>>> job = container_of(ref, struct amdxdna_sched_job, refcnt);
>>>> +
>>>> amdxdna_sched_job_cleanup(job);
>>>> atomic64_inc(&job->hwctx->job_free_cnt);
>>>> wake_up(&job->hwctx->priv->job_free_wq);
>>>> @@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
>>>> {
>>>> struct dma_fence *fence = job->fence;
>>>> - trace_xdna_job(&job->base, job->hwctx->name, "signaled
>>>> fence", job->seq);
>>>> + trace_xdna_job(&job->base, job->hwctx->name, "signaling fence",
>>>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>>>> DEFAULT_IO);
>>>> aie2_tdr_signal(job->hwctx->client->xdna);
>>>> job->hwctx->priv->completed++;
>>>> @@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job
>>>> *sched_job)
>>>> struct dma_fence *fence;
>>>> int ret;
>>>> + trace_xdna_job(sched_job, hwctx->name, "job run",
>>>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>>>> DEFAULT_IO);
>>>> +
>>>> if (!hwctx->priv->mbox_chann)
>>>> return NULL;
>>>> @@ -409,7 +414,8 @@ aie2_sched_job_run(struct drm_sched_job
>>>> *sched_job)
>>>> } else {
>>>> aie2_tdr_signal(hwctx->client->xdna);
>>>> }
>>>> - trace_xdna_job(sched_job, hwctx->name, "sent to device",
>>>> job->seq);
>>>> + trace_xdna_job(sched_job, hwctx->name, "sent to device",
>>>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>>>> DEFAULT_IO);
>>>> return fence;
>>>> }
>>>> @@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct
>>>> drm_sched_job *sched_job)
>>>> struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
>>>> struct amdxdna_hwctx *hwctx = job->hwctx;
>>>> - trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
>>>> + trace_xdna_job(sched_job, hwctx->name, "job free",
>>>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>>>> DEFAULT_IO);
>>>> if (!job->job_done)
>>>> up(&hwctx->priv->job_sem);
>>>> @@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job
>>>> *sched_job)
>>>> int ret;
>>>> xdna = hwctx->client->xdna;
>>>> - trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
>>>> guard(mutex)(&xdna->dev_lock);
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/
>>>> amdxdna/amdxdna_ctx.c
>>>> index ff6c3e8e5a15..2c2c21992c87 100644
>>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>>>> @@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client
>>>> *client,
>>>> goto unlock_srcu;
>>>> }
>>>> -
>>>> job->hwctx = hwctx;
>>>> job->mm = current->mm;
>>>> @@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct
>>>> drm_device *dev, void *data, struct drm_
>>>> if (args->ext || args->ext_flags)
>>>> return -EINVAL;
>>>> + trace_amdxdna_debug_point(current->comm, args->type, "job
>>>> received");
>>>> +
>>>> switch (args->type) {
>>>> case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
>>>> return amdxdna_drm_submit_execbuf(client, args);
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/
>>>> amdxdna/amdxdna_ctx.h
>>>> index a8557d7e8923..355798687376 100644
>>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>>>> @@ -119,6 +119,7 @@ struct amdxdna_hwctx {
>>>> container_of(j, struct amdxdna_sched_job, base)
>>>> enum amdxdna_job_opcode {
>>>> + DEFAULT_IO,
>>>
>>> Do you really want this at the beginning of the list? Doesn't that
>>> break uses of amdxdna_drv_cmd that has the previous indexing?
>>
>> *_DEBUG_BO is driver internal use only. Using 0 here to align with
>> our current trace scripts.
>>
>> Lizhi
>>
>>>
>>>> SYNC_DEBUG_BO,
>>>> ATTACH_DEBUG_BO,
>>>> DETACH_DEBUG_BO,
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>> b/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>> index 37771bdb24a1..cc8865f4e79c 100644
>>>> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>> @@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct
>>>> work_struct *rx_work)
>>>> int ret;
>>>> mb_chann = container_of(rx_work, struct mailbox_channel,
>>>> rx_work);
>>>> + trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);
>>>> if (READ_ONCE(mb_chann->bad_state)) {
>>>> MB_ERR(mb_chann, "Channel in bad state, work aborted");
>>>> diff --git a/include/trace/events/amdxdna.h b/include/trace/events/
>>>> amdxdna.h
>>>> index c6cb2da7b706..71da24267e52 100644
>>>> --- a/include/trace/events/amdxdna.h
>>>> +++ b/include/trace/events/amdxdna.h
>>>> @@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point,
>>>> );
>>>> TRACE_EVENT(xdna_job,
>>>> - TP_PROTO(struct drm_sched_job *sched_job, const char
>>>> *name, const char *str, u64 seq),
>>>> + TP_PROTO(struct drm_sched_job *sched_job, const char *name,
>>>> + const char *str, u64 seq, u32 op),
>>>> - TP_ARGS(sched_job, name, str, seq),
>>>> + TP_ARGS(sched_job, name, str, seq, op),
>>>> TP_STRUCT__entry(__string(name, name)
>>>> __string(str, str)
>>>> __field(u64, fence_context)
>>>> __field(u64, fence_seqno)
>>>> - __field(u64, seq)),
>>>> + __field(u64, seq)
>>>> + __field(u32, op)),
>>>> TP_fast_assign(__assign_str(name);
>>>> __assign_str(str);
>>>> __entry->fence_context = sched_job->s_fence-
>>>> >finished.context;
>>>> __entry->fence_seqno = sched_job->s_fence-
>>>> >finished.seqno;
>>>> - __entry->seq = seq;),
>>>> + __entry->seq = seq;
>>>> + __entry->op = op;),
>>>> - TP_printk("fence=(context:%llu, seqno:%lld), %s
>>>> seq#:%lld %s",
>>>> + TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu
>>>> %s, op=%u",
>>>> __entry->fence_context, __entry->fence_seqno,
>>>> __get_str(name), __entry->seq,
>>>> - __get_str(str))
>>>> + __get_str(str),
>>>> + __entry->op)
>>>> );
>>>> DECLARE_EVENT_CLASS(xdna_mbox_msg,
>>>> @@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head,
>>>> TP_ARGS(name, chann_id, opcode, id)
>>>> );
>>>> -TRACE_EVENT(mbox_irq_handle,
>>>> - TP_PROTO(char *name, int irq),
>>>> +DECLARE_EVENT_CLASS(xdna_mbox_name_id,
>>>> + TP_PROTO(char *name, int irq),
>>>> - TP_ARGS(name, irq),
>>>> + TP_ARGS(name, irq),
>>>> - TP_STRUCT__entry(__string(name, name)
>>>> - __field(int, irq)),
>>>> + TP_STRUCT__entry(__string(name, name)
>>>> + __field(int, irq)),
>>>> - TP_fast_assign(__assign_str(name);
>>>> - __entry->irq = irq;),
>>>> + TP_fast_assign(__assign_str(name);
>>>> + __entry->irq = irq;),
>>>> +
>>>> + TP_printk("%s.%d", __get_str(name), __entry->irq)
>>>> +);
>>>> +
>>>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
>>>> + TP_PROTO(char *name, int irq),
>>>> + TP_ARGS(name, irq)
>>>> +);
>>>> - TP_printk("%s.%d", __get_str(name), __entry->irq)
>>>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
>>>> + TP_PROTO(char *name, int irq),
>>>> + TP_ARGS(name, irq)
>>>> );
>>>> #endif /* !defined(_TRACE_AMDXDNA_H) ||
>>>> defined(TRACE_HEADER_MULTI_READ) */
>>>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2026-04-22 15:38 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-21 18:15 [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker Lizhi Hou
2026-04-21 19:18 ` Mario Limonciello
2026-04-21 19:39 ` Lizhi Hou
2026-04-21 19:45 ` Mario Limonciello
2026-04-22 15:38 ` Lizhi Hou
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox