public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker
@ 2026-04-21 18:15 Lizhi Hou
  2026-04-21 19:18 ` Mario Limonciello
  0 siblings, 1 reply; 5+ messages in thread
From: Lizhi Hou @ 2026-04-21 18:15 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
	maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan, Lizhi Hou

From: Max Zhen <max.zhen@amd.com>

Add more trace coverage to amdxdna job handling and mailbox receive
processing to make driver execution easier to debug.

Extend the xdna_job trace event to record the command opcode in
addition to the job sequence number. Use the enhanced tracepoint in
the job run, sent-to-device, signaled-fence, and job-free paths so
that trace output can be correlated with the command being executed.

Also add debug-point tracing when a command is received through the
submit ioctl path, and add a trace event when the mailbox RX worker
runs.

These changes improve visibility into job lifetime transitions and
mailbox activity, which helps debug command flow and scheduler issues.

Signed-off-by: Max Zhen <max.zhen@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_ctx.c        | 14 ++++++---
 drivers/accel/amdxdna/amdxdna_ctx.c     |  3 +-
 drivers/accel/amdxdna/amdxdna_ctx.h     |  1 +
 drivers/accel/amdxdna/amdxdna_mailbox.c |  1 +
 include/trace/events/amdxdna.h          | 42 ++++++++++++++++---------
 5 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index d37123d925b6..3b0feba448c4 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref)
 	struct amdxdna_sched_job *job;
 
 	job = container_of(ref, struct amdxdna_sched_job, refcnt);
+
 	amdxdna_sched_job_cleanup(job);
 	atomic64_inc(&job->hwctx->job_free_cnt);
 	wake_up(&job->hwctx->priv->job_free_wq);
@@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
 {
 	struct dma_fence *fence = job->fence;
 
-	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
+	trace_xdna_job(&job->base, job->hwctx->name, "signaling fence",
+		       job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
 
 	aie2_tdr_signal(job->hwctx->client->xdna);
 	job->hwctx->priv->completed++;
@@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
 	struct dma_fence *fence;
 	int ret;
 
+	trace_xdna_job(sched_job, hwctx->name, "job run",
+		       job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
+
 	if (!hwctx->priv->mbox_chann)
 		return NULL;
 
@@ -409,7 +414,8 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
 	} else {
 		aie2_tdr_signal(hwctx->client->xdna);
 	}
-	trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
+	trace_xdna_job(sched_job, hwctx->name, "sent to device",
+		       job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
 
 	return fence;
 }
@@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct drm_sched_job *sched_job)
 	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
 	struct amdxdna_hwctx *hwctx = job->hwctx;
 
-	trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
+	trace_xdna_job(sched_job, hwctx->name, "job free",
+		       job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
 	if (!job->job_done)
 		up(&hwctx->priv->job_sem);
 
@@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
 	int ret;
 
 	xdna = hwctx->client->xdna;
-	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
 
 	guard(mutex)(&xdna->dev_lock);
 
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index ff6c3e8e5a15..2c2c21992c87 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
 		goto unlock_srcu;
 	}
 
-
 	job->hwctx = hwctx;
 	job->mm = current->mm;
 
@@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_
 	if (args->ext || args->ext_flags)
 		return -EINVAL;
 
+	trace_amdxdna_debug_point(current->comm, args->type, "job received");
+
 	switch (args->type) {
 	case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
 		return amdxdna_drm_submit_execbuf(client, args);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index a8557d7e8923..355798687376 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -119,6 +119,7 @@ struct amdxdna_hwctx {
 	container_of(j, struct amdxdna_sched_job, base)
 
 enum amdxdna_job_opcode {
+	DEFAULT_IO,
 	SYNC_DEBUG_BO,
 	ATTACH_DEBUG_BO,
 	DETACH_DEBUG_BO,
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index 37771bdb24a1..cc8865f4e79c 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct work_struct *rx_work)
 	int ret;
 
 	mb_chann = container_of(rx_work, struct mailbox_channel, rx_work);
+	trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);
 
 	if (READ_ONCE(mb_chann->bad_state)) {
 		MB_ERR(mb_chann, "Channel in bad state, work aborted");
diff --git a/include/trace/events/amdxdna.h b/include/trace/events/amdxdna.h
index c6cb2da7b706..71da24267e52 100644
--- a/include/trace/events/amdxdna.h
+++ b/include/trace/events/amdxdna.h
@@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point,
 );
 
 TRACE_EVENT(xdna_job,
-	    TP_PROTO(struct drm_sched_job *sched_job, const char *name, const char *str, u64 seq),
+	    TP_PROTO(struct drm_sched_job *sched_job, const char *name,
+		     const char *str, u64 seq, u32 op),
 
-	    TP_ARGS(sched_job, name, str, seq),
+	    TP_ARGS(sched_job, name, str, seq, op),
 
 	    TP_STRUCT__entry(__string(name, name)
 			     __string(str, str)
 			     __field(u64, fence_context)
 			     __field(u64, fence_seqno)
-			     __field(u64, seq)),
+			     __field(u64, seq)
+			     __field(u32, op)),
 
 	    TP_fast_assign(__assign_str(name);
 			   __assign_str(str);
 			   __entry->fence_context = sched_job->s_fence->finished.context;
 			   __entry->fence_seqno = sched_job->s_fence->finished.seqno;
-			   __entry->seq = seq;),
+			   __entry->seq = seq;
+			   __entry->op = op;),
 
-	    TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld %s",
+	    TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu %s, op=%u",
 		      __entry->fence_context, __entry->fence_seqno,
 		      __get_str(name), __entry->seq,
-		      __get_str(str))
+		      __get_str(str),
+		      __entry->op)
 );
 
 DECLARE_EVENT_CLASS(xdna_mbox_msg,
@@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head,
 	     TP_ARGS(name, chann_id, opcode, id)
 );
 
-TRACE_EVENT(mbox_irq_handle,
-	    TP_PROTO(char *name, int irq),
+DECLARE_EVENT_CLASS(xdna_mbox_name_id,
+		    TP_PROTO(char *name, int irq),
 
-	    TP_ARGS(name, irq),
+		    TP_ARGS(name, irq),
 
-	    TP_STRUCT__entry(__string(name, name)
-			     __field(int, irq)),
+		    TP_STRUCT__entry(__string(name, name)
+				     __field(int, irq)),
 
-	    TP_fast_assign(__assign_str(name);
-			   __entry->irq = irq;),
+		    TP_fast_assign(__assign_str(name);
+				   __entry->irq = irq;),
+
+		    TP_printk("%s.%d", __get_str(name), __entry->irq)
+);
+
+DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
+	     TP_PROTO(char *name, int irq),
+	     TP_ARGS(name, irq)
+);
 
-	    TP_printk("%s.%d", __get_str(name), __entry->irq)
+DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
+	     TP_PROTO(char *name, int irq),
+	     TP_ARGS(name, irq)
 );
 
 #endif /* !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ) */
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker
  2026-04-21 18:15 [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker Lizhi Hou
@ 2026-04-21 19:18 ` Mario Limonciello
  2026-04-21 19:39   ` Lizhi Hou
  0 siblings, 1 reply; 5+ messages in thread
From: Mario Limonciello @ 2026-04-21 19:18 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan



On 4/21/26 13:15, Lizhi Hou wrote:
> From: Max Zhen <max.zhen@amd.com>
> 
> Add more trace coverage to amdxdna job handling and mailbox receive
> processing to make driver execution easier to debug.
> 
> Extend the xdna_job trace event to record the command opcode in
> addition to the job sequence number. Use the enhanced tracepoint in
> the job run, sent-to-device, signaled-fence, and job-free paths so
> that trace output can be correlated with the command being executed.
> 
> Also add debug-point tracing when a command is received through the
> submit ioctl path, and add a trace event when the mailbox RX worker
> runs.
> 
> These changes improve visibility into job lifetime transitions and
> mailbox activity, which helps debug command flow and scheduler issues.
> 
> Signed-off-by: Max Zhen <max.zhen@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
>   drivers/accel/amdxdna/aie2_ctx.c        | 14 ++++++---
>   drivers/accel/amdxdna/amdxdna_ctx.c     |  3 +-
>   drivers/accel/amdxdna/amdxdna_ctx.h     |  1 +
>   drivers/accel/amdxdna/amdxdna_mailbox.c |  1 +
>   include/trace/events/amdxdna.h          | 42 ++++++++++++++++---------
>   5 files changed, 42 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
> index d37123d925b6..3b0feba448c4 100644
> --- a/drivers/accel/amdxdna/aie2_ctx.c
> +++ b/drivers/accel/amdxdna/aie2_ctx.c
> @@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref)
>   	struct amdxdna_sched_job *job;
>   
>   	job = container_of(ref, struct amdxdna_sched_job, refcnt);
> +
>   	amdxdna_sched_job_cleanup(job);
>   	atomic64_inc(&job->hwctx->job_free_cnt);
>   	wake_up(&job->hwctx->priv->job_free_wq);
> @@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
>   {
>   	struct dma_fence *fence = job->fence;
>   
> -	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
> +	trace_xdna_job(&job->base, job->hwctx->name, "signaling fence",
> +		       job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
>   
>   	aie2_tdr_signal(job->hwctx->client->xdna);
>   	job->hwctx->priv->completed++;
> @@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
>   	struct dma_fence *fence;
>   	int ret;
>   
> +	trace_xdna_job(sched_job, hwctx->name, "job run",
> +		       job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
> +
>   	if (!hwctx->priv->mbox_chann)
>   		return NULL;
>   
> @@ -409,7 +414,8 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
>   	} else {
>   		aie2_tdr_signal(hwctx->client->xdna);
>   	}
> -	trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
> +	trace_xdna_job(sched_job, hwctx->name, "sent to device",
> +		       job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
>   
>   	return fence;
>   }
> @@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct drm_sched_job *sched_job)
>   	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
>   	struct amdxdna_hwctx *hwctx = job->hwctx;
>   
> -	trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
> +	trace_xdna_job(sched_job, hwctx->name, "job free",
> +		       job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
>   	if (!job->job_done)
>   		up(&hwctx->priv->job_sem);
>   
> @@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
>   	int ret;
>   
>   	xdna = hwctx->client->xdna;
> -	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
>   
>   	guard(mutex)(&xdna->dev_lock);
>   
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
> index ff6c3e8e5a15..2c2c21992c87 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
> @@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
>   		goto unlock_srcu;
>   	}
>   
> -
>   	job->hwctx = hwctx;
>   	job->mm = current->mm;
>   
> @@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_
>   	if (args->ext || args->ext_flags)
>   		return -EINVAL;
>   
> +	trace_amdxdna_debug_point(current->comm, args->type, "job received");
> +
>   	switch (args->type) {
>   	case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
>   		return amdxdna_drm_submit_execbuf(client, args);
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
> index a8557d7e8923..355798687376 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
> @@ -119,6 +119,7 @@ struct amdxdna_hwctx {
>   	container_of(j, struct amdxdna_sched_job, base)
>   
>   enum amdxdna_job_opcode {
> +	DEFAULT_IO,

Do you really want this at the beginning of the list?  Doesn't that 
break uses of amdxdna_drv_cmd that has the previous indexing?

>   	SYNC_DEBUG_BO,
>   	ATTACH_DEBUG_BO,
>   	DETACH_DEBUG_BO,
> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
> index 37771bdb24a1..cc8865f4e79c 100644
> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
> @@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct work_struct *rx_work)
>   	int ret;
>   
>   	mb_chann = container_of(rx_work, struct mailbox_channel, rx_work);
> +	trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);
>   
>   	if (READ_ONCE(mb_chann->bad_state)) {
>   		MB_ERR(mb_chann, "Channel in bad state, work aborted");
> diff --git a/include/trace/events/amdxdna.h b/include/trace/events/amdxdna.h
> index c6cb2da7b706..71da24267e52 100644
> --- a/include/trace/events/amdxdna.h
> +++ b/include/trace/events/amdxdna.h
> @@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point,
>   );
>   
>   TRACE_EVENT(xdna_job,
> -	    TP_PROTO(struct drm_sched_job *sched_job, const char *name, const char *str, u64 seq),
> +	    TP_PROTO(struct drm_sched_job *sched_job, const char *name,
> +		     const char *str, u64 seq, u32 op),
>   
> -	    TP_ARGS(sched_job, name, str, seq),
> +	    TP_ARGS(sched_job, name, str, seq, op),
>   
>   	    TP_STRUCT__entry(__string(name, name)
>   			     __string(str, str)
>   			     __field(u64, fence_context)
>   			     __field(u64, fence_seqno)
> -			     __field(u64, seq)),
> +			     __field(u64, seq)
> +			     __field(u32, op)),
>   
>   	    TP_fast_assign(__assign_str(name);
>   			   __assign_str(str);
>   			   __entry->fence_context = sched_job->s_fence->finished.context;
>   			   __entry->fence_seqno = sched_job->s_fence->finished.seqno;
> -			   __entry->seq = seq;),
> +			   __entry->seq = seq;
> +			   __entry->op = op;),
>   
> -	    TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld %s",
> +	    TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu %s, op=%u",
>   		      __entry->fence_context, __entry->fence_seqno,
>   		      __get_str(name), __entry->seq,
> -		      __get_str(str))
> +		      __get_str(str),
> +		      __entry->op)
>   );
>   
>   DECLARE_EVENT_CLASS(xdna_mbox_msg,
> @@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head,
>   	     TP_ARGS(name, chann_id, opcode, id)
>   );
>   
> -TRACE_EVENT(mbox_irq_handle,
> -	    TP_PROTO(char *name, int irq),
> +DECLARE_EVENT_CLASS(xdna_mbox_name_id,
> +		    TP_PROTO(char *name, int irq),
>   
> -	    TP_ARGS(name, irq),
> +		    TP_ARGS(name, irq),
>   
> -	    TP_STRUCT__entry(__string(name, name)
> -			     __field(int, irq)),
> +		    TP_STRUCT__entry(__string(name, name)
> +				     __field(int, irq)),
>   
> -	    TP_fast_assign(__assign_str(name);
> -			   __entry->irq = irq;),
> +		    TP_fast_assign(__assign_str(name);
> +				   __entry->irq = irq;),
> +
> +		    TP_printk("%s.%d", __get_str(name), __entry->irq)
> +);
> +
> +DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
> +	     TP_PROTO(char *name, int irq),
> +	     TP_ARGS(name, irq)
> +);
>   
> -	    TP_printk("%s.%d", __get_str(name), __entry->irq)
> +DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
> +	     TP_PROTO(char *name, int irq),
> +	     TP_ARGS(name, irq)
>   );
>   
>   #endif /* !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ) */


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker
  2026-04-21 19:18 ` Mario Limonciello
@ 2026-04-21 19:39   ` Lizhi Hou
  2026-04-21 19:45     ` Mario Limonciello
  0 siblings, 1 reply; 5+ messages in thread
From: Lizhi Hou @ 2026-04-21 19:39 UTC (permalink / raw)
  To: Mario Limonciello, ogabbay, quic_jhugo, dri-devel,
	maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan


On 4/21/26 12:18, Mario Limonciello wrote:
>
>
> On 4/21/26 13:15, Lizhi Hou wrote:
>> From: Max Zhen <max.zhen@amd.com>
>>
>> Add more trace coverage to amdxdna job handling and mailbox receive
>> processing to make driver execution easier to debug.
>>
>> Extend the xdna_job trace event to record the command opcode in
>> addition to the job sequence number. Use the enhanced tracepoint in
>> the job run, sent-to-device, signaled-fence, and job-free paths so
>> that trace output can be correlated with the command being executed.
>>
>> Also add debug-point tracing when a command is received through the
>> submit ioctl path, and add a trace event when the mailbox RX worker
>> runs.
>>
>> These changes improve visibility into job lifetime transitions and
>> mailbox activity, which helps debug command flow and scheduler issues.
>>
>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>>   drivers/accel/amdxdna/aie2_ctx.c        | 14 ++++++---
>>   drivers/accel/amdxdna/amdxdna_ctx.c     |  3 +-
>>   drivers/accel/amdxdna/amdxdna_ctx.h     |  1 +
>>   drivers/accel/amdxdna/amdxdna_mailbox.c |  1 +
>>   include/trace/events/amdxdna.h          | 42 ++++++++++++++++---------
>>   5 files changed, 42 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c 
>> b/drivers/accel/amdxdna/aie2_ctx.c
>> index d37123d925b6..3b0feba448c4 100644
>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>> @@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref)
>>       struct amdxdna_sched_job *job;
>>         job = container_of(ref, struct amdxdna_sched_job, refcnt);
>> +
>>       amdxdna_sched_job_cleanup(job);
>>       atomic64_inc(&job->hwctx->job_free_cnt);
>>       wake_up(&job->hwctx->priv->job_free_wq);
>> @@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
>>   {
>>       struct dma_fence *fence = job->fence;
>>   -    trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", 
>> job->seq);
>> +    trace_xdna_job(&job->base, job->hwctx->name, "signaling fence",
>> +               job->seq, job->drv_cmd ? job->drv_cmd->opcode : 
>> DEFAULT_IO);
>>         aie2_tdr_signal(job->hwctx->client->xdna);
>>       job->hwctx->priv->completed++;
>> @@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
>>       struct dma_fence *fence;
>>       int ret;
>>   +    trace_xdna_job(sched_job, hwctx->name, "job run",
>> +               job->seq, job->drv_cmd ? job->drv_cmd->opcode : 
>> DEFAULT_IO);
>> +
>>       if (!hwctx->priv->mbox_chann)
>>           return NULL;
>>   @@ -409,7 +414,8 @@ aie2_sched_job_run(struct drm_sched_job 
>> *sched_job)
>>       } else {
>>           aie2_tdr_signal(hwctx->client->xdna);
>>       }
>> -    trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
>> +    trace_xdna_job(sched_job, hwctx->name, "sent to device",
>> +               job->seq, job->drv_cmd ? job->drv_cmd->opcode : 
>> DEFAULT_IO);
>>         return fence;
>>   }
>> @@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct 
>> drm_sched_job *sched_job)
>>       struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
>>       struct amdxdna_hwctx *hwctx = job->hwctx;
>>   -    trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
>> +    trace_xdna_job(sched_job, hwctx->name, "job free",
>> +               job->seq, job->drv_cmd ? job->drv_cmd->opcode : 
>> DEFAULT_IO);
>>       if (!job->job_done)
>>           up(&hwctx->priv->job_sem);
>>   @@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job 
>> *sched_job)
>>       int ret;
>>         xdna = hwctx->client->xdna;
>> -    trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
>>         guard(mutex)(&xdna->dev_lock);
>>   diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c 
>> b/drivers/accel/amdxdna/amdxdna_ctx.c
>> index ff6c3e8e5a15..2c2c21992c87 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>> @@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client 
>> *client,
>>           goto unlock_srcu;
>>       }
>>   -
>>       job->hwctx = hwctx;
>>       job->mm = current->mm;
>>   @@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct 
>> drm_device *dev, void *data, struct drm_
>>       if (args->ext || args->ext_flags)
>>           return -EINVAL;
>>   +    trace_amdxdna_debug_point(current->comm, args->type, "job 
>> received");
>> +
>>       switch (args->type) {
>>       case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
>>           return amdxdna_drm_submit_execbuf(client, args);
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h 
>> b/drivers/accel/amdxdna/amdxdna_ctx.h
>> index a8557d7e8923..355798687376 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>> @@ -119,6 +119,7 @@ struct amdxdna_hwctx {
>>       container_of(j, struct amdxdna_sched_job, base)
>>     enum amdxdna_job_opcode {
>> +    DEFAULT_IO,
>
> Do you really want this at the beginning of the list?  Doesn't that 
> break uses of amdxdna_drv_cmd that has the previous indexing?

*_DEBUG_BO is driver internal use only. Using 0 here to align with our 
current trace scripts.

Lizhi

>
>>       SYNC_DEBUG_BO,
>>       ATTACH_DEBUG_BO,
>>       DETACH_DEBUG_BO,
>> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c 
>> b/drivers/accel/amdxdna/amdxdna_mailbox.c
>> index 37771bdb24a1..cc8865f4e79c 100644
>> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
>> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
>> @@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct work_struct 
>> *rx_work)
>>       int ret;
>>         mb_chann = container_of(rx_work, struct mailbox_channel, 
>> rx_work);
>> +    trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);
>>         if (READ_ONCE(mb_chann->bad_state)) {
>>           MB_ERR(mb_chann, "Channel in bad state, work aborted");
>> diff --git a/include/trace/events/amdxdna.h 
>> b/include/trace/events/amdxdna.h
>> index c6cb2da7b706..71da24267e52 100644
>> --- a/include/trace/events/amdxdna.h
>> +++ b/include/trace/events/amdxdna.h
>> @@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point,
>>   );
>>     TRACE_EVENT(xdna_job,
>> -        TP_PROTO(struct drm_sched_job *sched_job, const char *name, 
>> const char *str, u64 seq),
>> +        TP_PROTO(struct drm_sched_job *sched_job, const char *name,
>> +             const char *str, u64 seq, u32 op),
>>   -        TP_ARGS(sched_job, name, str, seq),
>> +        TP_ARGS(sched_job, name, str, seq, op),
>>             TP_STRUCT__entry(__string(name, name)
>>                    __string(str, str)
>>                    __field(u64, fence_context)
>>                    __field(u64, fence_seqno)
>> -                 __field(u64, seq)),
>> +                 __field(u64, seq)
>> +                 __field(u32, op)),
>>             TP_fast_assign(__assign_str(name);
>>                  __assign_str(str);
>>                  __entry->fence_context = 
>> sched_job->s_fence->finished.context;
>>                  __entry->fence_seqno = 
>> sched_job->s_fence->finished.seqno;
>> -               __entry->seq = seq;),
>> +               __entry->seq = seq;
>> +               __entry->op = op;),
>>   -        TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld 
>> %s",
>> +        TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu 
>> %s, op=%u",
>>                 __entry->fence_context, __entry->fence_seqno,
>>                 __get_str(name), __entry->seq,
>> -              __get_str(str))
>> +              __get_str(str),
>> +              __entry->op)
>>   );
>>     DECLARE_EVENT_CLASS(xdna_mbox_msg,
>> @@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head,
>>            TP_ARGS(name, chann_id, opcode, id)
>>   );
>>   -TRACE_EVENT(mbox_irq_handle,
>> -        TP_PROTO(char *name, int irq),
>> +DECLARE_EVENT_CLASS(xdna_mbox_name_id,
>> +            TP_PROTO(char *name, int irq),
>>   -        TP_ARGS(name, irq),
>> +            TP_ARGS(name, irq),
>>   -        TP_STRUCT__entry(__string(name, name)
>> -                 __field(int, irq)),
>> +            TP_STRUCT__entry(__string(name, name)
>> +                     __field(int, irq)),
>>   -        TP_fast_assign(__assign_str(name);
>> -               __entry->irq = irq;),
>> +            TP_fast_assign(__assign_str(name);
>> +                   __entry->irq = irq;),
>> +
>> +            TP_printk("%s.%d", __get_str(name), __entry->irq)
>> +);
>> +
>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
>> +         TP_PROTO(char *name, int irq),
>> +         TP_ARGS(name, irq)
>> +);
>>   -        TP_printk("%s.%d", __get_str(name), __entry->irq)
>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
>> +         TP_PROTO(char *name, int irq),
>> +         TP_ARGS(name, irq)
>>   );
>>     #endif /* !defined(_TRACE_AMDXDNA_H) || 
>> defined(TRACE_HEADER_MULTI_READ) */
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker
  2026-04-21 19:39   ` Lizhi Hou
@ 2026-04-21 19:45     ` Mario Limonciello
  2026-04-22 15:38       ` Lizhi Hou
  0 siblings, 1 reply; 5+ messages in thread
From: Mario Limonciello @ 2026-04-21 19:45 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan



On 4/21/26 14:39, Lizhi Hou wrote:
> 
> On 4/21/26 12:18, Mario Limonciello wrote:
>>
>>
>> On 4/21/26 13:15, Lizhi Hou wrote:
>>> From: Max Zhen <max.zhen@amd.com>
>>>
>>> Add more trace coverage to amdxdna job handling and mailbox receive
>>> processing to make driver execution easier to debug.
>>>
>>> Extend the xdna_job trace event to record the command opcode in
>>> addition to the job sequence number. Use the enhanced tracepoint in
>>> the job run, sent-to-device, signaled-fence, and job-free paths so
>>> that trace output can be correlated with the command being executed.
>>>
>>> Also add debug-point tracing when a command is received through the
>>> submit ioctl path, and add a trace event when the mailbox RX worker
>>> runs.
>>>
>>> These changes improve visibility into job lifetime transitions and
>>> mailbox activity, which helps debug command flow and scheduler issues.
>>>
>>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>>> ---
>>>   drivers/accel/amdxdna/aie2_ctx.c        | 14 ++++++---
>>>   drivers/accel/amdxdna/amdxdna_ctx.c     |  3 +-
>>>   drivers/accel/amdxdna/amdxdna_ctx.h     |  1 +
>>>   drivers/accel/amdxdna/amdxdna_mailbox.c |  1 +
>>>   include/trace/events/amdxdna.h          | 42 ++++++++++++++++---------
>>>   5 files changed, 42 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/ 
>>> amdxdna/aie2_ctx.c
>>> index d37123d925b6..3b0feba448c4 100644
>>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>>> @@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref)
>>>       struct amdxdna_sched_job *job;
>>>         job = container_of(ref, struct amdxdna_sched_job, refcnt);
>>> +
>>>       amdxdna_sched_job_cleanup(job);
>>>       atomic64_inc(&job->hwctx->job_free_cnt);
>>>       wake_up(&job->hwctx->priv->job_free_wq);
>>> @@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
>>>   {
>>>       struct dma_fence *fence = job->fence;
>>>   -    trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", 
>>> job->seq);
>>> +    trace_xdna_job(&job->base, job->hwctx->name, "signaling fence",
>>> +               job->seq, job->drv_cmd ? job->drv_cmd->opcode : 
>>> DEFAULT_IO);
>>>         aie2_tdr_signal(job->hwctx->client->xdna);
>>>       job->hwctx->priv->completed++;
>>> @@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
>>>       struct dma_fence *fence;
>>>       int ret;
>>>   +    trace_xdna_job(sched_job, hwctx->name, "job run",
>>> +               job->seq, job->drv_cmd ? job->drv_cmd->opcode : 
>>> DEFAULT_IO);
>>> +
>>>       if (!hwctx->priv->mbox_chann)
>>>           return NULL;
>>>   @@ -409,7 +414,8 @@ aie2_sched_job_run(struct drm_sched_job 
>>> *sched_job)
>>>       } else {
>>>           aie2_tdr_signal(hwctx->client->xdna);
>>>       }
>>> -    trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
>>> +    trace_xdna_job(sched_job, hwctx->name, "sent to device",
>>> +               job->seq, job->drv_cmd ? job->drv_cmd->opcode : 
>>> DEFAULT_IO);
>>>         return fence;
>>>   }
>>> @@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct 
>>> drm_sched_job *sched_job)
>>>       struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
>>>       struct amdxdna_hwctx *hwctx = job->hwctx;
>>>   -    trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
>>> +    trace_xdna_job(sched_job, hwctx->name, "job free",
>>> +               job->seq, job->drv_cmd ? job->drv_cmd->opcode : 
>>> DEFAULT_IO);
>>>       if (!job->job_done)
>>>           up(&hwctx->priv->job_sem);
>>>   @@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job 
>>> *sched_job)
>>>       int ret;
>>>         xdna = hwctx->client->xdna;
>>> -    trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
>>>         guard(mutex)(&xdna->dev_lock);
>>>   diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/ 
>>> amdxdna/amdxdna_ctx.c
>>> index ff6c3e8e5a15..2c2c21992c87 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>>> @@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client 
>>> *client,
>>>           goto unlock_srcu;
>>>       }
>>>   -
>>>       job->hwctx = hwctx;
>>>       job->mm = current->mm;
>>>   @@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct 
>>> drm_device *dev, void *data, struct drm_
>>>       if (args->ext || args->ext_flags)
>>>           return -EINVAL;
>>>   +    trace_amdxdna_debug_point(current->comm, args->type, "job 
>>> received");
>>> +
>>>       switch (args->type) {
>>>       case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
>>>           return amdxdna_drm_submit_execbuf(client, args);
>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/ 
>>> amdxdna/amdxdna_ctx.h
>>> index a8557d7e8923..355798687376 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>>> @@ -119,6 +119,7 @@ struct amdxdna_hwctx {
>>>       container_of(j, struct amdxdna_sched_job, base)
>>>     enum amdxdna_job_opcode {
>>> +    DEFAULT_IO,
>>
>> Do you really want this at the beginning of the list?  Doesn't that 
>> break uses of amdxdna_drv_cmd that has the previous indexing?
> 
> *_DEBUG_BO is driver internal use only. Using 0 here to align with our 
> current trace scripts.
> 
> Lizhi
> 
>>
>>>       SYNC_DEBUG_BO,
>>>       ATTACH_DEBUG_BO,
>>>       DETACH_DEBUG_BO,
>>> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/ 
>>> amdxdna/amdxdna_mailbox.c
>>> index 37771bdb24a1..cc8865f4e79c 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
>>> @@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct work_struct 
>>> *rx_work)
>>>       int ret;
>>>         mb_chann = container_of(rx_work, struct mailbox_channel, 
>>> rx_work);
>>> +    trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);
>>>         if (READ_ONCE(mb_chann->bad_state)) {
>>>           MB_ERR(mb_chann, "Channel in bad state, work aborted");
>>> diff --git a/include/trace/events/amdxdna.h b/include/trace/events/ 
>>> amdxdna.h
>>> index c6cb2da7b706..71da24267e52 100644
>>> --- a/include/trace/events/amdxdna.h
>>> +++ b/include/trace/events/amdxdna.h
>>> @@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point,
>>>   );
>>>     TRACE_EVENT(xdna_job,
>>> -        TP_PROTO(struct drm_sched_job *sched_job, const char *name, 
>>> const char *str, u64 seq),
>>> +        TP_PROTO(struct drm_sched_job *sched_job, const char *name,
>>> +             const char *str, u64 seq, u32 op),
>>>   -        TP_ARGS(sched_job, name, str, seq),
>>> +        TP_ARGS(sched_job, name, str, seq, op),
>>>             TP_STRUCT__entry(__string(name, name)
>>>                    __string(str, str)
>>>                    __field(u64, fence_context)
>>>                    __field(u64, fence_seqno)
>>> -                 __field(u64, seq)),
>>> +                 __field(u64, seq)
>>> +                 __field(u32, op)),
>>>             TP_fast_assign(__assign_str(name);
>>>                  __assign_str(str);
>>>                  __entry->fence_context = sched_job->s_fence- 
>>> >finished.context;
>>>                  __entry->fence_seqno = sched_job->s_fence- 
>>> >finished.seqno;
>>> -               __entry->seq = seq;),
>>> +               __entry->seq = seq;
>>> +               __entry->op = op;),
>>>   -        TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld 
>>> %s",
>>> +        TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu 
>>> %s, op=%u",
>>>                 __entry->fence_context, __entry->fence_seqno,
>>>                 __get_str(name), __entry->seq,
>>> -              __get_str(str))
>>> +              __get_str(str),
>>> +              __entry->op)
>>>   );
>>>     DECLARE_EVENT_CLASS(xdna_mbox_msg,
>>> @@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head,
>>>            TP_ARGS(name, chann_id, opcode, id)
>>>   );
>>>   -TRACE_EVENT(mbox_irq_handle,
>>> -        TP_PROTO(char *name, int irq),
>>> +DECLARE_EVENT_CLASS(xdna_mbox_name_id,
>>> +            TP_PROTO(char *name, int irq),
>>>   -        TP_ARGS(name, irq),
>>> +            TP_ARGS(name, irq),
>>>   -        TP_STRUCT__entry(__string(name, name)
>>> -                 __field(int, irq)),
>>> +            TP_STRUCT__entry(__string(name, name)
>>> +                     __field(int, irq)),
>>>   -        TP_fast_assign(__assign_str(name);
>>> -               __entry->irq = irq;),
>>> +            TP_fast_assign(__assign_str(name);
>>> +                   __entry->irq = irq;),
>>> +
>>> +            TP_printk("%s.%d", __get_str(name), __entry->irq)
>>> +);
>>> +
>>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
>>> +         TP_PROTO(char *name, int irq),
>>> +         TP_ARGS(name, irq)
>>> +);
>>>   -        TP_printk("%s.%d", __get_str(name), __entry->irq)
>>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
>>> +         TP_PROTO(char *name, int irq),
>>> +         TP_ARGS(name, irq)
>>>   );
>>>     #endif /* !defined(_TRACE_AMDXDNA_H) || 
>>> defined(TRACE_HEADER_MULTI_READ) */
>>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker
  2026-04-21 19:45     ` Mario Limonciello
@ 2026-04-22 15:38       ` Lizhi Hou
  0 siblings, 0 replies; 5+ messages in thread
From: Lizhi Hou @ 2026-04-22 15:38 UTC (permalink / raw)
  To: Mario Limonciello, ogabbay, quic_jhugo, dri-devel,
	maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan

Applied to drm-misc-next

On 4/21/26 12:45, Mario Limonciello wrote:
>
>
> On 4/21/26 14:39, Lizhi Hou wrote:
>>
>> On 4/21/26 12:18, Mario Limonciello wrote:
>>>
>>>
>>> On 4/21/26 13:15, Lizhi Hou wrote:
>>>> From: Max Zhen <max.zhen@amd.com>
>>>>
>>>> Add more trace coverage to amdxdna job handling and mailbox receive
>>>> processing to make driver execution easier to debug.
>>>>
>>>> Extend the xdna_job trace event to record the command opcode in
>>>> addition to the job sequence number. Use the enhanced tracepoint in
>>>> the job run, sent-to-device, signaled-fence, and job-free paths so
>>>> that trace output can be correlated with the command being executed.
>>>>
>>>> Also add debug-point tracing when a command is received through the
>>>> submit ioctl path, and add a trace event when the mailbox RX worker
>>>> runs.
>>>>
>>>> These changes improve visibility into job lifetime transitions and
>>>> mailbox activity, which helps debug command flow and scheduler issues.
>>>>
>>>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>>>> ---
>>>>   drivers/accel/amdxdna/aie2_ctx.c        | 14 ++++++---
>>>>   drivers/accel/amdxdna/amdxdna_ctx.c     |  3 +-
>>>>   drivers/accel/amdxdna/amdxdna_ctx.h     |  1 +
>>>>   drivers/accel/amdxdna/amdxdna_mailbox.c |  1 +
>>>>   include/trace/events/amdxdna.h          | 42 
>>>> ++++++++++++++++---------
>>>>   5 files changed, 42 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/ 
>>>> amdxdna/aie2_ctx.c
>>>> index d37123d925b6..3b0feba448c4 100644
>>>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>>>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>>>> @@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref)
>>>>       struct amdxdna_sched_job *job;
>>>>         job = container_of(ref, struct amdxdna_sched_job, refcnt);
>>>> +
>>>>       amdxdna_sched_job_cleanup(job);
>>>>       atomic64_inc(&job->hwctx->job_free_cnt);
>>>>       wake_up(&job->hwctx->priv->job_free_wq);
>>>> @@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
>>>>   {
>>>>       struct dma_fence *fence = job->fence;
>>>>   -    trace_xdna_job(&job->base, job->hwctx->name, "signaled 
>>>> fence", job->seq);
>>>> +    trace_xdna_job(&job->base, job->hwctx->name, "signaling fence",
>>>> +               job->seq, job->drv_cmd ? job->drv_cmd->opcode : 
>>>> DEFAULT_IO);
>>>>         aie2_tdr_signal(job->hwctx->client->xdna);
>>>>       job->hwctx->priv->completed++;
>>>> @@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job 
>>>> *sched_job)
>>>>       struct dma_fence *fence;
>>>>       int ret;
>>>>   +    trace_xdna_job(sched_job, hwctx->name, "job run",
>>>> +               job->seq, job->drv_cmd ? job->drv_cmd->opcode : 
>>>> DEFAULT_IO);
>>>> +
>>>>       if (!hwctx->priv->mbox_chann)
>>>>           return NULL;
>>>>   @@ -409,7 +414,8 @@ aie2_sched_job_run(struct drm_sched_job 
>>>> *sched_job)
>>>>       } else {
>>>>           aie2_tdr_signal(hwctx->client->xdna);
>>>>       }
>>>> -    trace_xdna_job(sched_job, hwctx->name, "sent to device", 
>>>> job->seq);
>>>> +    trace_xdna_job(sched_job, hwctx->name, "sent to device",
>>>> +               job->seq, job->drv_cmd ? job->drv_cmd->opcode : 
>>>> DEFAULT_IO);
>>>>         return fence;
>>>>   }
>>>> @@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct 
>>>> drm_sched_job *sched_job)
>>>>       struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
>>>>       struct amdxdna_hwctx *hwctx = job->hwctx;
>>>>   -    trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
>>>> +    trace_xdna_job(sched_job, hwctx->name, "job free",
>>>> +               job->seq, job->drv_cmd ? job->drv_cmd->opcode : 
>>>> DEFAULT_IO);
>>>>       if (!job->job_done)
>>>>           up(&hwctx->priv->job_sem);
>>>>   @@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job 
>>>> *sched_job)
>>>>       int ret;
>>>>         xdna = hwctx->client->xdna;
>>>> -    trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
>>>>         guard(mutex)(&xdna->dev_lock);
>>>>   diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/ 
>>>> amdxdna/amdxdna_ctx.c
>>>> index ff6c3e8e5a15..2c2c21992c87 100644
>>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>>>> @@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client 
>>>> *client,
>>>>           goto unlock_srcu;
>>>>       }
>>>>   -
>>>>       job->hwctx = hwctx;
>>>>       job->mm = current->mm;
>>>>   @@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct 
>>>> drm_device *dev, void *data, struct drm_
>>>>       if (args->ext || args->ext_flags)
>>>>           return -EINVAL;
>>>>   +    trace_amdxdna_debug_point(current->comm, args->type, "job 
>>>> received");
>>>> +
>>>>       switch (args->type) {
>>>>       case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
>>>>           return amdxdna_drm_submit_execbuf(client, args);
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/ 
>>>> amdxdna/amdxdna_ctx.h
>>>> index a8557d7e8923..355798687376 100644
>>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>>>> @@ -119,6 +119,7 @@ struct amdxdna_hwctx {
>>>>       container_of(j, struct amdxdna_sched_job, base)
>>>>     enum amdxdna_job_opcode {
>>>> +    DEFAULT_IO,
>>>
>>> Do you really want this at the beginning of the list?  Doesn't that 
>>> break uses of amdxdna_drv_cmd that has the previous indexing?
>>
>> *_DEBUG_BO is driver internal use only. Using 0 here to align with 
>> our current trace scripts.
>>
>> Lizhi
>>
>>>
>>>>       SYNC_DEBUG_BO,
>>>>       ATTACH_DEBUG_BO,
>>>>       DETACH_DEBUG_BO,
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c 
>>>> b/drivers/accel/ amdxdna/amdxdna_mailbox.c
>>>> index 37771bdb24a1..cc8865f4e79c 100644
>>>> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>> @@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct 
>>>> work_struct *rx_work)
>>>>       int ret;
>>>>         mb_chann = container_of(rx_work, struct mailbox_channel, 
>>>> rx_work);
>>>> +    trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);
>>>>         if (READ_ONCE(mb_chann->bad_state)) {
>>>>           MB_ERR(mb_chann, "Channel in bad state, work aborted");
>>>> diff --git a/include/trace/events/amdxdna.h b/include/trace/events/ 
>>>> amdxdna.h
>>>> index c6cb2da7b706..71da24267e52 100644
>>>> --- a/include/trace/events/amdxdna.h
>>>> +++ b/include/trace/events/amdxdna.h
>>>> @@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point,
>>>>   );
>>>>     TRACE_EVENT(xdna_job,
>>>> -        TP_PROTO(struct drm_sched_job *sched_job, const char 
>>>> *name, const char *str, u64 seq),
>>>> +        TP_PROTO(struct drm_sched_job *sched_job, const char *name,
>>>> +             const char *str, u64 seq, u32 op),
>>>>   -        TP_ARGS(sched_job, name, str, seq),
>>>> +        TP_ARGS(sched_job, name, str, seq, op),
>>>>             TP_STRUCT__entry(__string(name, name)
>>>>                    __string(str, str)
>>>>                    __field(u64, fence_context)
>>>>                    __field(u64, fence_seqno)
>>>> -                 __field(u64, seq)),
>>>> +                 __field(u64, seq)
>>>> +                 __field(u32, op)),
>>>>             TP_fast_assign(__assign_str(name);
>>>>                  __assign_str(str);
>>>>                  __entry->fence_context = sched_job->s_fence- 
>>>> >finished.context;
>>>>                  __entry->fence_seqno = sched_job->s_fence- 
>>>> >finished.seqno;
>>>> -               __entry->seq = seq;),
>>>> +               __entry->seq = seq;
>>>> +               __entry->op = op;),
>>>>   -        TP_printk("fence=(context:%llu, seqno:%lld), %s 
>>>> seq#:%lld %s",
>>>> +        TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu 
>>>> %s, op=%u",
>>>>                 __entry->fence_context, __entry->fence_seqno,
>>>>                 __get_str(name), __entry->seq,
>>>> -              __get_str(str))
>>>> +              __get_str(str),
>>>> +              __entry->op)
>>>>   );
>>>>     DECLARE_EVENT_CLASS(xdna_mbox_msg,
>>>> @@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head,
>>>>            TP_ARGS(name, chann_id, opcode, id)
>>>>   );
>>>>   -TRACE_EVENT(mbox_irq_handle,
>>>> -        TP_PROTO(char *name, int irq),
>>>> +DECLARE_EVENT_CLASS(xdna_mbox_name_id,
>>>> +            TP_PROTO(char *name, int irq),
>>>>   -        TP_ARGS(name, irq),
>>>> +            TP_ARGS(name, irq),
>>>>   -        TP_STRUCT__entry(__string(name, name)
>>>> -                 __field(int, irq)),
>>>> +            TP_STRUCT__entry(__string(name, name)
>>>> +                     __field(int, irq)),
>>>>   -        TP_fast_assign(__assign_str(name);
>>>> -               __entry->irq = irq;),
>>>> +            TP_fast_assign(__assign_str(name);
>>>> +                   __entry->irq = irq;),
>>>> +
>>>> +            TP_printk("%s.%d", __get_str(name), __entry->irq)
>>>> +);
>>>> +
>>>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
>>>> +         TP_PROTO(char *name, int irq),
>>>> +         TP_ARGS(name, irq)
>>>> +);
>>>>   -        TP_printk("%s.%d", __get_str(name), __entry->irq)
>>>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
>>>> +         TP_PROTO(char *name, int irq),
>>>> +         TP_ARGS(name, irq)
>>>>   );
>>>>     #endif /* !defined(_TRACE_AMDXDNA_H) || 
>>>> defined(TRACE_HEADER_MULTI_READ) */
>>>
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-04-22 15:38 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-21 18:15 [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker Lizhi Hou
2026-04-21 19:18 ` Mario Limonciello
2026-04-21 19:39   ` Lizhi Hou
2026-04-21 19:45     ` Mario Limonciello
2026-04-22 15:38       ` Lizhi Hou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox