public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker
@ 2026-04-21 18:15 Lizhi Hou
  2026-04-21 19:18 ` Mario Limonciello
  0 siblings, 1 reply; 5+ messages in thread
From: Lizhi Hou @ 2026-04-21 18:15 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
	maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan, Lizhi Hou

From: Max Zhen <max.zhen@amd.com>

Add more trace coverage to amdxdna job handling and mailbox receive
processing to make driver execution easier to debug.

Extend the xdna_job trace event to record the command opcode in
addition to the job sequence number; jobs that carry no driver command
are reported with the new DEFAULT_IO opcode. Use the enhanced
tracepoint in the job-run, sent-to-device, signaling-fence, and
job-free paths so that trace output can be correlated with the command
being executed.

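Also add debug-point tracing when a command is received through the
submit ioctl path, and add a trace event that fires when the mailbox RX
worker runs. The new mbox_rx_worker event shares an event class with
the existing mbox_irq_handle event. The "job timedout" trace call in
aie2_sched_job_timedout() is removed.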

These changes improve visibility into job lifetime transitions and
mailbox activity, which helps debug command flow and scheduler issues.

Signed-off-by: Max Zhen <max.zhen@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_ctx.c        | 14 ++++++---
 drivers/accel/amdxdna/amdxdna_ctx.c     |  3 +-
 drivers/accel/amdxdna/amdxdna_ctx.h     |  1 +
 drivers/accel/amdxdna/amdxdna_mailbox.c |  1 +
 include/trace/events/amdxdna.h          | 42 ++++++++++++++++---------
 5 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index d37123d925b6..3b0feba448c4 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref)
 	struct amdxdna_sched_job *job;
 
 	job = container_of(ref, struct amdxdna_sched_job, refcnt);
+
 	amdxdna_sched_job_cleanup(job);
 	atomic64_inc(&job->hwctx->job_free_cnt);
 	wake_up(&job->hwctx->priv->job_free_wq);
@@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
 {
 	struct dma_fence *fence = job->fence;
 
-	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
+	trace_xdna_job(&job->base, job->hwctx->name, "signaling fence",
+		       job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
 
 	aie2_tdr_signal(job->hwctx->client->xdna);
 	job->hwctx->priv->completed++;
@@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
 	struct dma_fence *fence;
 	int ret;
 
+	trace_xdna_job(sched_job, hwctx->name, "job run",
+		       job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
+
 	if (!hwctx->priv->mbox_chann)
 		return NULL;
 
@@ -409,7 +414,8 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
 	} else {
 		aie2_tdr_signal(hwctx->client->xdna);
 	}
-	trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
+	trace_xdna_job(sched_job, hwctx->name, "sent to device",
+		       job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
 
 	return fence;
 }
@@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct drm_sched_job *sched_job)
 	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
 	struct amdxdna_hwctx *hwctx = job->hwctx;
 
-	trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
+	trace_xdna_job(sched_job, hwctx->name, "job free",
+		       job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
 	if (!job->job_done)
 		up(&hwctx->priv->job_sem);
 
@@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
 	int ret;
 
 	xdna = hwctx->client->xdna;
-	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
 
 	guard(mutex)(&xdna->dev_lock);
 
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index ff6c3e8e5a15..2c2c21992c87 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
 		goto unlock_srcu;
 	}
 
-
 	job->hwctx = hwctx;
 	job->mm = current->mm;
 
@@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_
 	if (args->ext || args->ext_flags)
 		return -EINVAL;
 
+	trace_amdxdna_debug_point(current->comm, args->type, "job received");
+
 	switch (args->type) {
 	case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
 		return amdxdna_drm_submit_execbuf(client, args);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index a8557d7e8923..355798687376 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -119,6 +119,7 @@ struct amdxdna_hwctx {
 	container_of(j, struct amdxdna_sched_job, base)
 
 enum amdxdna_job_opcode {
+	DEFAULT_IO,
 	SYNC_DEBUG_BO,
 	ATTACH_DEBUG_BO,
 	DETACH_DEBUG_BO,
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index 37771bdb24a1..cc8865f4e79c 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct work_struct *rx_work)
 	int ret;
 
 	mb_chann = container_of(rx_work, struct mailbox_channel, rx_work);
+	trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);
 
 	if (READ_ONCE(mb_chann->bad_state)) {
 		MB_ERR(mb_chann, "Channel in bad state, work aborted");
diff --git a/include/trace/events/amdxdna.h b/include/trace/events/amdxdna.h
index c6cb2da7b706..71da24267e52 100644
--- a/include/trace/events/amdxdna.h
+++ b/include/trace/events/amdxdna.h
@@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point,
 );
 
 TRACE_EVENT(xdna_job,
-	    TP_PROTO(struct drm_sched_job *sched_job, const char *name, const char *str, u64 seq),
+	    TP_PROTO(struct drm_sched_job *sched_job, const char *name,
+		     const char *str, u64 seq, u32 op),
 
-	    TP_ARGS(sched_job, name, str, seq),
+	    TP_ARGS(sched_job, name, str, seq, op),
 
 	    TP_STRUCT__entry(__string(name, name)
 			     __string(str, str)
 			     __field(u64, fence_context)
 			     __field(u64, fence_seqno)
-			     __field(u64, seq)),
+			     __field(u64, seq)
+			     __field(u32, op)),
 
 	    TP_fast_assign(__assign_str(name);
 			   __assign_str(str);
 			   __entry->fence_context = sched_job->s_fence->finished.context;
 			   __entry->fence_seqno = sched_job->s_fence->finished.seqno;
-			   __entry->seq = seq;),
+			   __entry->seq = seq;
+			   __entry->op = op;),
 
-	    TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld %s",
+	    TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu %s, op=%u",
 		      __entry->fence_context, __entry->fence_seqno,
 		      __get_str(name), __entry->seq,
-		      __get_str(str))
+		      __get_str(str),
+		      __entry->op)
 );
 
 DECLARE_EVENT_CLASS(xdna_mbox_msg,
@@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head,
 	     TP_ARGS(name, chann_id, opcode, id)
 );
 
-TRACE_EVENT(mbox_irq_handle,
-	    TP_PROTO(char *name, int irq),
+DECLARE_EVENT_CLASS(xdna_mbox_name_id,
+		    TP_PROTO(char *name, int irq),
 
-	    TP_ARGS(name, irq),
+		    TP_ARGS(name, irq),
 
-	    TP_STRUCT__entry(__string(name, name)
-			     __field(int, irq)),
+		    TP_STRUCT__entry(__string(name, name)
+				     __field(int, irq)),
 
-	    TP_fast_assign(__assign_str(name);
-			   __entry->irq = irq;),
+		    TP_fast_assign(__assign_str(name);
+				   __entry->irq = irq;),
+
+		    TP_printk("%s.%d", __get_str(name), __entry->irq)
+);
+
+DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
+	     TP_PROTO(char *name, int irq),
+	     TP_ARGS(name, irq)
+);
 
-	    TP_printk("%s.%d", __get_str(name), __entry->irq)
+DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
+	     TP_PROTO(char *name, int irq),
+	     TP_ARGS(name, irq)
 );
 
 #endif /* !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ) */
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-04-22 15:38 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-04-21 18:15 [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker Lizhi Hou
2026-04-21 19:18 ` Mario Limonciello
2026-04-21 19:39   ` Lizhi Hou
2026-04-21 19:45     ` Mario Limonciello
2026-04-22 15:38       ` Lizhi Hou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox