From: Lizhi Hou <lizhi.hou@amd.com>
To: Mario Limonciello <superm1@kernel.org>, <ogabbay@kernel.org>,
<quic_jhugo@quicinc.com>, <dri-devel@lists.freedesktop.org>,
<maciej.falkowski@linux.intel.com>
Cc: Max Zhen <max.zhen@amd.com>, <linux-kernel@vger.kernel.org>,
<sonal.santan@amd.com>
Subject: Re: [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker
Date: Wed, 22 Apr 2026 08:38:13 -0700 [thread overview]
Message-ID: <a405d199-2ce2-c8ed-3ede-4c9495b2d7da@amd.com> (raw)
In-Reply-To: <f3d1b859-e5f2-468c-b957-fc3686457213@kernel.org>
Applied to drm-misc-next
On 4/21/26 12:45, Mario Limonciello wrote:
>
>
> On 4/21/26 14:39, Lizhi Hou wrote:
>>
>> On 4/21/26 12:18, Mario Limonciello wrote:
>>>
>>>
>>> On 4/21/26 13:15, Lizhi Hou wrote:
>>>> From: Max Zhen <max.zhen@amd.com>
>>>>
>>>> Add more trace coverage to amdxdna job handling and mailbox receive
>>>> processing to make driver execution easier to debug.
>>>>
>>>> Extend the xdna_job trace event to record the command opcode in
>>>> addition to the job sequence number. Use the enhanced tracepoint in
>>>> the job run, sent-to-device, signaled-fence, and job-free paths so
>>>> that trace output can be correlated with the command being executed.
>>>>
>>>> Also add debug-point tracing when a command is received through the
>>>> submit ioctl path, and add a trace event when the mailbox RX worker
>>>> runs.
>>>>
>>>> These changes improve visibility into job lifetime transitions and
>>>> mailbox activity, which helps debug command flow and scheduler issues.
>>>>
>>>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>>>> ---
>>>> drivers/accel/amdxdna/aie2_ctx.c | 14 ++++++---
>>>> drivers/accel/amdxdna/amdxdna_ctx.c | 3 +-
>>>> drivers/accel/amdxdna/amdxdna_ctx.h | 1 +
>>>> drivers/accel/amdxdna/amdxdna_mailbox.c | 1 +
>>>> include/trace/events/amdxdna.h | 42
>>>> ++++++++++++++++---------
>>>> 5 files changed, 42 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/
>>>> amdxdna/aie2_ctx.c
>>>> index d37123d925b6..3b0feba448c4 100644
>>>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>>>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>>>> @@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref)
>>>> struct amdxdna_sched_job *job;
>>>> job = container_of(ref, struct amdxdna_sched_job, refcnt);
>>>> +
>>>> amdxdna_sched_job_cleanup(job);
>>>> atomic64_inc(&job->hwctx->job_free_cnt);
>>>> wake_up(&job->hwctx->priv->job_free_wq);
>>>> @@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
>>>> {
>>>> struct dma_fence *fence = job->fence;
>>>> - trace_xdna_job(&job->base, job->hwctx->name, "signaled
>>>> fence", job->seq);
>>>> + trace_xdna_job(&job->base, job->hwctx->name, "signaling fence",
>>>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>>>> DEFAULT_IO);
>>>> aie2_tdr_signal(job->hwctx->client->xdna);
>>>> job->hwctx->priv->completed++;
>>>> @@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job
>>>> *sched_job)
>>>> struct dma_fence *fence;
>>>> int ret;
>>>> + trace_xdna_job(sched_job, hwctx->name, "job run",
>>>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>>>> DEFAULT_IO);
>>>> +
>>>> if (!hwctx->priv->mbox_chann)
>>>> return NULL;
>>>> @@ -409,7 +414,8 @@ aie2_sched_job_run(struct drm_sched_job
>>>> *sched_job)
>>>> } else {
>>>> aie2_tdr_signal(hwctx->client->xdna);
>>>> }
>>>> - trace_xdna_job(sched_job, hwctx->name, "sent to device",
>>>> job->seq);
>>>> + trace_xdna_job(sched_job, hwctx->name, "sent to device",
>>>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>>>> DEFAULT_IO);
>>>> return fence;
>>>> }
>>>> @@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct
>>>> drm_sched_job *sched_job)
>>>> struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
>>>> struct amdxdna_hwctx *hwctx = job->hwctx;
>>>> - trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
>>>> + trace_xdna_job(sched_job, hwctx->name, "job free",
>>>> + job->seq, job->drv_cmd ? job->drv_cmd->opcode :
>>>> DEFAULT_IO);
>>>> if (!job->job_done)
>>>> up(&hwctx->priv->job_sem);
>>>> @@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job
>>>> *sched_job)
>>>> int ret;
>>>> xdna = hwctx->client->xdna;
>>>> - trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
>>>> guard(mutex)(&xdna->dev_lock);
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/
>>>> amdxdna/amdxdna_ctx.c
>>>> index ff6c3e8e5a15..2c2c21992c87 100644
>>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>>>> @@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client
>>>> *client,
>>>> goto unlock_srcu;
>>>> }
>>>> -
>>>> job->hwctx = hwctx;
>>>> job->mm = current->mm;
>>>> @@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct
>>>> drm_device *dev, void *data, struct drm_
>>>> if (args->ext || args->ext_flags)
>>>> return -EINVAL;
>>>> + trace_amdxdna_debug_point(current->comm, args->type, "job
>>>> received");
>>>> +
>>>> switch (args->type) {
>>>> case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
>>>> return amdxdna_drm_submit_execbuf(client, args);
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/
>>>> amdxdna/amdxdna_ctx.h
>>>> index a8557d7e8923..355798687376 100644
>>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>>>> @@ -119,6 +119,7 @@ struct amdxdna_hwctx {
>>>> container_of(j, struct amdxdna_sched_job, base)
>>>> enum amdxdna_job_opcode {
>>>> + DEFAULT_IO,
>>>
>>> Do you really want this at the beginning of the list? Doesn't that
>>> break uses of amdxdna_drv_cmd that has the previous indexing?
>>
>> *_DEBUG_BO is driver internal use only. Using 0 here to align with
>> our current trace scripts.
>>
>> Lizhi
>>
>>>
>>>> SYNC_DEBUG_BO,
>>>> ATTACH_DEBUG_BO,
>>>> DETACH_DEBUG_BO,
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>> b/drivers/accel/ amdxdna/amdxdna_mailbox.c
>>>> index 37771bdb24a1..cc8865f4e79c 100644
>>>> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>> @@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct
>>>> work_struct *rx_work)
>>>> int ret;
>>>> mb_chann = container_of(rx_work, struct mailbox_channel,
>>>> rx_work);
>>>> + trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);
>>>> if (READ_ONCE(mb_chann->bad_state)) {
>>>> MB_ERR(mb_chann, "Channel in bad state, work aborted");
>>>> diff --git a/include/trace/events/amdxdna.h b/include/trace/events/
>>>> amdxdna.h
>>>> index c6cb2da7b706..71da24267e52 100644
>>>> --- a/include/trace/events/amdxdna.h
>>>> +++ b/include/trace/events/amdxdna.h
>>>> @@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point,
>>>> );
>>>> TRACE_EVENT(xdna_job,
>>>> - TP_PROTO(struct drm_sched_job *sched_job, const char
>>>> *name, const char *str, u64 seq),
>>>> + TP_PROTO(struct drm_sched_job *sched_job, const char *name,
>>>> + const char *str, u64 seq, u32 op),
>>>> - TP_ARGS(sched_job, name, str, seq),
>>>> + TP_ARGS(sched_job, name, str, seq, op),
>>>> TP_STRUCT__entry(__string(name, name)
>>>> __string(str, str)
>>>> __field(u64, fence_context)
>>>> __field(u64, fence_seqno)
>>>> - __field(u64, seq)),
>>>> + __field(u64, seq)
>>>> + __field(u32, op)),
>>>> TP_fast_assign(__assign_str(name);
>>>> __assign_str(str);
>>>> __entry->fence_context = sched_job->s_fence-
>>>> >finished.context;
>>>> __entry->fence_seqno = sched_job->s_fence-
>>>> >finished.seqno;
>>>> - __entry->seq = seq;),
>>>> + __entry->seq = seq;
>>>> + __entry->op = op;),
>>>> - TP_printk("fence=(context:%llu, seqno:%lld), %s
>>>> seq#:%lld %s",
>>>> + TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu
>>>> %s, op=%u",
>>>> __entry->fence_context, __entry->fence_seqno,
>>>> __get_str(name), __entry->seq,
>>>> - __get_str(str))
>>>> + __get_str(str),
>>>> + __entry->op)
>>>> );
>>>> DECLARE_EVENT_CLASS(xdna_mbox_msg,
>>>> @@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head,
>>>> TP_ARGS(name, chann_id, opcode, id)
>>>> );
>>>> -TRACE_EVENT(mbox_irq_handle,
>>>> - TP_PROTO(char *name, int irq),
>>>> +DECLARE_EVENT_CLASS(xdna_mbox_name_id,
>>>> + TP_PROTO(char *name, int irq),
>>>> - TP_ARGS(name, irq),
>>>> + TP_ARGS(name, irq),
>>>> - TP_STRUCT__entry(__string(name, name)
>>>> - __field(int, irq)),
>>>> + TP_STRUCT__entry(__string(name, name)
>>>> + __field(int, irq)),
>>>> - TP_fast_assign(__assign_str(name);
>>>> - __entry->irq = irq;),
>>>> + TP_fast_assign(__assign_str(name);
>>>> + __entry->irq = irq;),
>>>> +
>>>> + TP_printk("%s.%d", __get_str(name), __entry->irq)
>>>> +);
>>>> +
>>>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
>>>> + TP_PROTO(char *name, int irq),
>>>> + TP_ARGS(name, irq)
>>>> +);
>>>> - TP_printk("%s.%d", __get_str(name), __entry->irq)
>>>> +DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
>>>> + TP_PROTO(char *name, int irq),
>>>> + TP_ARGS(name, irq)
>>>> );
>>>> #endif /* !defined(_TRACE_AMDXDNA_H) ||
>>>> defined(TRACE_HEADER_MULTI_READ) */
>>>
>
prev parent reply other threads:[~2026-04-22 15:38 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-21 18:15 [PATCH V1] accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker Lizhi Hou
2026-04-21 19:18 ` Mario Limonciello
2026-04-21 19:39 ` Lizhi Hou
2026-04-21 19:45 ` Mario Limonciello
2026-04-22 15:38 ` Lizhi Hou [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a405d199-2ce2-c8ed-3ede-4c9495b2d7da@amd.com \
--to=lizhi.hou@amd.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=linux-kernel@vger.kernel.org \
--cc=maciej.falkowski@linux.intel.com \
--cc=max.zhen@amd.com \
--cc=ogabbay@kernel.org \
--cc=quic_jhugo@quicinc.com \
--cc=sonal.santan@amd.com \
--cc=superm1@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox