From: "José Roberto de Souza" <jose.souza@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: Maarten Lankhorst <dev@lankhorst.se>,
Rodrigo Vivi <rodrigo.vivi@intel.com>
Subject: [PATCH v2 2/9] drm/xe: Change devcoredump functions parameters to xe_sched_job
Date: Mon, 22 Jan 2024 13:17:02 -0800 [thread overview]
Message-ID: <20240122211709.224419-2-jose.souza@intel.com> (raw)
In-Reply-To: <20240122211709.224419-1-jose.souza@intel.com>
When devcoredump start to dump the VMs contents it will be necessary
to know the starting addresses of batch buffers of the job that hang.
This information it set in xe_sched_job and xe_sched_job is not easily
acessible from xe_exec_queue, so here changing the parameter, next
patch will append the batch buffer addresses to devcoredump snapshot
capture.
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Maarten Lankhorst <dev@lankhorst.se>
Reviewed-by: Stuart Summers <stuart.summers@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
---
drivers/gpu/drm/xe/xe_devcoredump.c | 12 ++++++----
drivers/gpu/drm/xe/xe_devcoredump.h | 6 ++---
drivers/gpu/drm/xe/xe_guc_submit.c | 36 ++++++++++++++++++++++-------
drivers/gpu/drm/xe/xe_guc_submit.h | 4 ++--
4 files changed, 40 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 68abc0b195beb..0f23ecc74b162 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -16,6 +16,7 @@
#include "xe_guc_ct.h"
#include "xe_guc_submit.h"
#include "xe_hw_engine.h"
+#include "xe_sched_job.h"
/**
* DOC: Xe device coredump
@@ -123,9 +124,10 @@ static void xe_devcoredump_free(void *data)
}
static void devcoredump_snapshot(struct xe_devcoredump *coredump,
- struct xe_exec_queue *q)
+ struct xe_sched_job *job)
{
struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
+ struct xe_exec_queue *q = job->q;
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
@@ -150,7 +152,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
- coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q);
+ coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job);
for_each_hw_engine(hwe, q->gt, id) {
if (hwe->class != q->hwe->class ||
@@ -173,9 +175,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
* gt_reset. It is skipped if we still have the core dump device available
* with the information of the 'first' snapshot.
*/
-void xe_devcoredump(struct xe_exec_queue *q)
+void xe_devcoredump(struct xe_sched_job *job)
{
- struct xe_device *xe = gt_to_xe(q->gt);
+ struct xe_device *xe = gt_to_xe(job->q->gt);
struct xe_devcoredump *coredump = &xe->devcoredump;
if (coredump->captured) {
@@ -184,7 +186,7 @@ void xe_devcoredump(struct xe_exec_queue *q)
}
coredump->captured = true;
- devcoredump_snapshot(coredump, q);
+ devcoredump_snapshot(coredump, job);
drm_info(&xe->drm, "Xe device coredump has been created\n");
drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
index 6ac218a5c1945..df8671f0b5eb2 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -7,12 +7,12 @@
#define _XE_DEVCOREDUMP_H_
struct xe_device;
-struct xe_exec_queue;
+struct xe_sched_job;
#ifdef CONFIG_DEV_COREDUMP
-void xe_devcoredump(struct xe_exec_queue *q);
+void xe_devcoredump(struct xe_sched_job *job);
#else
-static inline void xe_devcoredump(struct xe_exec_queue *q)
+static inline void xe_devcoredump(struct xe_sched_job *job)
{
}
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 7c29b8333c719..dfcc7a0af0a23 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -934,7 +934,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
xe_sched_job_seqno(job), q->guc->id, q->flags);
simple_error_capture(q);
- xe_devcoredump(q);
+ xe_devcoredump(job);
} else {
drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
xe_sched_job_seqno(job), q->guc->id, q->flags);
@@ -1789,12 +1789,12 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
* caller, using `xe_guc_exec_queue_snapshot_free`.
*/
struct xe_guc_submit_exec_queue_snapshot *
-xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
+xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job)
{
+ struct xe_exec_queue *q = job->q;
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
struct xe_gpu_scheduler *sched = &q->guc->sched;
- struct xe_sched_job *job;
struct xe_guc_submit_exec_queue_snapshot *snapshot;
int i;
@@ -1852,14 +1852,16 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
if (!snapshot->pending_list) {
drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n");
} else {
+ struct xe_sched_job *job_iter;
+
i = 0;
- list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+ list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
snapshot->pending_list[i].seqno =
- xe_sched_job_seqno(job);
+ xe_sched_job_seqno(job_iter);
snapshot->pending_list[i].fence =
- dma_fence_is_signaled(job->fence) ? 1 : 0;
+ dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
snapshot->pending_list[i].finished =
- dma_fence_is_signaled(&job->drm.s_fence->finished)
+ dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
? 1 : 0;
i++;
}
@@ -1945,10 +1947,28 @@ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *s
static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
struct xe_guc_submit_exec_queue_snapshot *snapshot;
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
+ struct xe_sched_job *job;
+ bool found = false;
+
+ spin_lock(&sched->base.job_list_lock);
+ list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+ if (job->q == q) {
+ xe_sched_job_get(job);
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&sched->base.job_list_lock);
- snapshot = xe_guc_exec_queue_snapshot_capture(q);
+ if (!found)
+ return;
+
+ snapshot = xe_guc_exec_queue_snapshot_capture(job);
xe_guc_exec_queue_snapshot_print(snapshot, p);
xe_guc_exec_queue_snapshot_free(snapshot);
+
+ xe_sched_job_put(job);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index fc97869c5b865..723dc2bd8df91 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -9,8 +9,8 @@
#include <linux/types.h>
struct drm_printer;
-struct xe_exec_queue;
struct xe_guc;
+struct xe_sched_job;
int xe_guc_submit_init(struct xe_guc *guc);
@@ -27,7 +27,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
struct xe_guc_submit_exec_queue_snapshot *
-xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
+xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job);
void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
struct drm_printer *p);
--
2.43.0
next prev parent reply other threads:[~2024-01-22 21:17 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-22 21:17 [PATCH v2 1/9] drm/xe: Remove double new lines in devcoredump José Roberto de Souza
2024-01-22 21:17 ` José Roberto de Souza [this message]
2024-01-22 21:17 ` [PATCH v2 3/9] drm/xe: Add functions to convert regular address to canonical address and back José Roberto de Souza
2024-01-22 21:17 ` [PATCH v2 4/9] drm/xe: Add batch buffer addresses to devcoredump José Roberto de Souza
2024-01-22 21:17 ` [PATCH v2 5/9] drm/xe: Nuke xe from xe_devcoredump José Roberto de Souza
2024-01-22 21:17 ` [PATCH v2 6/9] drm/xe: Stash GMD_ID value in xe_gt José Roberto de Souza
2024-01-22 21:17 ` [PATCH v2 7/9] drm/xe: Print more device information in devcoredump José Roberto de Souza
2024-01-22 21:17 ` [PATCH v2 8/9] drm/xe: Print registers spread in 2 u32 as u64 José Roberto de Souza
2024-01-22 21:17 ` [PATCH v2 9/9] drm/xe: Remove additional spaces in devcoredump HW Engines section José Roberto de Souza
2024-01-22 21:40 ` ✓ CI.Patch_applied: success for series starting with [v2,1/9] drm/xe: Remove double new lines in devcoredump Patchwork
2024-01-22 21:40 ` ✓ CI.checkpatch: " Patchwork
2024-01-22 21:41 ` ✓ CI.KUnit: " Patchwork
2024-01-22 21:49 ` ✓ CI.Build: " Patchwork
2024-01-22 21:49 ` ✗ CI.Hooks: failure " Patchwork
2024-01-22 21:50 ` ✓ CI.checksparse: success " Patchwork
2024-01-22 22:14 ` ✓ CI.BAT: " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240122211709.224419-2-jose.souza@intel.com \
--to=jose.souza@intel.com \
--cc=dev@lankhorst.se \
--cc=intel-xe@lists.freedesktop.org \
--cc=rodrigo.vivi@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox