Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: "José Roberto de Souza" <jose.souza@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: Maarten Lankhorst <dev@lankhorst.se>,
	Rodrigo Vivi <rodrigo.vivi@intel.com>
Subject: [PATCH 2/9] drm/xe: Change devcoredump functions parameters to xe_sched_job
Date: Mon, 22 Jan 2024 09:04:38 -0800	[thread overview]
Message-ID: <20240122170445.108856-2-jose.souza@intel.com> (raw)
In-Reply-To: <20240122170445.108856-1-jose.souza@intel.com>

When devcoredump starts to dump the VMs' contents it will be necessary
to know the starting addresses of the batch buffers of the job that hung.

This information is set in xe_sched_job, and xe_sched_job is not easily
accessible from xe_exec_queue, so here the parameter is changed; the next
patch will append the batch buffer addresses to the devcoredump snapshot
capture.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Maarten Lankhorst <dev@lankhorst.se>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/xe_devcoredump.c | 12 ++++++----
 drivers/gpu/drm/xe/xe_devcoredump.h |  6 ++---
 drivers/gpu/drm/xe/xe_guc_submit.c  | 36 ++++++++++++++++++++++-------
 drivers/gpu/drm/xe/xe_guc_submit.h  |  4 ++--
 4 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 68abc0b195beb..0f23ecc74b162 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -16,6 +16,7 @@
 #include "xe_guc_ct.h"
 #include "xe_guc_submit.h"
 #include "xe_hw_engine.h"
+#include "xe_sched_job.h"
 
 /**
  * DOC: Xe device coredump
@@ -123,9 +124,10 @@ static void xe_devcoredump_free(void *data)
 }
 
 static void devcoredump_snapshot(struct xe_devcoredump *coredump,
-				 struct xe_exec_queue *q)
+				 struct xe_sched_job *job)
 {
 	struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
+	struct xe_exec_queue *q = job->q;
 	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_hw_engine *hwe;
 	enum xe_hw_engine_id id;
@@ -150,7 +152,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
 	xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
 
 	coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
-	coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q);
+	coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job);
 
 	for_each_hw_engine(hwe, q->gt, id) {
 		if (hwe->class != q->hwe->class ||
@@ -173,9 +175,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
  * gt_reset. It is skipped if we still have the core dump device available
  * with the information of the 'first' snapshot.
  */
-void xe_devcoredump(struct xe_exec_queue *q)
+void xe_devcoredump(struct xe_sched_job *job)
 {
-	struct xe_device *xe = gt_to_xe(q->gt);
+	struct xe_device *xe = gt_to_xe(job->q->gt);
 	struct xe_devcoredump *coredump = &xe->devcoredump;
 
 	if (coredump->captured) {
@@ -184,7 +186,7 @@ void xe_devcoredump(struct xe_exec_queue *q)
 	}
 
 	coredump->captured = true;
-	devcoredump_snapshot(coredump, q);
+	devcoredump_snapshot(coredump, job);
 
 	drm_info(&xe->drm, "Xe device coredump has been created\n");
 	drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
index 6ac218a5c1945..df8671f0b5eb2 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -7,12 +7,12 @@
 #define _XE_DEVCOREDUMP_H_
 
 struct xe_device;
-struct xe_exec_queue;
+struct xe_sched_job;
 
 #ifdef CONFIG_DEV_COREDUMP
-void xe_devcoredump(struct xe_exec_queue *q);
+void xe_devcoredump(struct xe_sched_job *job);
 #else
-static inline void xe_devcoredump(struct xe_exec_queue *q)
+static inline void xe_devcoredump(struct xe_sched_job *job)
 {
 }
 #endif
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 7c29b8333c719..dfcc7a0af0a23 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -934,7 +934,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
 			   xe_sched_job_seqno(job), q->guc->id, q->flags);
 		simple_error_capture(q);
-		xe_devcoredump(q);
+		xe_devcoredump(job);
 	} else {
 		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
 			 xe_sched_job_seqno(job), q->guc->id, q->flags);
@@ -1789,12 +1789,12 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
  * caller, using `xe_guc_exec_queue_snapshot_free`.
  */
 struct xe_guc_submit_exec_queue_snapshot *
-xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
+xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job)
 {
+	struct xe_exec_queue *q = job->q;
 	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_gpu_scheduler *sched = &q->guc->sched;
-	struct xe_sched_job *job;
 	struct xe_guc_submit_exec_queue_snapshot *snapshot;
 	int i;
 
@@ -1852,14 +1852,16 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
 	if (!snapshot->pending_list) {
 		drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n");
 	} else {
+		struct xe_sched_job *job_iter;
+
 		i = 0;
-		list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+		list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
 			snapshot->pending_list[i].seqno =
-				xe_sched_job_seqno(job);
+				xe_sched_job_seqno(job_iter);
 			snapshot->pending_list[i].fence =
-				dma_fence_is_signaled(job->fence) ? 1 : 0;
+				dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
 			snapshot->pending_list[i].finished =
-				dma_fence_is_signaled(&job->drm.s_fence->finished)
+				dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
 				? 1 : 0;
 			i++;
 		}
@@ -1945,10 +1947,28 @@ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *s
 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
 {
 	struct xe_guc_submit_exec_queue_snapshot *snapshot;
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	struct xe_sched_job *job;
+	bool found = false;
+
+	spin_lock(&sched->base.job_list_lock);
+	list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+		if (job->q == q) {
+			xe_sched_job_get(job);
+			found = true;
+			break;
+		}
+	}
+	spin_unlock(&sched->base.job_list_lock);
 
-	snapshot = xe_guc_exec_queue_snapshot_capture(q);
+	if (!found)
+		return;
+
+	snapshot = xe_guc_exec_queue_snapshot_capture(job);
 	xe_guc_exec_queue_snapshot_print(snapshot, p);
 	xe_guc_exec_queue_snapshot_free(snapshot);
+
+	xe_sched_job_put(job);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index fc97869c5b865..723dc2bd8df91 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -9,8 +9,8 @@
 #include <linux/types.h>
 
 struct drm_printer;
-struct xe_exec_queue;
 struct xe_guc;
+struct xe_sched_job;
 
 int xe_guc_submit_init(struct xe_guc *guc);
 
@@ -27,7 +27,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
 
 struct xe_guc_submit_exec_queue_snapshot *
-xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
+xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job);
 void
 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
 				 struct drm_printer *p);
-- 
2.43.0


  reply	other threads:[~2024-01-22 17:05 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-22 17:04 [PATCH 1/9] drm/xe: Remove double new line in devcoredump José Roberto de Souza
2024-01-22 17:04 ` José Roberto de Souza [this message]
2024-01-22 18:39   ` [PATCH 2/9] drm/xe: Change devcoredump functions parameters to xe_sched_job Summers, Stuart
2024-01-22 17:04 ` [PATCH 3/9] drm/xe: Add functions to convert regular address to canonical address and back José Roberto de Souza
2024-01-22 18:38   ` Summers, Stuart
2024-01-22 18:43     ` Souza, Jose
2024-01-24 15:16       ` Summers, Stuart
2024-01-24 15:20         ` Souza, Jose
2024-01-29 17:54           ` Summers, Stuart
2024-01-23 17:48   ` Jani Nikula
2024-01-22 17:04 ` [PATCH 4/9] drm/xe: Add batch buffer addresses to devcoredump José Roberto de Souza
2024-01-22 20:13   ` Rodrigo Vivi
2024-01-22 17:04 ` [PATCH 5/9] drm/xe: Nuke xe from xe_devcoredump José Roberto de Souza
2024-01-22 20:11   ` Rodrigo Vivi
2024-01-22 20:26     ` Souza, Jose
2024-01-22 20:40       ` Rodrigo Vivi
2024-01-22 17:04 ` [PATCH 6/9] drm/xe: Stash GMD_ID value in xe_gt José Roberto de Souza
2024-01-22 17:04 ` [PATCH 7/9] drm/xe: Print more device information in devcoredump José Roberto de Souza
2024-01-22 20:18   ` Rodrigo Vivi
2024-01-22 21:12     ` Souza, Jose
2024-01-22 17:04 ` [PATCH 8/9] drm/xe: Print registers spread in 2 u32 as u64 José Roberto de Souza
2024-01-22 20:14   ` Rodrigo Vivi
2024-01-22 17:04 ` [PATCH 9/9] drm/xe: Remove addional spaces in devcoredump HW Engines section José Roberto de Souza
2024-01-22 20:18   ` Rodrigo Vivi
2024-01-22 17:32 ` ✓ CI.Patch_applied: success for series starting with [1/9] drm/xe: Remove double new line in devcoredump Patchwork
2024-01-22 17:32 ` ✗ CI.checkpatch: warning " Patchwork
2024-01-22 17:33 ` ✓ CI.KUnit: success " Patchwork
2024-01-22 17:40 ` ✓ CI.Build: " Patchwork
2024-01-22 17:41 ` ✗ CI.Hooks: failure " Patchwork
2024-01-22 17:42 ` ✓ CI.checksparse: success " Patchwork
2024-01-22 18:05 ` ✓ CI.BAT: " Patchwork
2024-01-22 18:28 ` [PATCH 1/9] " Summers, Stuart
2024-01-22 18:40   ` Souza, Jose
2024-01-22 20:20     ` Rodrigo Vivi
2024-01-24 15:17       ` Summers, Stuart

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240122170445.108856-2-jose.souza@intel.com \
    --to=jose.souza@intel.com \
    --cc=dev@lankhorst.se \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=rodrigo.vivi@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox