Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: John.C.Harrison@Intel.com
To: Intel-Xe@Lists.FreeDesktop.Org
Cc: John Harrison <John.C.Harrison@Intel.com>
Subject: [PATCH v7 08/10] drm/xe/guc: Dump entire CTB on errors
Date: Thu,  5 Sep 2024 13:51:03 -0700	[thread overview]
Message-ID: <20240905205106.1063091-9-John.C.Harrison@Intel.com> (raw)
In-Reply-To: <20240905205106.1063091-1-John.C.Harrison@Intel.com>

From: John Harrison <John.C.Harrison@Intel.com>

The dump of the CT buffers was only showing the unprocessed data which
is not generally useful for saying why a hang occurred - because it
was probably caused by the commands that were just processed. So save
and dump the entire buffer but in a more compact dump format. Also
zero fill it on allocation to avoid confusion over uninitialised data
in the dump.

v2: Add kerneldoc - review feedback from Michal W.
v3: Fix kerneldoc.
v4: Use ascii85 instead of hexdump (review feedback from Matthew B).
v5: Dump the entire CTB object rather than seperately dumping just the
H2G and G2H sections. That way it includes the full header info.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c       | 95 ++++++++++------------------
 drivers/gpu/drm/xe/xe_guc_ct.h       |  8 +--
 drivers/gpu/drm/xe/xe_guc_ct_types.h |  6 +-
 3 files changed, 41 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index e31b1f0b855f..6dd93dcbf1fc 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -17,6 +17,7 @@
 #include "abi/guc_actions_sriov_abi.h"
 #include "abi/guc_klvs_abi.h"
 #include "xe_bo.h"
+#include "xe_devcoredump.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_gt_pagefault.h"
@@ -444,6 +445,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
 
 	xe_gt_assert(gt, !xe_guc_ct_enabled(ct));
 
+	xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size);
 	guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
 	guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
 
@@ -1571,49 +1573,33 @@ static void g2h_worker_func(struct work_struct *w)
 	receive_g2h(ct);
 }
 
-static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
-				     struct guc_ctb_snapshot *snapshot,
-				     bool atomic)
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic)
 {
-	u32 head, tail;
-
-	xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
-			   sizeof(struct guc_ct_buffer_desc));
-	memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
+	struct xe_guc_ct_snapshot *snapshot;
 
-	snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32),
-				       atomic ? GFP_ATOMIC : GFP_KERNEL);
+	snapshot = kzalloc(sizeof(*snapshot), atomic ? GFP_ATOMIC : GFP_KERNEL);
+	if (!snapshot)
+		return NULL;
 
-	if (!snapshot->cmds) {
-		drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CT info will be available.\n");
-		return;
+	if (ct->bo) {
+		snapshot->ctb_size = ct->bo->size;
+		snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL);
 	}
 
-	head = snapshot->desc.head;
-	tail = snapshot->desc.tail;
-
-	if (head != tail) {
-		struct iosys_map map =
-			IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
-
-		while (head != tail) {
-			snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32);
-			++head;
-			if (head == ctb->info.size) {
-				head = 0;
-				map = ctb->cmds;
-			} else {
-				iosys_map_incr(&map, sizeof(u32));
-			}
-		}
-	}
+	return snapshot;
+}
+
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+				     struct guc_ctb_snapshot *snapshot)
+{
+	xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+			   sizeof(struct guc_ct_buffer_desc));
+	memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
 }
 
 static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
 				   struct drm_printer *p)
 {
-	u32 head, tail;
-
 	drm_printf(p, "\tsize: %d\n", snapshot->info.size);
 	drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
 	drm_printf(p, "\thead: %d\n", snapshot->info.head);
@@ -1623,25 +1609,6 @@ static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
 	drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
 	drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
 	drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
-
-	if (!snapshot->cmds)
-		return;
-
-	head = snapshot->desc.head;
-	tail = snapshot->desc.tail;
-
-	while (head != tail) {
-		drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
-			   snapshot->cmds[head]);
-		++head;
-		if (head == snapshot->info.size)
-			head = 0;
-	}
-}
-
-static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
-{
-	kfree(snapshot->cmds);
 }
 
 /**
@@ -1662,9 +1629,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
 	struct xe_device *xe = ct_to_xe(ct);
 	struct xe_guc_ct_snapshot *snapshot;
 
-	snapshot = kzalloc(sizeof(*snapshot),
-			   atomic ? GFP_ATOMIC : GFP_KERNEL);
-
+	snapshot = xe_guc_ct_snapshot_alloc(ct, atomic);
 	if (!snapshot) {
 		drm_err(&xe->drm, "Skipping CTB snapshot entirely.\n");
 		return NULL;
@@ -1673,12 +1638,13 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
 	if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
 		snapshot->ct_enabled = true;
 		snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
-		guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
-					 &snapshot->h2g, atomic);
-		guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h,
-					 &snapshot->g2h, atomic);
+		guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, &snapshot->h2g);
+		guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, &snapshot->g2h);
 	}
 
+	if (ct->bo && snapshot->ctb)
+		xe_map_memcpy_from(xe, snapshot->ctb, &ct->bo->vmap, 0, snapshot->ctb_size);
+
 	return snapshot;
 }
 
@@ -1701,9 +1667,15 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
 
 		drm_puts(p, "G2H CTB (all sizes in DW):\n");
 		guc_ctb_snapshot_print(&snapshot->g2h, p);
-
 		drm_printf(p, "\tg2h outstanding: %d\n",
 			   snapshot->g2h_outstanding);
+
+		if (snapshot->ctb) {
+			xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
+		} else {
+			drm_printf(p, "CTB snapshot missing!\n");
+			return;
+		}
 	} else {
 		drm_puts(p, "CT disabled\n");
 	}
@@ -1721,8 +1693,7 @@ void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
 	if (!snapshot)
 		return;
 
-	guc_ctb_snapshot_free(&snapshot->h2g);
-	guc_ctb_snapshot_free(&snapshot->g2h);
+	kfree(snapshot->ctb);
 	kfree(snapshot);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 293041bed7ed..338f0b75d29f 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -9,6 +9,7 @@
 #include "xe_guc_ct_types.h"
 
 struct drm_printer;
+struct xe_device;
 
 int xe_guc_ct_init(struct xe_guc_ct *ct);
 int xe_guc_ct_enable(struct xe_guc_ct *ct);
@@ -16,10 +17,9 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct);
 void xe_guc_ct_stop(struct xe_guc_ct *ct);
 void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
 
-struct xe_guc_ct_snapshot *
-xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
-void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
-			      struct drm_printer *p);
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic);
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_printer *p);
 void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
 void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
 
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index 85e127ec91d7..8e1b9d981d61 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -52,8 +52,6 @@ struct guc_ctb {
 struct guc_ctb_snapshot {
 	/** @desc: snapshot of the CTB descriptor */
 	struct guc_ct_buffer_desc desc;
-	/** @cmds: snapshot of the CTB commands */
-	u32 *cmds;
 	/** @info: snapshot of the CTB info */
 	struct guc_ctb_info info;
 };
@@ -70,6 +68,10 @@ struct xe_guc_ct_snapshot {
 	struct guc_ctb_snapshot g2h;
 	/** @h2g: H2G CTB snapshot */
 	struct guc_ctb_snapshot h2g;
+	/** @ctb_size: size of the snapshot of the CTB */
+	size_t ctb_size;
+	/** @ctb: snapshot of the entire CTB */
+	u32 *ctb;
 };
 
 /**
-- 
2.46.0


  parent reply	other threads:[~2024-09-05 20:51 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-05 20:50 [PATCH v7 00/10] drm/xe/guc: Improve GuC log dumping and add to devcoredump John.C.Harrison
2024-09-05 20:50 ` [PATCH v7 01/10] drm/xe/guc: Remove spurious line feed in debug print John.C.Harrison
2024-09-10 19:32   ` Julia Filipchuk
2024-09-05 20:50 ` [PATCH v7 02/10] drm/xe/devcoredump: Add a section heading for the submission backend John.C.Harrison
2024-09-10 19:33   ` Julia Filipchuk
2024-09-05 20:50 ` [PATCH v7 03/10] drm/xe/devcoredump: Add ASCII85 dump helper function John.C.Harrison
2024-09-06  1:54   ` Lucas De Marchi
2024-09-06  2:01     ` John Harrison
2024-09-06  3:04       ` Lucas De Marchi
2024-09-07  2:06         ` John Harrison
2024-09-10  1:31           ` John Harrison
2024-09-10 19:43             ` Lucas De Marchi
2024-09-10 20:17               ` John Harrison
2024-09-11 19:12                 ` Lucas De Marchi
2024-09-11 19:30                   ` Souza, Jose
2024-09-11 19:35                     ` John Harrison
2024-09-11 19:54                       ` Souza, Jose
2024-09-11 19:59                         ` John Harrison
2024-09-12 13:57                           ` Rodrigo Vivi
2024-09-12 18:06                             ` John Harrison
2024-09-16 15:32                               ` Rodrigo Vivi
2024-09-16 17:46                                 ` John Harrison
2024-09-11 19:31                   ` John Harrison
2024-09-10 19:33   ` Julia Filipchuk
2024-09-11  1:27     ` John Harrison
2024-09-05 20:50 ` [PATCH v7 04/10] drm/xe/guc: Copy GuC log prior to dumping John.C.Harrison
2024-09-11  0:48   ` Julia Filipchuk
2024-09-05 20:51 ` [PATCH v7 05/10] drm/xe/guc: Use a two stage dump for GuC logs and add more info John.C.Harrison
2024-09-11  0:48   ` Julia Filipchuk
2024-09-11  1:14     ` John Harrison
2024-09-05 20:51 ` [PATCH v7 06/10] drm/print: Introduce drm_line_printer John.C.Harrison
2024-09-05 20:51 ` [PATCH v7 07/10] drm/xe/guc: Dead CT helper John.C.Harrison
2024-09-11  0:09   ` John Harrison
2024-09-11 19:55   ` Julia Filipchuk
2024-09-11 20:13     ` John Harrison
2024-09-11 20:57       ` Julia Filipchuk
2024-09-05 20:51 ` John.C.Harrison [this message]
2024-09-11 20:12   ` [PATCH v7 08/10] drm/xe/guc: Dump entire CTB on errors Julia Filipchuk
2024-09-05 20:51 ` [PATCH v7 09/10] drm/xe/guc: Add GuC log to devcoredump captures John.C.Harrison
2024-09-11 20:25   ` Julia Filipchuk
2024-09-05 20:51 ` [PATCH v7 10/10] drm/xe/guc: Add a helper function for dumping GuC log to dmesg John.C.Harrison
2024-09-11 20:36   ` Julia Filipchuk
2024-09-11 20:41     ` John Harrison
2024-09-05 20:57 ` ✓ CI.Patch_applied: success for drm/xe/guc: Improve GuC log dumping and add to devcoredump (rev2) Patchwork
2024-09-05 20:57 ` ✗ CI.checkpatch: warning " Patchwork
2024-09-05 20:58 ` ✓ CI.KUnit: success " Patchwork
2024-09-05 21:10 ` ✓ CI.Build: " Patchwork
2024-09-05 21:13 ` ✓ CI.Hooks: " Patchwork
2024-09-05 21:14 ` ✗ CI.checksparse: warning " Patchwork
2024-09-05 22:06 ` ✗ CI.BAT: failure " Patchwork
2024-09-08  0:02 ` ✗ CI.FULL: " Patchwork
2024-09-12  9:16 ` [PATCH v7 00/10] drm/xe/guc: Improve GuC log dumping and add to devcoredump Jani Nikula
2024-09-12 18:50   ` John Harrison
2024-09-13  7:26     ` Jani Nikula

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240905205106.1063091-9-John.C.Harrison@Intel.com \
    --to=john.c.harrison@intel.com \
    --cc=Intel-Xe@Lists.FreeDesktop.Org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox