Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Subject: [PATCH v2] drm/xe: Enhance CT_DEAD for production builds
Date: Fri, 21 Nov 2025 08:25:37 -0800	[thread overview]
Message-ID: <20251121162537.303090-1-matthew.brost@intel.com> (raw)

If the CT fails on production builds, log its state to dmesg for quick
analysis. Also, log the CT state if a G2H fence times out.

v2:
 - Actually log CT state if a G2H fence times out

Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 36 ++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 2697d711adb2..6845d609ec10 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -41,8 +41,8 @@ static void safe_mode_worker_func(struct work_struct *w);
 static void ct_exit_safe_mode(struct xe_guc_ct *ct);
 static void guc_ct_change_state(struct xe_guc_ct *ct,
 				enum xe_guc_ct_state state);
+static void xe_guc_ct_print_err_state(struct xe_guc_ct *ct, int reason);
 
-#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
 enum {
 	/* Internal states, not error conditions */
 	CT_DEAD_STATE_REARM,			/* 0x0001 */
@@ -63,18 +63,21 @@ enum {
 	CT_DEAD_PARSE_G2H_ORIGIN,		/* 0x2000 */
 	CT_DEAD_PARSE_G2H_TYPE,			/* 0x4000 */
 	CT_DEAD_CRASH,				/* 0x8000 */
+	CT_DEAD_G2H_TIMEOUT,			/* 0x10000 */
 };
 
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
 static void ct_dead_worker_func(struct work_struct *w);
 static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code);
 
 #define CT_DEAD(ct, ctb, reason_code)		ct_dead_capture((ct), (ctb), CT_DEAD_##reason_code)
 #else
-#define CT_DEAD(ct, ctb, reason)			\
-	do {						\
-		struct guc_ctb *_ctb = (ctb);		\
-		if (_ctb)				\
-			_ctb->info.broken = true;	\
+#define CT_DEAD(ct, ctb, reason_code)					\
+	do {								\
+		struct guc_ctb *_ctb = (ctb);				\
+		xe_guc_ct_print_err_state(ct, CT_DEAD_##reason_code);	\
+		if (_ctb)						\
+			_ctb->info.broken = true;			\
 	} while (0)
 #endif
 
@@ -1220,6 +1223,7 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	if (!ret) {
 		xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
 			  g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
+		xe_guc_ct_print_err_state(ct, CT_DEAD_G2H_TIMEOUT);
 		xa_erase(&ct->fence_lookup, g2h_fence.seqno);
 		mutex_unlock(&ct->lock);
 		return -ETIME;
@@ -2016,6 +2020,26 @@ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb)
 	xe_guc_ct_snapshot_free(snapshot);
 }
 
+static void xe_guc_ct_print_err_state(struct xe_guc_ct *ct, int reason)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_gt *gt = ct_to_gt(ct);
+	struct guc_ctb *h2g = &ct->ctbs.h2g;
+	struct guc_ctb *g2h = &ct->ctbs.g2h;
+
+	/* Don't spam dmesg, only print first failure */
+	if (h2g->info.broken || g2h->info.broken)
+		return;
+
+	xe_gt_err(gt, "CT_DEAD: reason=%d\n", reason);
+	xe_gt_err(gt, "H2G.head=%d, H2G.tail=%d, H2G.status=%d\n",
+		  desc_read(xe, h2g, head), desc_read(xe, h2g, tail),
+		  desc_read(xe, h2g, status));
+	xe_gt_err(gt, "G2H.head=%d, G2H.tail=%d, G2H.status=%d\n",
+		  desc_read(xe, g2h, head), desc_read(xe, g2h, tail),
+		  desc_read(xe, g2h, status));
+}
+
 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
 
 #ifdef CONFIG_FUNCTION_ERROR_INJECTION
-- 
2.34.1


                 reply	other threads:[~2025-11-21 16:25 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251121162537.303090-1-matthew.brost@intel.com \
    --to=matthew.brost@intel.com \
    --cc=daniele.ceraolospurio@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.