Intel-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH] drm/i915/guc: add CAT error handler
@ 2022-10-19  8:33 Andrzej Hajda
  2022-10-19 12:47 ` [Intel-gfx] ✓ Fi.CI.BAT: success for " Patchwork
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Andrzej Hajda @ 2022-10-19  8:33 UTC (permalink / raw)
  To: intel-gfx; +Cc: Andrzej Hajda, Lucas De Marchi, Rodrigo Vivi

In case of catastrophic errors, GuC sends a notification, which results in
a cryptic message. Let's add a handler which, for starters, dumps the state
of the affected engine.

Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
---
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |  1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  2 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c     |  3 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 31 +++++++++++++++++++
 4 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index f359bef046e0b2..f9a1c5642855e3 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -138,6 +138,7 @@ enum intel_guc_action {
 	INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
 	INTEL_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
 	INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
+	INTEL_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000,
 	INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
 	INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
 	INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 804133df1ac9b4..61b412732d095a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -445,6 +445,8 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
 					 const u32 *msg, u32 len);
 int intel_guc_error_capture_process_msg(struct intel_guc *guc,
 					const u32 *msg, u32 len);
+int intel_guc_cat_error_process_msg(struct intel_guc *guc,
+				    const u32 *msg, u32 len);
 
 struct intel_engine_cs *
 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 2b22065e87bf9a..f55f724e264407 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -1035,6 +1035,9 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 		CT_ERROR(ct, "Received GuC exception notification!\n");
 		ret = 0;
 		break;
+	case INTEL_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
+		ret = intel_guc_cat_error_process_msg(guc, payload, len);
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 693b07a977893d..94f91dfa3ec456 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -4659,6 +4659,37 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
 	return 0;
 }
 
+/*
+ * Dump the engine of the context (id in msg[0]) GuC reported a CAT error on.
+ */
+int intel_guc_cat_error_process_msg(struct intel_guc *guc,
+				    const u32 *msg, u32 len)
+{
+	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+	struct drm_printer p;
+	unsigned long flags;
+
+	if (unlikely(len != 1)) {	/* payload must be exactly one dword */
+		drm_dbg(&i915->drm, "Invalid length %u\n", len);
+		return -EPROTO;
+	}
+
+	xa_lock_irqsave(&guc->context_lookup, flags);
+	ce = g2h_context_lookup(guc, msg[0]);
+	engine = ce ? ce->engine : NULL;	/* NULL when the lookup fails */
+	xa_unlock_irqrestore(&guc->context_lookup, flags);
+	if (unlikely(!engine))
+		return -EPROTO;
+
+	drm_err(&i915->drm, "%s: CAT error reported by GuC\n", engine->name);
+	p = drm_info_printer(i915->drm.dev);
+	intel_engine_dump(engine, &p, "%s\n", engine->name);
+
+	return 0;
+}
+
 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
 {
 	struct intel_guc *guc = &engine->gt->uc.guc;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-10-19 18:31 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-19  8:33 [Intel-gfx] [PATCH] drm/i915/guc: add CAT error handler Andrzej Hajda
2022-10-19 12:47 ` [Intel-gfx] ✓ Fi.CI.BAT: success for " Patchwork
2022-10-19 16:21 ` [Intel-gfx] [PATCH] " John Harrison
2022-10-19 18:31 ` [Intel-gfx] ✗ Fi.CI.IGT: failure for " Patchwork

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox