public inbox for intel-xe@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH] drm/xe/mert: Improve handling of MERT CAT errors
@ 2026-01-12 18:37 Michal Wajdeczko
  2026-01-12 19:07 ` ✓ CI.KUnit: success for " Patchwork
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Michal Wajdeczko @ 2026-01-12 18:37 UTC (permalink / raw)
  To: intel-xe; +Cc: Michal Wajdeczko, Lukasz Laguna

All MERT catastrophic errors except a VF's LMTT fault are serious, so
we shouldn't limit our handling to just printing debug messages.

Change the CATERR message to error level and then declare the device
as wedged to match the expectations of the design document. For
LMTT faults, add a note about tracking this unexpected VF activity
in the future.

While at it, rename the register field definitions to match the BSpec.
Also drop the trailing include guard name from the regs.h file.

BSpec: 74625
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Lukasz Laguna <lukasz.laguna@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_mert_regs.h | 10 +++---
 drivers/gpu/drm/xe/xe_mert.c           | 43 ++++++++++++++++++++------
 2 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_mert_regs.h b/drivers/gpu/drm/xe/regs/xe_mert_regs.h
index c345e11ceea8..99e5a26da657 100644
--- a/drivers/gpu/drm/xe/regs/xe_mert_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_mert_regs.h
@@ -11,11 +11,13 @@
 #define MERT_LMEM_CFG				XE_REG(0x1448b0)
 
 #define MERT_TLB_CT_INTR_ERR_ID_PORT		XE_REG(0x145190)
-#define   MERT_TLB_CT_VFID_MASK			REG_GENMASK(16, 9)
-#define   MERT_TLB_CT_ERROR_MASK		REG_GENMASK(5, 0)
-#define     MERT_TLB_CT_LMTT_FAULT		0x05
+#define   CATERR_VFID				REG_GENMASK(16, 9)
+#define   CATERR_CODES				REG_GENMASK(5, 0)
+#define     CATERR_NO_ERROR			0x00
+#define     CATERR_UNMAPPED_GGTT		0x01
+#define     CATERR_LMTT_FAULT			0x05
 
 #define MERT_TLB_INV_DESC_A			XE_REG(0x14cf7c)
 #define   MERT_TLB_INV_DESC_A_VALID		REG_BIT(0)
 
-#endif /* _XE_MERT_REGS_H_ */
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mert.c b/drivers/gpu/drm/xe/xe_mert.c
index fc027d2d7a5e..f637df95418b 100644
--- a/drivers/gpu/drm/xe/xe_mert.c
+++ b/drivers/gpu/drm/xe/xe_mert.c
@@ -9,6 +9,7 @@
 #include "xe_device.h"
 #include "xe_mert.h"
 #include "xe_mmio.h"
+#include "xe_sriov_printk.h"
 #include "xe_tile.h"
 
 /**
@@ -55,6 +56,37 @@ int xe_mert_invalidate_lmtt(struct xe_device *xe)
 	return 0;
 }
 
+static void mert_handle_cat_error(struct xe_device *xe)
+{
+	struct xe_tile *tile = xe_device_get_root_tile(xe);
+	u32 reg_val, vfid, code;
+
+	reg_val = xe_mmio_read32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT);
+	if (!reg_val)
+		return;
+	xe_mmio_write32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT, 0);
+
+	vfid = FIELD_GET(CATERR_VFID, reg_val);
+	code = FIELD_GET(CATERR_CODES, reg_val);
+
+	switch (code) {
+	case CATERR_NO_ERROR:
+		break;
+	case CATERR_UNMAPPED_GGTT:
+		xe_sriov_err(xe, "MERT: CAT_ERR: Access to an unmapped GGTT!\n");
+		xe_device_declare_wedged(xe);
+		break;
+	case CATERR_LMTT_FAULT:
+		xe_sriov_dbg(xe, "MERT: CAT_ERR: VF%u LMTT fault!\n", vfid);
+		/* XXX: track/report malicious VF activity */
+		break;
+	default:
+		xe_sriov_err(xe, "MERT: Unexpected CAT_ERR code=%#x!\n", code);
+		xe_device_declare_wedged(xe);
+		break;
+	}
+}
+
 /**
  * xe_mert_irq_handler - Handler for MERT interrupts
  * @xe: the &xe_device
@@ -68,20 +100,11 @@ void xe_mert_irq_handler(struct xe_device *xe, u32 master_ctl)
 	struct xe_mert *mert = &tile->mert;
 	unsigned long flags;
 	u32 reg_val;
-	u8 err;
 
 	if (!(master_ctl & SOC_H2DMEMINT_IRQ))
 		return;
 
-	reg_val = xe_mmio_read32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT);
-	xe_mmio_write32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT, 0);
-
-	err = FIELD_GET(MERT_TLB_CT_ERROR_MASK, reg_val);
-	if (err == MERT_TLB_CT_LMTT_FAULT)
-		drm_dbg(&xe->drm, "MERT catastrophic error: LMTT fault (VF%u)\n",
-			FIELD_GET(MERT_TLB_CT_VFID_MASK, reg_val));
-	else if (err)
-		drm_dbg(&xe->drm, "MERT catastrophic error: Unexpected fault (0x%x)\n", err);
+	mert_handle_cat_error(xe);
 
 	spin_lock_irqsave(&mert->lock, flags);
 	if (mert->tlb_inv_triggered) {
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-01-14  8:56 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2026-01-12 18:37 [PATCH] drm/xe/mert: Improve handling of MERT CAT errors Michal Wajdeczko
2026-01-12 19:07 ` ✓ CI.KUnit: success for " Patchwork
2026-01-12 19:58 ` ✓ Xe.CI.BAT: " Patchwork
2026-01-13  1:18 ` ✗ Xe.CI.Full: failure " Patchwork
2026-01-14  8:56 ` [PATCH] " Laguna, Lukasz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox