public inbox for intel-xe@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH] drm/xe: Wait for HW clearance before issuing the next TLB inval.
@ 2026-04-18  0:03 fei.yang
  2026-04-20 19:58 ` Summers, Stuart
                   ` (4 more replies)
  0 siblings, 5 replies; 24+ messages in thread
From: fei.yang @ 2026-04-18  0:03 UTC (permalink / raw)
  To: intel-xe; +Cc: Fei Yang, Matthew Brost, Stuart Summers, Matt Roper

From: Fei Yang <fei.yang@intel.com>

Hardware requires the software to poll the valid bit and make sure
it's cleared before issuing a new TLB invalidation request.
We also need to avoid racing against GuC on TLB invalidations. In
order to achieve that, add a mutex to serialize TLB invalidation
requests, and whenever KMD initiates a TLB invalidation, make sure
we poll for the clearance of the valid bit before and after issuing
the TLB invalidation request.

Signed-off-by: Fei Yang <fei.yang@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Stuart Summers <stuart.summers@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c            |  8 +++-
 drivers/gpu/drm/xe/xe_gt_types.h      |  7 +++
 drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 63 +++++++++++++++++++++++++--
 3 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 8a31c963c372..186b1c10334b 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -486,15 +486,16 @@ static void wa_14026539277(struct xe_gt *gt)
 
 int xe_gt_init_early(struct xe_gt *gt)
 {
+	struct xe_device *xe = gt_to_xe(gt);
 	int err;
 
-	if (IS_SRIOV_PF(gt_to_xe(gt))) {
+	if (IS_SRIOV_PF(xe)) {
 		err = xe_gt_sriov_pf_init_early(gt);
 		if (err)
 			return err;
 	}
 
-	if (IS_SRIOV_VF(gt_to_xe(gt))) {
+	if (IS_SRIOV_VF(xe)) {
 		err = xe_gt_sriov_vf_init_early(gt);
 		if (err)
 			return err;
@@ -514,6 +515,9 @@ int xe_gt_init_early(struct xe_gt *gt)
 
 	xe_force_wake_init_gt(gt, gt_to_fw(gt));
 	spin_lock_init(&gt->global_invl_lock);
+	err = drmm_mutex_init(&xe->drm, &gt->ggtt_tlb_invl_lock);
+	if (err)
+		return err;
 
 	err = xe_gt_tlb_inval_init_early(gt);
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 7351aadd238e..3dd07d75d195 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -324,6 +324,13 @@ struct xe_gt {
 	 */
 	spinlock_t global_invl_lock;
 
+	/**
+	 * @ggtt_tlb_invl_lock: prevents back to back TLB invalidation
+	 *    by serializing TLB invalidation requests with polling for
+	 *    the valid bit enforced in between
+	 */
+	struct mutex ggtt_tlb_invl_lock;
+
 	/** @wa_active: keep track of active workarounds */
 	struct {
 		/** @wa_active.gt: bitmap with active GT workarounds */
diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
index ced58f46f846..2062f990e9de 100644
--- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
@@ -63,7 +63,9 @@ static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
 	struct xe_guc *guc = tlb_inval->private;
 	struct xe_gt *gt = guc_to_gt(guc);
 	struct xe_device *xe = guc_to_xe(guc);
+	int ret = -ECANCELED;
 
+	mutex_lock(&gt->ggtt_tlb_invl_lock);
 	/*
 	 * Returning -ECANCELED in this function is squashed at the caller and
 	 * signals waiters.
@@ -76,26 +78,81 @@ static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
 			MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
 		};
 
-		return send_tlb_inval(guc, action, ARRAY_SIZE(action));
+		ret = send_tlb_inval(guc, action, ARRAY_SIZE(action));
+		goto out;
 	} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
 		struct xe_mmio *mmio = &gt->mmio;
 
 		if (IS_SRIOV_VF(xe))
-			return -ECANCELED;
+			goto out;
+
+		/*
+		 * If there are pending GuC TLB invalidation requests
+		 * KMD requests should be avoided
+		 */
+		if (!list_empty(&gt->tlb_inval.pending_fences))
+			goto out;
 
 		CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
 		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
+			/*
+			 * In case of any failure causing CT to be disabled,
+			 * KMD needs to make sure there is no pending TLB
+			 * invalidation issued by GuC before sending more TLB
+			 * request through mmio. Wait 1-second for the valid
+			 * bit to be cleared, otherwise cancel the request.
+			 */
+			ret = xe_mmio_wait32(mmio, PVC_GUC_TLB_INV_DESC0,
+					     PVC_GUC_TLB_INV_DESC0_VALID,
+					     0, 1000 * USEC_PER_MSEC, NULL, true);
+			if (ret) {
+				ret = -ECANCELED;
+				drm_dbg(&xe->drm, "Pending TLB INV not completed\n");
+				goto out;
+			}
+
 			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
 					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
 			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
 					PVC_GUC_TLB_INV_DESC0_VALID);
+
+			/*
+			 * In case the CT is recovered, make sure there is no
+			 * pending TLB invalidation request before GuC takes over
+			 */
+			ret = xe_mmio_wait32(mmio, PVC_GUC_TLB_INV_DESC0,
+					     PVC_GUC_TLB_INV_DESC0_VALID,
+					     0, 1000 * USEC_PER_MSEC, NULL, true);
+			if (ret)
+				drm_dbg(&xe->drm, "TLB INV not completed\n");
+			ret = -ECANCELED;
 		} else {
+			/* See comments in the if clause above */
+			ret = xe_mmio_wait32(mmio, GUC_TLB_INV_CR,
+					     GUC_TLB_INV_CR_INVALIDATE,
+					     0, 1000 * USEC_PER_MSEC, NULL, true);
+			if (ret) {
+				ret = -ECANCELED;
+				drm_dbg(&xe->drm, "Pending TLB INV not completed\n");
+				goto out;
+			}
+
 			xe_mmio_write32(mmio, GUC_TLB_INV_CR,
 					GUC_TLB_INV_CR_INVALIDATE);
+
+			/* See comments in the if clause above */
+			ret = xe_mmio_wait32(mmio, GUC_TLB_INV_CR,
+					     GUC_TLB_INV_CR_INVALIDATE,
+					     0, 1000 * USEC_PER_MSEC, NULL, true);
+			if (ret)
+				drm_dbg(&xe->drm, "TLB INV not completed\n");
+			ret = -ECANCELED;
 		}
 	}
 
-	return -ECANCELED;
+out:
+	mutex_unlock(&gt->ggtt_tlb_invl_lock);
+	return ret;
 }
 
 static int send_page_reclaim(struct xe_guc *guc, u32 seqno,
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 24+ messages in thread
* [PATCH] drm/xe: Wait for HW clearance before issuing the next TLB inval.
@ 2026-04-04  6:22 fei.yang
  2026-04-06 18:59 ` Matthew Brost
  2026-04-06 21:01 ` Matt Roper
  0 siblings, 2 replies; 24+ messages in thread
From: fei.yang @ 2026-04-04  6:22 UTC (permalink / raw)
  To: intel-xe; +Cc: Fei Yang, Matthew Brost, Stuart Summers

From: Fei Yang <fei.yang@intel.com>

Hardware requires the software to poll the valid bit and make sure it's
cleared before issuing a new TLB invalidation request.
We also need to avoid racing against GuC on TLB invalidations by only
allowing KMD to issue TLB invalidation requests during GuC reset.

v2: separate ggtt inval to xe_gam_port and call in guc_reset only (Matt)

Signed-off-by: Fei Yang <fei.yang@intel.com>
Suggested-by: Matthew Brost <matthew.brost@intel.com>
Cc: Stuart Summers <stuart.summers@intel.com>
---
 drivers/gpu/drm/xe/Makefile           |  1 +
 drivers/gpu/drm/xe/xe_gam_port.c      | 72 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gam_port.h      | 15 ++++++
 drivers/gpu/drm/xe/xe_gt.c            |  1 +
 drivers/gpu/drm/xe/xe_gt_types.h      |  7 +++
 drivers/gpu/drm/xe/xe_guc.c           |  4 ++
 drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 18 -------
 7 files changed, 100 insertions(+), 18 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_gam_port.c
 create mode 100644 drivers/gpu/drm/xe/xe_gam_port.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index f9abaf687d46..96127ad60e9b 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -78,6 +78,7 @@ xe-y += xe_bb.o \
 	xe_guc_rc.o \
 	xe_guc_submit.o \
 	xe_guc_tlb_inval.o \
+	xe_gam_port.o \
 	xe_heci_gsc.o \
 	xe_huc.o \
 	xe_hw_engine.o \
diff --git a/drivers/gpu/drm/xe/xe_gam_port.c b/drivers/gpu/drm/xe/xe_gam_port.c
new file mode 100644
index 000000000000..137887cf93b8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gam_port.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_gt_stats.h"
+#include "xe_gt_types.h"
+#include "xe_gt_printk.h"
+#include "xe_mmio.h"
+#include "xe_gam_port.h"
+
+#include "regs/xe_guc_regs.h"
+
+/*
+ * GGTT TLB invalidation as part of GuC reset flow while the communication
+ * between host and GuC is disabled.
+ */
+
+int xe_gam_port_tlb_inval_ggtt(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_mmio *mmio = &gt->mmio;
+	int ret = 0;
+
+	spin_lock(&gt->mmio_tlb_invl_lock);
+
+	if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
+		/* Wait 1-second for the valid bit to be cleared */
+		ret = xe_mmio_wait32(mmio, PVC_GUC_TLB_INV_DESC0, PVC_GUC_TLB_INV_DESC0_VALID,
+				     0, 1000 * USEC_PER_MSEC, NULL, true);
+		if (ret) {
+			ret = -ECANCELED;
+			goto out;
+		}
+
+		xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, PVC_GUC_TLB_INV_DESC1_INVALIDATE);
+		xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0, PVC_GUC_TLB_INV_DESC0_VALID);
+
+		/* Wait 1-second for the valid bit to be cleared */
+		ret = xe_mmio_wait32(mmio, PVC_GUC_TLB_INV_DESC0, PVC_GUC_TLB_INV_DESC0_VALID,
+				     0, 1000 * USEC_PER_MSEC, NULL, true);
+		if (ret) {
+			ret = -ECANCELED;
+			goto out;
+		}
+	} else {
+		/* Wait 1-second for the valid bit to be cleared */
+		ret = xe_mmio_wait32(mmio, GUC_TLB_INV_CR, GUC_TLB_INV_CR_INVALIDATE,
+				     0, 1000 * USEC_PER_MSEC, NULL, true);
+		if (ret) {
+			ret = -ECANCELED;
+			goto out;
+		}
+
+		xe_mmio_write32(mmio, GUC_TLB_INV_CR, GUC_TLB_INV_CR_INVALIDATE);
+
+		/* Wait 1-second for the valid bit to be cleared */
+		ret = xe_mmio_wait32(mmio, GUC_TLB_INV_CR, GUC_TLB_INV_CR_INVALIDATE,
+				     0, 1000 * USEC_PER_MSEC, NULL, true);
+		if (ret) {
+			ret = -ECANCELED;
+			goto out;
+		}
+	}
+
+out:
+	spin_unlock(&gt->mmio_tlb_invl_lock);
+
+	if (ret)
+		xe_gt_warn(gt, "TLB INVAL cancelled due to uncleared valid bit\n");
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_gam_port.h b/drivers/gpu/drm/xe/xe_gam_port.h
new file mode 100644
index 000000000000..b5c383d635f1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gam_port.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_GAM_PORT_H_
+#define _XE_GAM_PORT_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+
+int xe_gam_port_tlb_inval_ggtt(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 8a31c963c372..6bb990069eff 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -514,6 +514,7 @@ int xe_gt_init_early(struct xe_gt *gt)
 
 	xe_force_wake_init_gt(gt, gt_to_fw(gt));
 	spin_lock_init(&gt->global_invl_lock);
+	spin_lock_init(&gt->mmio_tlb_invl_lock);
 
 	err = xe_gt_tlb_inval_init_early(gt);
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 8b55cf25a75f..2418a0b2f19c 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -324,6 +324,13 @@ struct xe_gt {
 	 */
 	spinlock_t global_invl_lock;
 
+	/**
+	 * @mmio_tlb_invl_lock: prevents back to back TLB invalidation
+	 *    without polling for hardware clearance for the previous
+	 *    invalidation
+	 */
+	spinlock_t mmio_tlb_invl_lock;
+
 	/** @wa_active: keep track of active workarounds */
 	struct {
 		/** @wa_active.gt: bitmap with active GT workarounds */
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index e762eada21db..70eff5b22fbb 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -48,6 +48,7 @@
 #include "xe_uc_fw.h"
 #include "xe_wa.h"
 #include "xe_wopcm.h"
+#include "xe_gam_port.h"
 
 static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
 			    struct xe_bo *bo)
@@ -991,6 +992,9 @@ int xe_guc_reset(struct xe_guc *guc)
 		goto err_out;
 	}
 
+	if (xe_gam_port_tlb_inval_ggtt(gt))
+		xe_gt_warn(gt, "GGTT TLB invalidation timed out\n");
+
 	return 0;
 
 err_out:
diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
index ced58f46f846..7c3e63e49760 100644
--- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
@@ -61,8 +61,6 @@ static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno)
 static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
 {
 	struct xe_guc *guc = tlb_inval->private;
-	struct xe_gt *gt = guc_to_gt(guc);
-	struct xe_device *xe = guc_to_xe(guc);
 
 	/*
 	 * Returning -ECANCELED in this function is squashed at the caller and
@@ -77,22 +75,6 @@ static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
 		};
 
 		return send_tlb_inval(guc, action, ARRAY_SIZE(action));
-	} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
-		struct xe_mmio *mmio = &gt->mmio;
-
-		if (IS_SRIOV_VF(xe))
-			return -ECANCELED;
-
-		CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
-		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
-			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
-					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
-			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
-					PVC_GUC_TLB_INV_DESC0_VALID);
-		} else {
-			xe_mmio_write32(mmio, GUC_TLB_INV_CR,
-					GUC_TLB_INV_CR_INVALIDATE);
-		}
 	}
 
 	return -ECANCELED;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 24+ messages in thread
* [PATCH] drm/xe: Wait for HW clearance before issuing the next TLB inval.
@ 2026-03-17 23:21 fei.yang
  2026-03-17 23:28 ` Summers, Stuart
  0 siblings, 1 reply; 24+ messages in thread
From: fei.yang @ 2026-03-17 23:21 UTC (permalink / raw)
  To: intel-xe; +Cc: stuart.summers, matthew.brost, Fei Yang

From: Fei Yang <fei.yang@intel.com>

Hardware requires the software to poll the valid bit and make sure it's
cleared before issuing a new TLB invalidation request.

Signed-off-by: Fei Yang <fei.yang@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
index ced58f46f846..4c2f87db3167 100644
--- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
@@ -63,6 +63,7 @@ static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
 	struct xe_guc *guc = tlb_inval->private;
 	struct xe_gt *gt = guc_to_gt(guc);
 	struct xe_device *xe = guc_to_xe(guc);
+	int ret;
 
 	/*
 	 * Returning -ECANCELED in this function is squashed at the caller and
@@ -85,11 +86,25 @@ static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
 
 		CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
 		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
+			/* Wait 1-second for the valid bit to be cleared */
+			ret = xe_mmio_wait32(mmio, PVC_GUC_TLB_INV_DESC0, PVC_GUC_TLB_INV_DESC0_VALID,
+					     0, 1000 * USEC_PER_MSEC, NULL, false);
+			if (ret) {
+				pr_info("TLB INVAL cancelled due to uncleared valid bit\n");
+				return -ECANCELED;
+			}
 			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
 					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
 			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
 					PVC_GUC_TLB_INV_DESC0_VALID);
 		} else {
+			/* Wait 1-second for the valid bit to be cleared */
+			ret = xe_mmio_wait32(mmio, GUC_TLB_INV_CR, GUC_TLB_INV_CR_INVALIDATE,
+					     0, 1000 * USEC_PER_MSEC, NULL, false);
+			if (ret) {
+				pr_info("TLB INVAL cancelled due to uncleared valid bit\n");
+				return -ECANCELED;
+			}
 			xe_mmio_write32(mmio, GUC_TLB_INV_CR,
 					GUC_TLB_INV_CR_INVALIDATE);
 		}
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2026-04-22  2:50 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-18  0:03 [PATCH] drm/xe: Wait for HW clearance before issuing the next TLB inval fei.yang
2026-04-20 19:58 ` Summers, Stuart
2026-04-20 20:51 ` ✓ CI.KUnit: success for drm/xe: Wait for HW clearance before issuing the next TLB inval. (rev3) Patchwork
2026-04-20 21:42 ` ✗ Xe.CI.BAT: failure " Patchwork
2026-04-21  0:34 ` ✗ Xe.CI.FULL: " Patchwork
2026-04-22  2:40 ` [PATCH] drm/xe: Wait for HW clearance before issuing the next TLB inval Matthew Brost
2026-04-22  2:49   ` Matthew Brost
  -- strict thread matches above, loose matches on Subject: below --
2026-04-04  6:22 fei.yang
2026-04-06 18:59 ` Matthew Brost
2026-04-06 21:01 ` Matt Roper
2026-03-17 23:21 fei.yang
2026-03-17 23:28 ` Summers, Stuart
2026-03-22  5:35   ` Matthew Brost
2026-03-24 20:39     ` Yang, Fei
2026-03-24 20:53       ` Matthew Brost
2026-03-24 20:58         ` Matthew Brost
2026-03-24 21:10           ` Summers, Stuart
2026-03-24 23:36             ` Matthew Brost
2026-03-25 18:37               ` Summers, Stuart
2026-03-25 22:00                 ` Matthew Brost
2026-03-25 22:25                   ` Summers, Stuart
2026-03-25 22:38                     ` Matthew Brost
2026-03-25 22:43                       ` Summers, Stuart
2026-03-26  0:54                         ` Matthew Brost

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox