From: Tomasz Lis <tomasz.lis@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: "Michał Winiarski" <michal.winiarski@intel.com>,
"Michał Wajdeczko" <michal.wajdeczko@intel.com>,
"Piotr Piórkowski" <piotr.piorkowski@intel.com>,
"Matthew Brost" <matthew.brost@intel.com>,
"Lucas De Marchi" <lucas.demarchi@intel.com>
Subject: [PATCH v1 3/4] drm/xe/vf: Wait for default LRCs fixups before using
Date: Fri, 6 Feb 2026 15:53:33 +0100 [thread overview]
Message-ID: <20260206145334.674679-4-tomasz.lis@intel.com> (raw)
In-Reply-To: <20260206145334.674679-1-tomasz.lis@intel.com>
When a context is being created during save/restore, the LRC creation
needs to wait for GGTT address space to be shifted. But it also needs
to have fixed default LRCs. This is mandatory to avoid the situation
where LRC will be created based on data from before the fixups, but
reference within exec queue will be set too late for fixups.
This fixes an issue where contexts created during save/restore have
a large chance of ending up with one unfixed LRC, because xe_lrc_create()
was synchronized in a way that still allowed it to race with the default
LRC fixups.
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
---
drivers/gpu/drm/xe/xe_exec_queue.c | 2 +-
drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 24 +++++++++++------------
drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 2 +-
drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 4 ++--
4 files changed, 15 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index e9396ad3390a..6eb561086e1c 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -309,7 +309,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
for (i = 0; i < q->width; ++i) {
struct xe_lrc *lrc;
- xe_gt_sriov_vf_wait_valid_ggtt(q->gt);
+ xe_gt_sriov_vf_wait_valid_default_lrc(q->gt);
lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state,
xe_lrc_ring_size(), q->msix_vec, flags);
if (IS_ERR(lrc)) {
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 30e8c2cf5f09..1edccee84c76 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -529,12 +529,6 @@ static int vf_get_ggtt_info(struct xe_gt *gt)
xe_tile_sriov_vf_fixup_ggtt_nodes_locked(gt_to_tile(gt), shift);
}
- if (xe_sriov_vf_migration_supported(gt_to_xe(gt))) {
- WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false);
- smp_wmb(); /* Ensure above write visible before wake */
- wake_up_all(&gt->sriov.vf.migration.wq);
- }
-
return 0;
}
@@ -837,6 +831,10 @@ static void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt)
for_each_hw_engine(hwe, gt, id)
xe_default_lrc_update_memirq_regs_with_address(hwe);
+
+ WRITE_ONCE(gt->sriov.vf.migration.default_lrcs_need_fixes, false);
+ smp_wmb(); /* Ensure above write visible before wake */
+ wake_up_all(&gt->sriov.vf.migration.wq);
}
static void vf_start_migration_recovery(struct xe_gt *gt)
@@ -851,7 +849,7 @@ static void vf_start_migration_recovery(struct xe_gt *gt)
!gt->sriov.vf.migration.recovery_teardown) {
gt->sriov.vf.migration.recovery_queued = true;
WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true);
- WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, true);
+ WRITE_ONCE(gt->sriov.vf.migration.default_lrcs_need_fixes, true);
smp_wmb(); /* Ensure above writes visible before wake */
xe_guc_ct_wake_waiters(&gt->uc.guc.ct);
@@ -1296,7 +1294,7 @@ static void vf_post_migration_abort(struct xe_gt *gt)
{
spin_lock_irq(&gt->sriov.vf.migration.lock);
WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false);
- WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false);
+ WRITE_ONCE(gt->sriov.vf.migration.default_lrcs_need_fixes, false);
spin_unlock_irq(&gt->sriov.vf.migration.lock);
wake_up_all(&gt->sriov.vf.migration.wq);
@@ -1492,7 +1490,7 @@ bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt)
return READ_ONCE(gt->sriov.vf.migration.recovery_inprogress);
}
-static bool vf_valid_ggtt(struct xe_gt *gt)
+static bool vf_valid_default_lrc(struct xe_gt *gt)
{
struct xe_memirq *memirq = &gt_to_tile(gt)->memirq;
bool irq_pending = xe_device_uses_memirq(gt_to_xe(gt)) &&
@@ -1500,17 +1498,17 @@ static bool vf_valid_ggtt(struct xe_gt *gt)
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
- if (irq_pending || READ_ONCE(gt->sriov.vf.migration.ggtt_need_fixes))
+ if (irq_pending || READ_ONCE(gt->sriov.vf.migration.default_lrcs_need_fixes))
return false;
return true;
}
/**
- * xe_gt_sriov_vf_wait_valid_ggtt() - VF wait for valid GGTT addresses
+ * xe_gt_sriov_vf_wait_valid_default_lrc() - wait for valid GGTT refs in default LRCs
* @gt: the &xe_gt
*/
-void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt)
+void xe_gt_sriov_vf_wait_valid_default_lrc(struct xe_gt *gt)
{
int ret;
@@ -1519,7 +1517,7 @@ void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt)
return;
ret = wait_event_interruptible_timeout(gt->sriov.vf.migration.wq,
- vf_valid_ggtt(gt),
+ vf_valid_default_lrc(gt),
HZ * 5);
xe_gt_WARN_ON(gt, !ret);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
index 7d97189c2d3d..70232dc38f9a 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
@@ -39,6 +39,6 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p);
void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p);
void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p);
-void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt);
+void xe_gt_sriov_vf_wait_valid_default_lrc(struct xe_gt *gt);
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
index 4ef881b9b662..8be181bf3cf3 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
@@ -73,8 +73,8 @@ struct xe_gt_sriov_vf_migration {
bool recovery_queued;
/** @recovery_inprogress: VF post migration recovery in progress */
bool recovery_inprogress;
- /** @ggtt_need_fixes: VF GGTT needs fixes */
- bool ggtt_need_fixes;
+ /** @default_lrcs_need_fixes: GGTT refs within default LRCs need fixes */
+ bool default_lrcs_need_fixes;
};
/**
--
2.25.1
next prev parent reply other threads:[~2026-02-06 14:49 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-06 14:53 [PATCH v1 0/4] drm/xe/vf: Fix exec queue creation during post-migration recovery Tomasz Lis
2026-02-06 14:53 ` [PATCH v1 1/4] drm/xe/queue: Call fini on exec queue creation fail Tomasz Lis
2026-02-06 17:38 ` Matthew Brost
2026-02-06 14:53 ` [PATCH v1 2/4] drm/xe/vf: Avoid LRC being freed while applying fixups Tomasz Lis
2026-02-06 17:46 ` Matthew Brost
2026-02-10 20:16 ` Lis, Tomasz
2026-02-06 14:53 ` Tomasz Lis [this message]
2026-02-06 18:11 ` [PATCH v1 3/4] drm/xe/vf: Wait for default LRCs fixups before using Matthew Brost
2026-02-10 20:11 ` Lis, Tomasz
2026-02-18 23:15 ` Lis, Tomasz
2026-02-06 14:53 ` [PATCH v1 4/4] drm/xe/vf: Redo LRC creation while in VF fixups Tomasz Lis
2026-02-06 14:56 ` ✓ CI.KUnit: success for drm/xe/vf: Fix exec queue creation during post-migration recovery Patchwork
2026-02-06 15:29 ` ✓ Xe.CI.BAT: " Patchwork
2026-02-07 15:42 ` ✗ Xe.CI.FULL: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260206145334.674679-4-tomasz.lis@intel.com \
--to=tomasz.lis@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=lucas.demarchi@intel.com \
--cc=matthew.brost@intel.com \
--cc=michal.wajdeczko@intel.com \
--cc=michal.winiarski@intel.com \
--cc=piotr.piorkowski@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox