Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Nishit Sharma <nishit.sharma@intel.com>
To: intel-xe@lists.freedesktop.org, himal.prasad.ghimiray@intel.com
Subject: [PATCH] [RFC]drm/xe: fix cross-tile SVM migration mapping on PVC and harden ENOSPC retry path
Date: Tue,  9 Jun 2026 08:40:03 +0000	[thread overview]
Message-ID: <20260609084003.70975-1-nishit.sharma@intel.com> (raw)

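This patch addresses correctness and stability issues in unified memory
(SVM) migration on multi-tile systems such as PVC, where multiple tiles
share a single device context.

In that environment, local and peer memory paths can appear equivalent at
the device level, so cross-tile traffic may use an address form that is
only valid on the owning tile. Under page-fault-driven migration, this can
fail intermittently when one tile accesses pages currently resident on
another tile.

The fix introduces explicit peer-aware mapping semantics (a new, optional
peer_map op in drm_pagemap_ops) so that cross-tile accesses are resolved
through a valid peer-accessible DMA path rather than tile-local addressing.

It also hardens the -ENOSPC retry path: when a 64K PTE cannot be formed
for VRAM on a VM that needs 64K pages, xe_pt_stage_bind_entry() now returns
-ENOSPC instead of warning and returning -EINVAL, and the SVM page fault
handler responds by evicting the range, unmapping its pages and retrying
without device memory.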

Signed-off-by: Nishit Sharma <nishit.sharma@intel.com>
---
 drivers/gpu/drm/drm_pagemap.c |  6 ++-
 drivers/gpu/drm/xe/xe_pt.c    | 14 +++--
 drivers/gpu/drm/xe/xe_svm.c   | 97 ++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_sync.c  | 13 +++++
 include/drm/drm_gpusvm.h      |  1 +
 include/drm/drm_pagemap.h     |  6 +++
 6 files changed, 132 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
index 15c78eca180b..e781cbdfa494 100644
--- a/drivers/gpu/drm/drm_pagemap.c
+++ b/drivers/gpu/drm/drm_pagemap.c
@@ -267,11 +267,15 @@ drm_pagemap_migrate_map_device_private_pages(struct device *dev,
 				goto next;
 
 			num_local_pages += NR_PAGES(order);
+			addr = dpagemap->ops->device_map(dpagemap, dev, page, order, dir);
 		} else {
 			num_peer_pages += NR_PAGES(order);
+			if (dpagemap->ops->peer_map)
+				addr = dpagemap->ops->peer_map(dpagemap, dev, page, order, dir);
+			else
+				addr = dpagemap->ops->device_map(dpagemap, dev, page, order, dir);
 		}
 
-		addr = dpagemap->ops->device_map(dpagemap, dev, page, order, dir);
 		if (dma_mapping_error(dev, addr.addr))
 			return -EFAULT;
 
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 2669ff5ee747..e0aed463373b 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -564,9 +564,17 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 					xe_walk->vma->gpuva.flags |=
 							XE_VMA_PTE_64K;
 					pte |= XE_PTE_PS64;
-				} else if (XE_WARN_ON(xe_walk->needs_64K &&
-					   is_vram)) {
-					return -EINVAL;
+				} else if (xe_walk->needs_64K &&
+					   is_vram) {
+					/*
+					 * needs_64K VM requires 64K PTEs for VRAM but
+					 * the 64K PTE cannot be formed: partial VRAM
+					 * eviction or partial migration created a mixed
+					 * SRAM+VRAM 64K window. Return -ENOSPC so the
+					 * page fault handler can evict remaining VRAM
+					 * and retry with SRAM-only mappings.
+					 */
+					return -ENOSPC;
 				}
 			}
 		}
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index e1651e70c8f0..150391c9878b 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -602,8 +602,39 @@ static int xe_svm_copy(struct page **pages,
 			vr = xe_page_to_vr(spage);
 			gt = xe_migrate_exec_queue(vr->migrate)->gt;
 			xe = vr->xe;
+		} else if (spage && xe_page_to_vr(spage) != vr) {
+			/*
+			 * On multi-tile devices (e.g. PVC/Xe_HPC), migrate_vma
+			 * may return VRAM pages from different tiles within the
+			 * same batch because all tiles share the same
+			 * pgmap_owner. Flush the current copy chunk using the
+			 * old tile before switching to the new tile's VRAM region.
+			 */
+			if (vram_addr != XE_VRAM_ADDR_INVALID) {
+				xe_svm_copy_kb_stats_incr(gt, dir,
+						(i - pos) * (PAGE_SIZE / SZ_1K));
+				if (sram)
+					__fence = xe_migrate_from_vram(vr->migrate,
+							i - pos, vram_addr,
+							&pagemap_addr[pos],
+							pre_migrate_fence);
+				else
+					__fence = xe_migrate_to_vram(vr->migrate,
+							i - pos, &pagemap_addr[pos],
+							vram_addr, pre_migrate_fence);
+				if (IS_ERR(__fence)) {
+					err = PTR_ERR(__fence);
+					goto err_out;
+				}
+				pre_migrate_fence = NULL;
+				dma_fence_put(fence);
+				fence = __fence;
+			}
+			vr = xe_page_to_vr(spage);
+			gt = xe_migrate_exec_queue(vr->migrate)->gt;
+			xe = vr->xe;
+			vram_addr = XE_VRAM_ADDR_INVALID;
 		}
-		XE_WARN_ON(spage && xe_page_to_vr(spage) != vr);
 
 		/*
 		 * CPU page and device page valid, capture physical address on
@@ -1148,6 +1179,14 @@ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vm
 
 	if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
 		drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
+		if (!supports_4K_migration(vm->xe) &&
+		   (!IS_ALIGNED(xe_svm_range_start(range), SZ_64K) ||
+		   !IS_ALIGNED(range_size, SZ_64K))) {
+			drm_dbg(&vm->xe->drm,
+				"Range not 64K-aligned for VRAM migration, start=0x%lx size=0x%lx\n",
+				xe_svm_range_start(range), (unsigned long)range_size);
+			return false;
+		}
 		return false;
 	}
 
@@ -1227,6 +1266,7 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
 	ktime_t start = xe_gt_stats_ktime_get(), bind_start, get_pages_start;
 	int err;
 
+	ctx.local_dpagemap = xe_tile_local_pagemap(tile);
 	lockdep_assert_held_write(&vm->lock);
 	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
 
@@ -1360,6 +1400,45 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
 		goto retry;
 	}
 
+	if (err == -ENOSPC) {
+		/*
+		 * 64K PTE not possible for needs_64K VM: mixed SRAM+VRAM in
+		 * the DMA address array prevented xe_pt_scan_64K from forming
+		 * a 64K PTE.  Three invariants must hold for recovery:
+		 *
+		 * 1. Evict unconditionally via drm_gpusvm_range_evict: the
+		 *    advisory has_devmem_pages flag may be stale (false) while
+		 *    VRAM DMA entries still exist in the dma_addr array.
+		 *
+		 * 2. Explicitly unmap pages after eviction.
+		 *    drm_gpusvm_range_evict clears has_dma_mapping via the MMU
+		 *    notifier ONLY when a migration actually occurs.  On a
+		 *    second recovery attempt the pages may already be in SRAM
+		 *    (no migration, no notifier), leaving has_dma_mapping true
+		 *    with stale VRAM entries in dma_addr.
+		 *    drm_gpusvm_range_unmap_pages clears this unconditionally,
+		 *    guaranteeing that drm_gpusvm_get_pages takes the full HMM
+		 *    path and returns fresh SRAM DMA entries.
+		 *
+		 * 3. Use goto retry (not goto get_pages) to re-lookup the
+		 *    range with fresh dpagemap and device_private_page_owner.
+		 *    After eviction the notifier sets tile_invalidated so
+		 *    xe_svm_range_is_valid returns false and we proceed to
+		 *    get_pages.  --migrate_try_count becomes -1 (< 0) at
+		 *    retry, skipping VRAM re-migration.
+		 */
+		range_debug(range, "PAGE FAULT - EVICT PARTIAL VRAM RETRY");
+		drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
+		drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, &range->base,
+				&ctx);
+		ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
+		ctx.devmem_only = false;
+		ctx.devmem_possible = false;
+		ctx.check_pages_threshold = 0;
+		migrate_try_count = 0;
+		goto retry;
+	}
+
 	return err;
 }
 
@@ -1695,6 +1774,21 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
 	return drm_pagemap_addr_encode(addr, prot, order, dir);
 }
 
+static struct drm_pagemap_addr
+xe_drm_pagemap_peer_map(struct drm_pagemap *dpagemap,
+			struct device *dev,
+			struct page *page,
+			unsigned int order,
+			enum dma_data_direction dir)
+{
+        dma_addr_t addr = dma_map_resource(dev,
+					   xe_page_to_pcie(page),
+					   PAGE_SIZE << order, dir,
+					   DMA_ATTR_SKIP_CPU_SYNC);
+
+	return drm_pagemap_addr_encode(addr, XE_INTERCONNECT_P2P, order, dir);
+}
+
 static void xe_drm_pagemap_device_unmap(struct drm_pagemap *dpagemap,
 					struct device *dev,
 					const struct drm_pagemap_addr *addr)
@@ -1742,6 +1836,7 @@ static void xe_pagemap_destroy(struct drm_pagemap *dpagemap, bool from_atomic_or
 
 static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
 	.device_map = xe_drm_pagemap_device_map,
+	.peer_map = xe_drm_pagemap_peer_map,
 	.device_unmap = xe_drm_pagemap_device_unmap,
 	.populate_mm = xe_drm_pagemap_populate_mm,
 	.destroy = xe_pagemap_destroy,
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 37866768d64c..d2aa888f2d03 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -346,6 +346,7 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
 
 	if (q->flags & EXEC_QUEUE_FLAG_VM) {
 		struct xe_exec_queue *__q;
+#if 0
 		struct xe_tile *tile;
 		u8 id;
 
@@ -354,6 +355,18 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
 			for_each_tlb_inval(i)
 				num_fence++;
 		}
+#endif
+		/* Count fences for main queue */
+		num_fence++;
+		for_each_tlb_inval(i)
+			num_fence++;
+
+		/* Count fences for each queue on the multi-GT list */
+		list_for_each_entry(__q, &q->multi_gt_list, multi_gt_link) {
+			num_fence++;
+			for_each_tlb_inval(i)
+				num_fence++;
+		}
 
 		fences = kmalloc_objs(*fences, num_fence);
 		if (!fences)
diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h
index 8a4d7134a9a7..6f03111fc8ef 100644
--- a/include/drm/drm_gpusvm.h
+++ b/include/drm/drm_gpusvm.h
@@ -250,6 +250,7 @@ struct drm_gpusvm_ctx {
 	unsigned int devmem_possible :1;
 	unsigned int devmem_only :1;
 	unsigned int allow_mixed :1;
+	struct drm_pagemap *local_dpagemap;
 };
 
 int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
index 95eb4b66b057..414a8a19bfe7 100644
--- a/include/drm/drm_pagemap.h
+++ b/include/drm/drm_pagemap.h
@@ -87,6 +87,12 @@ struct drm_pagemap_ops {
 					      unsigned int order,
 					      enum dma_data_direction dir);
 
+	struct drm_pagemap_addr (*peer_map)(struct drm_pagemap *dpagemap,
+					    struct device *dev,
+					    struct page *page,
+					    unsigned int order,
+					    enum dma_data_direction dir);
+
 	/**
 	 * @device_unmap: Unmap a device address previously obtained using @device_map.
 	 *
-- 
2.43.0


             reply	other threads:[~2026-06-09  8:40 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-09  8:40 Nishit Sharma [this message]
2026-06-09  9:27 ` ✗ CI.checkpatch: warning for drm/xe: fix cross-tile SVM migration mapping on PVC and harden ENOSPC retry path Patchwork
2026-06-09  9:28 ` ✓ CI.KUnit: success " Patchwork
2026-06-09 10:39 ` ✓ Xe.CI.BAT: " Patchwork
2026-06-09 22:00 ` ✗ Xe.CI.FULL: failure " Patchwork
2026-06-10  1:54 ` [PATCH] [RFC]drm/xe: " Matthew Brost

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260609084003.70975-1-nishit.sharma@intel.com \
    --to=nishit.sharma@intel.com \
    --cc=himal.prasad.ghimiray@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox