From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-xe@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
	himal.prasad.ghimiray@intel.com,
	"Matthew Brost" <matthew.brost@intel.com>,
	"Matthew Auld" <matthew.auld@intel.com>
Subject: [PATCH 5/5] drm/xe/uapi, drm/xe: Make the PT code handle placement per PTE rather than per vma / range
Date: Mon, 17 Mar 2025 11:41:32 +0100
Message-ID: <20250317104132.8200-6-thomas.hellstrom@linux.intel.com>
In-Reply-To: <20250317104132.8200-1-thomas.hellstrom@linux.intel.com>

With SVM, ranges forwarded to the PT code for binding can, mostly
due to races when migrating, point to both VRAM and system / foreign
device memory. Make the PT code able to handle that by checking,
for each PTE being set up, whether it points to local VRAM or to system
memory.
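
As a rough standalone sketch of the idea (not the driver code itself;
the struct and helper below are illustrative stand-ins for the xe
walker state touched further down), the bind walker keeps two PTE-flag
templates and picks one per PTE depending on where the backing page
currently resides:

#include <stdbool.h>
#include <stdint.h>

struct walk_state {
	uint64_t default_vram_pte;	/* flags for PTEs pointing at local VRAM */
	uint64_t default_system_pte;	/* flags for PTEs pointing at system memory */
};

/*
 * is_vram is evaluated per resource-cursor segment (xe_res_is_vram()
 * in the real code), so a single range may mix VRAM-backed and
 * system-backed PTEs.
 */
static uint64_t encode_pte(uint64_t dma_addr, bool is_vram,
			   const struct walk_state *w)
{
	return dma_addr | (is_vram ? w->default_vram_pte : w->default_system_pte);
}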

The UAPI is changed implicitly in that, before this patch,
global atomics required a bo with VRAM/System placements. With
this patch that is changed to requiring LR mode instead, and
if the required placement is not available upon a GPU atomic access
pagefault, an error is generated and the VM is banned.
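
Whether the XE_USM_PPGTT_PTE_AE bit ends up in the PTE templates then
boils down to roughly the following decision (a sketch only; the
helpers mirror the xe_atomic_for_vram() / xe_atomic_for_system()
functions added below, with their inputs made explicit):

#include <stdbool.h>

/* Atomics in local VRAM always work. */
static bool atomics_allowed_in_vram(void)
{
	return true;
}

/*
 * Atomics in system memory are advertised for iGFX, and for non-LR (3D)
 * clients on discrete, provided the platform supports device atomics on
 * system memory; LR/compute clients on discrete instead rely on
 * fault-driven migration to VRAM.
 */
static bool atomics_allowed_in_system(bool is_dgfx, bool lr_mode,
				      bool device_atomics_on_smem)
{
	return device_atomics_on_smem && (!is_dgfx || !lr_mode);
}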

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c |  12 +++--
 drivers/gpu/drm/xe/xe_pt.c | 106 ++++++++++++++++---------------------
 2 files changed, 56 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index c7c206041632..3ebe9dd2f2e5 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -2091,10 +2091,16 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
 {
 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
 
-	if (res->mem_type == XE_PL_STOLEN)
+	switch (res->mem_type) {
+	case XE_PL_STOLEN:
 		return xe_ttm_stolen_gpu_offset(xe);
-
-	return res_to_mem_region(res)->dpa_base;
+	case XE_PL_TT:
+	case XE_PL_SYSTEM:
+		return 0;
+	default:
+		return res_to_mem_region(res)->dpa_base;
+	}
+	return 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 9e719535a3bb..ea4d4d340698 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -278,13 +278,15 @@ struct xe_pt_stage_bind_walk {
 	struct xe_vm *vm;
 	/** @tile: The tile we're building for. */
 	struct xe_tile *tile;
-	/** @default_pte: PTE flag only template. No address is associated */
-	u64 default_pte;
+	/** @default_vram_pte: PTE flag only template for VRAM. No address is associated */
+	u64 default_vram_pte;
+	/** @default_system_pte: PTE flag only template for system memory. No address is associated */
+	u64 default_system_pte;
 	/** @dma_offset: DMA offset to add to the PTE. */
 	u64 dma_offset;
 	/**
 	 * @needs_64k: This address range enforces 64K alignment and
-	 * granularity.
+	 * granularity on VRAM.
 	 */
 	bool needs_64K;
 	/**
@@ -515,13 +517,16 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
 		struct xe_res_cursor *curs = xe_walk->curs;
 		bool is_null = xe_vma_is_null(xe_walk->vma);
+		bool is_vram = is_null ? false : xe_res_is_vram(curs);
 
 		XE_WARN_ON(xe_walk->va_curs_start != addr);
 
 		pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
 						 xe_res_dma(curs) + xe_walk->dma_offset,
 						 xe_walk->vma, pat_index, level);
-		pte |= xe_walk->default_pte;
+		if (!is_null)
+			pte |= is_vram ? xe_walk->default_vram_pte :
+				xe_walk->default_system_pte;
 
 		/*
 		 * Set the XE_PTE_PS64 hint if possible, otherwise if
@@ -531,7 +536,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 			if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
 				xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K;
 				pte |= XE_PTE_PS64;
-			} else if (XE_WARN_ON(xe_walk->needs_64K)) {
+			} else if (XE_WARN_ON(xe_walk->needs_64K && is_vram)) {
 				return -EINVAL;
 			}
 		}
@@ -603,6 +608,31 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
 	.pt_entry = xe_pt_stage_bind_entry,
 };
 
+/* The GPU can always do atomics in VRAM */
+static bool xe_atomic_for_vram(struct xe_vm *vm)
+{
+	return true;
+}
+
+/*
+ * iGFX always expects to be able to do atomics in system memory.
+ *
+ * For DGFX, 3D clients want to do atomics in system memory that are
+ * not coherent with CPU atomics, whereas compute clients want
+ * atomics that look coherent with CPU atomics. We distinguish
+ * the two by checking for LR mode, and for compute we then
+ * disallow atomics in system memory. A compute attempt to
+ * perform atomics in system memory would then cause an
+ * unrecoverable page-fault in preempt-fence mode, but in fault
+ * mode the data would be migrated to VRAM for GPU atomics and
+ * to system memory for CPU atomics.
+ */
+static bool xe_atomic_for_system(struct xe_vm *vm)
+{
+	return vm->xe->info.has_device_atomics_on_smem &&
+		(!IS_DGFX(vm->xe) || !xe_vm_in_lr_mode(vm));
+}
+
 /**
  * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
  * range.
@@ -629,9 +659,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 {
 	struct xe_device *xe = tile_to_xe(tile);
 	struct xe_bo *bo = xe_vma_bo(vma);
-	bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
-		(xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
 	struct xe_res_cursor curs;
+	struct xe_vm *vm = xe_vma_vm(vma);
 	struct xe_pt_stage_bind_walk xe_walk = {
 		.base = {
 			.ops = &xe_pt_stage_bind_ops,
@@ -639,7 +668,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 			.max_level = XE_PT_HIGHEST_LEVEL,
 			.staging = true,
 		},
-		.vm = xe_vma_vm(vma),
+		.vm = vm,
 		.tile = tile,
 		.curs = &curs,
 		.va_curs_start = range ? range->base.itree.start :
@@ -647,26 +676,22 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		.vma = vma,
 		.wupd.entries = entries,
 	};
-	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+	struct xe_pt *pt = vm->pt_root[tile->id];
 	int ret;
 
 	if (range) {
 		/* Move this entire thing to xe_svm.c? */
-		xe_svm_notifier_lock(xe_vma_vm(vma));
+		xe_svm_notifier_lock(vm);
 		if (!xe_svm_range_pages_valid(range)) {
 			xe_svm_range_debug(range, "BIND PREPARE - RETRY");
-			xe_svm_notifier_unlock(xe_vma_vm(vma));
+			xe_svm_notifier_unlock(vm);
 			return -EAGAIN;
 		}
 		if (xe_svm_range_has_dma_mapping(range)) {
 			xe_res_first_dma(range->base.dma_addr, 0,
 					 range->base.itree.last + 1 - range->base.itree.start,
 					 &curs);
-			is_devmem = xe_res_is_vram(&curs);
-			if (is_devmem)
-				xe_svm_range_debug(range, "BIND PREPARE - DMA VRAM");
-			else
-				xe_svm_range_debug(range, "BIND PREPARE - DMA");
+			xe_svm_range_debug(range, "BIND PREPARE - MIXED");
 		} else {
 			xe_assert(xe, false);
 		}
@@ -674,54 +699,17 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		 * Note, when unlocking the resource cursor dma addresses may become
 		 * stale, but the bind will be aborted anyway at commit time.
 		 */
-		xe_svm_notifier_unlock(xe_vma_vm(vma));
+		xe_svm_notifier_unlock(vm);
 	}
 
-	xe_walk.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem;
-
-	/**
-	 * Default atomic expectations for different allocation scenarios are as follows:
-	 *
-	 * 1. Traditional API: When the VM is not in LR mode:
-	 *    - Device atomics are expected to function with all allocations.
-	 *
-	 * 2. Compute/SVM API: When the VM is in LR mode:
-	 *    - Device atomics are the default behavior when the bo is placed in a single region.
-	 *    - In all other cases device atomics will be disabled with AE=0 until an application
-	 *      request differently using a ioctl like madvise.
-	 */
+	xe_walk.needs_64K = (vm->flags & XE_VM_FLAG_64K);
 	if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
-		if (xe_vm_in_lr_mode(xe_vma_vm(vma))) {
-			if (bo && xe_bo_has_single_placement(bo))
-				xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
-			/**
-			 * If a SMEM+LMEM allocation is backed by SMEM, a device
-			 * atomics will cause a gpu page fault and which then
-			 * gets migrated to LMEM, bind such allocations with
-			 * device atomics enabled.
-			 */
-			else if (is_devmem)
-				xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
-		} else {
-			xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
-		}
-
-		/**
-		 * Unset AE if the platform(PVC) doesn't support it on an
-		 * allocation
-		 */
-		if (!xe->info.has_device_atomics_on_smem && !is_devmem)
-			xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE;
+		xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0;
+		xe_walk.default_system_pte = xe_atomic_for_system(vm) ? XE_USM_PPGTT_PTE_AE : 0;
 	}
 
-	if (is_devmem) {
-		xe_walk.default_pte |= XE_PPGTT_PTE_DM;
-		xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
-	}
-
-	if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
-		xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
-
+	xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM;
+	xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
 	if (!range)
 		xe_bo_assert_held(bo);
 
-- 
2.48.1

