[RFC PATCH 12/19] drm/xe: Make the PT code handle placement per PTE rather than per vma / range

Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed

From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-xe@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
	himal.prasad.ghimiray@intel.com, apopple@nvidia.com,
	airlied@gmail.com, "Simona Vetter" <simona.vetter@ffwll.ch>,
	felix.kuehling@amd.com, "Matthew Brost" <matthew.brost@intel.com>,
	"Christian König" <christian.koenig@amd.com>,
	dakr@kernel.org, "Mrozek, Michal" <michal.mrozek@intel.com>,
	"Joonas Lahtinen" <joonas.lahtinen@linux.intel.com>
Subject: [RFC PATCH 12/19] drm/xe: Make the PT code handle placement per PTE rather than per vma / range
Date: Wed, 12 Mar 2025 22:04:09 +0100	[thread overview]
Message-ID: <20250312210416.3120-13-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20250312210416.3120-1-thomas.hellstrom@linux.intel.com>

With SVM, ranges forwarded to the PT code for binding can, mostly
due to races when migrating, point to both VRAM and system / foreign
device memory. Make the PT code able to handle that by checking,
for each PTE set up, whether it points to local VRAM or to system
memory.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c |  12 +++--
 drivers/gpu/drm/xe/xe_pt.c | 106 ++++++++++++++++---------------------
 2 files changed, 56 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 390f90fbd366..bec788bce95c 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -2091,10 +2091,16 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
 {
 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
 
-	if (res->mem_type == XE_PL_STOLEN)
+	switch (res->mem_type) {
+	case XE_PL_STOLEN:
 		return xe_ttm_stolen_gpu_offset(xe);
-
-	return res_to_mem_region(res)->dpa_base;
+	case XE_PL_TT:
+	case XE_PL_SYSTEM:
+		return 0;
+	default:
+		return res_to_mem_region(res)->dpa_base;
+	}
+	return 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 9e719535a3bb..d14b1a28474a 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -278,13 +278,15 @@ struct xe_pt_stage_bind_walk {
 	struct xe_vm *vm;
 	/** @tile: The tile we're building for. */
 	struct xe_tile *tile;
-	/** @default_pte: PTE flag only template. No address is associated */
-	u64 default_pte;
+	/** @default_pte: PTE flag only template for VRAM. No address is associated */
+	u64 default_vram_pte;
+	/** @default_pte: PTE flag only template for VRAM. No address is associated */
+	u64 default_system_pte;
 	/** @dma_offset: DMA offset to add to the PTE. */
 	u64 dma_offset;
 	/**
 	 * @needs_64k: This address range enforces 64K alignment and
-	 * granularity.
+	 * granularity on VRAM.
 	 */
 	bool needs_64K;
 	/**
@@ -515,13 +517,16 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
 		struct xe_res_cursor *curs = xe_walk->curs;
 		bool is_null = xe_vma_is_null(xe_walk->vma);
+		bool is_vram = xe_res_is_vram(curs);
 
 		XE_WARN_ON(xe_walk->va_curs_start != addr);
 
 		pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
 						 xe_res_dma(curs) + xe_walk->dma_offset,
 						 xe_walk->vma, pat_index, level);
-		pte |= xe_walk->default_pte;
+		if (!is_null)
+			pte |= is_vram ? xe_walk->default_vram_pte :
+				xe_walk->default_system_pte;
 
 		/*
 		 * Set the XE_PTE_PS64 hint if possible, otherwise if
@@ -531,7 +536,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 			if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
 				xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K;
 				pte |= XE_PTE_PS64;
-			} else if (XE_WARN_ON(xe_walk->needs_64K)) {
+			} else if (XE_WARN_ON(xe_walk->needs_64K && is_vram)) {
 				return -EINVAL;
 			}
 		}
@@ -603,6 +608,31 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
 	.pt_entry = xe_pt_stage_bind_entry,
 };
 
+/* The GPU can always to atomics in VRAM */
+static bool xe_atomic_for_vram(struct xe_vm *vm)
+{
+	return true;
+}
+
+/*
+ * iGFX always expect to be able to do atomics in system.
+ *
+ * For DGFX, 3D clients want to do atomics in system that is
+ * not coherent with CPU atomics. Compute clients want
+ * atomics that look coherent with CPU atomics. We
+ * distinguish the two by checking for lr mode. For
+ * compute we then disallow atomics in system.
+ * Compute attempts to perform atomics in system memory would
+ * then cause an unrecoverable page-fault in preempt-fence
+ * mode, but in fault mode the data would be migrated to VRAM
+ * for GPU atomics and to system for CPU atomics.
+ */
+static bool xe_atomic_for_system(struct xe_vm *vm)
+{
+	return (!IS_DGFX(vm->xe) || !xe_vm_in_lr_mode(vm)) &&
+		!vm->xe->info.has_device_atomics_on_smem;
+}
+
 /**
  * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
  * range.
@@ -629,9 +659,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 {
 	struct xe_device *xe = tile_to_xe(tile);
 	struct xe_bo *bo = xe_vma_bo(vma);
-	bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
-		(xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
 	struct xe_res_cursor curs;
+	struct xe_vm *vm = xe_vma_vm(vma);
 	struct xe_pt_stage_bind_walk xe_walk = {
 		.base = {
 			.ops = &xe_pt_stage_bind_ops,
@@ -639,7 +668,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 			.max_level = XE_PT_HIGHEST_LEVEL,
 			.staging = true,
 		},
-		.vm = xe_vma_vm(vma),
+		.vm = vm,
 		.tile = tile,
 		.curs = &curs,
 		.va_curs_start = range ? range->base.itree.start :
@@ -647,26 +676,22 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		.vma = vma,
 		.wupd.entries = entries,
 	};
-	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+	struct xe_pt *pt = vm->pt_root[tile->id];
 	int ret;
 
 	if (range) {
 		/* Move this entire thing to xe_svm.c? */
-		xe_svm_notifier_lock(xe_vma_vm(vma));
+		xe_svm_notifier_lock(vm);
 		if (!xe_svm_range_pages_valid(range)) {
 			xe_svm_range_debug(range, "BIND PREPARE - RETRY");
-			xe_svm_notifier_unlock(xe_vma_vm(vma));
+			xe_svm_notifier_unlock(vm);
 			return -EAGAIN;
 		}
 		if (xe_svm_range_has_dma_mapping(range)) {
 			xe_res_first_dma(range->base.dma_addr, 0,
 					 range->base.itree.last + 1 - range->base.itree.start,
 					 &curs);
-			is_devmem = xe_res_is_vram(&curs);
-			if (is_devmem)
-				xe_svm_range_debug(range, "BIND PREPARE - DMA VRAM");
-			else
-				xe_svm_range_debug(range, "BIND PREPARE - DMA");
+			xe_svm_range_debug(range, "BIND PREPARE - MIXED");
 		} else {
 			xe_assert(xe, false);
 		}
@@ -674,54 +699,17 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		 * Note, when unlocking the resource cursor dma addresses may become
 		 * stale, but the bind will be aborted anyway at commit time.
 		 */
-		xe_svm_notifier_unlock(xe_vma_vm(vma));
+		xe_svm_notifier_unlock(vm);
 	}
 
-	xe_walk.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem;
-
-	/**
-	 * Default atomic expectations for different allocation scenarios are as follows:
-	 *
-	 * 1. Traditional API: When the VM is not in LR mode:
-	 *    - Device atomics are expected to function with all allocations.
-	 *
-	 * 2. Compute/SVM API: When the VM is in LR mode:
-	 *    - Device atomics are the default behavior when the bo is placed in a single region.
-	 *    - In all other cases device atomics will be disabled with AE=0 until an application
-	 *      request differently using a ioctl like madvise.
-	 */
+	xe_walk.needs_64K = (vm->flags & XE_VM_FLAG_64K);
 	if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
-		if (xe_vm_in_lr_mode(xe_vma_vm(vma))) {
-			if (bo && xe_bo_has_single_placement(bo))
-				xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
-			/**
-			 * If a SMEM+LMEM allocation is backed by SMEM, a device
-			 * atomics will cause a gpu page fault and which then
-			 * gets migrated to LMEM, bind such allocations with
-			 * device atomics enabled.
-			 */
-			else if (is_devmem)
-				xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
-		} else {
-			xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
-		}
-
-		/**
-		 * Unset AE if the platform(PVC) doesn't support it on an
-		 * allocation
-		 */
-		if (!xe->info.has_device_atomics_on_smem && !is_devmem)
-			xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE;
+		xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0;
+		xe_walk.default_system_pte = xe_atomic_for_system(vm) ? XE_USM_PPGTT_PTE_AE : 0;
 	}
 
-	if (is_devmem) {
-		xe_walk.default_pte |= XE_PPGTT_PTE_DM;
-		xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
-	}
-
-	if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
-		xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
-
+	xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM;
+	xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
 	if (!range)
 		xe_bo_assert_held(bo);
 
-- 
2.48.1

next prev parent reply	other threads:[~2025-03-12 21:05 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-12 21:03 [RFC PATCH 00/19] drm, drm/xe: Multi-device GPUSVM Thomas Hellström
2025-03-12 21:03 ` [RFC PATCH 01/19] drm/xe: Introduce CONFIG_DRM_XE_GPUSVM Thomas Hellström
2025-03-12 21:03 ` [RFC PATCH 02/19] drm/xe/svm: Fix a potential bo UAF Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 03/19] drm/gpusvm, drm/pagemap: Move migration functionality to drm_pagemap Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 04/19] drm/pagemap: Add a populate_mm op Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 05/19] drm/xe: Implement and use the drm_pagemap " Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 06/19] drm/pagemap, drm/xe: Add refcounting to struct drm_pagemap and manage lifetime Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 07/19] drm/pagemap: Get rid of the struct drm_pagemap_zdd::device_private_page_owner field Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 08/19] drm/xe/bo: Add a bo remove callback Thomas Hellström
2025-03-14 13:05   ` Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 09/19] drm/pagemap_util: Add a utility to assign an owner to a set of interconnected gpus Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 10/19] drm/gpusvm, drm/xe: Move the device private owner to the drm_gpusvm_ctx Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 11/19] drm/xe: Use the drm_pagemap_util helper to get a svm pagemap owner Thomas Hellström
2025-03-12 21:04 ` Thomas Hellström [this message]
2025-03-12 21:04 ` [RFC PATCH 13/19] drm/gpusvm: Allow mixed mappings Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 14/19] drm/xe: Add a preferred dpagemap Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 15/19] drm/pagemap/util: Add file descriptors pointing to struct drm_pagemap Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 16/19] drm/xe/migrate: Allow xe_migrate_vram() also on non-pagefault capable devices Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 17/19] drm/xe/uapi: Add the devmem_open ioctl Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 18/19] drm/xe/uapi: HAX: Add the xe_madvise_prefer_devmem IOCTL Thomas Hellström
2025-03-12 21:04 ` [RFC PATCH 19/19] drm/xe: HAX: Use pcie p2p dma to test fast interconnect Thomas Hellström
2025-03-12 21:10 ` ✓ CI.Patch_applied: success for drm, drm/xe: Multi-device GPUSVM Patchwork
2025-03-12 21:11 ` ✗ CI.checkpatch: warning " Patchwork
2025-03-12 21:12 ` ✓ CI.KUnit: success " Patchwork
2025-03-12 21:29 ` ✓ CI.Build: " Patchwork
2025-03-12 21:31 ` ✗ CI.Hooks: failure " Patchwork
2025-03-12 21:33 ` ✓ CI.checksparse: success " Patchwork
2025-03-12 22:06 ` ✗ Xe.CI.BAT: failure " Patchwork
2025-03-13 10:19 ` [RFC PATCH 00/19] " Christian König
2025-03-13 12:50   ` Thomas Hellström
2025-03-13 12:57     ` Christian König
2025-03-13 15:55       ` Thomas Hellström
2025-03-17  9:20       ` Thomas Hellström
2025-03-13 13:24 ` ✗ Xe.CI.Full: failure for " Patchwork

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:390f90fbd36 dfblob:bec788bce95 dfblob:9e719535a3b
dfblob:d14b1a28474 )
 OR (
bs:"[RFC PATCH 12/19] drm/xe: Make the PT code handle placement per PTE rather than per vma / range" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250312210416.3120-13-thomas.hellstrom@linux.intel.com \
    --to=thomas.hellstrom@linux.intel.com \
    --cc=airlied@gmail.com \
    --cc=apopple@nvidia.com \
    --cc=christian.koenig@amd.com \
    --cc=dakr@kernel.org \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=felix.kuehling@amd.com \
    --cc=himal.prasad.ghimiray@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=joonas.lahtinen@linux.intel.com \
    --cc=matthew.brost@intel.com \
    --cc=michal.mrozek@intel.com \
    --cc=simona.vetter@ffwll.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox