Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Oak Zeng <oak.zeng@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: Thomas.Hellstrom@linux.intel.com, matthew.brost@intel.com,
	jonathan.cavitt@intel.com
Subject: [PATCH 2/3] drm/xe: Clear scratch page on vm_bind
Date: Wed,  5 Feb 2025 21:11:25 -0500	[thread overview]
Message-ID: <20250206021126.3420-2-oak.zeng@intel.com> (raw)
In-Reply-To: <20250206021126.3420-1-oak.zeng@intel.com>

When a vm runs under fault mode, if scratch page is enabled, we need
to clear the scratch page mapping on vm_bind for the vm_bind address
range. Under fault mode, we depend on recoverable page fault to
establish the mapping in the page table. If the scratch page mapping is
not cleared, GPU access of the address won't cause a page fault because
it always hits the existing scratch page mapping.

When vm_bind is called with the IMMEDIATE flag, there is no need for
clearing, as the immediate bind overwrites the scratch page mapping.

So far only xe2 and xe3 products are allowed to enable scratch page
under fault mode. On other platforms we don't allow scratch page under
fault mode, so there is no need for such clearing.

v2: Rework vm_bind pipeline to clear scratch page mapping. This is similar
to a map operation, with the exception that PTEs are cleared instead of
pointing to valid physical pages. (Matt, Thomas)

TLB invalidation is needed after clearing the scratch page mapping, as a
larger scratch page mapping could be backed by a physical page and
cached in the TLB. (Matt, Thomas)

v3: Fix the case of clearing huge pte (Thomas)

Improve commit message (Thomas)

Signed-off-by: Oak Zeng <oak.zeng@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c       | 47 ++++++++++++++++++++++++++------
 drivers/gpu/drm/xe/xe_pt_types.h |  2 ++
 drivers/gpu/drm/xe/xe_vm.c       | 29 +++++++++++++++++---
 drivers/gpu/drm/xe/xe_vm_types.h |  2 ++
 4 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 1ddcc7e79a93..52137b43a948 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -268,6 +268,8 @@ struct xe_pt_stage_bind_walk {
 	 * granularity.
 	 */
 	bool needs_64K;
+	/** @clear_pt: Clear page table entries during the bind walk */
+	bool clear_pt;
 	/**
 	 * @vma: VMA being mapped
 	 */
@@ -415,6 +417,10 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
 	if (xe_vma_is_null(xe_walk->vma))
 		return true;
 
+	/* If we are clearing the page table, there are no DMA addresses */
+	if (xe_walk->clear_pt)
+		return true;
+
 	/* Is the DMA address huge PTE size aligned? */
 	size = next - addr;
 	dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs);
@@ -498,15 +504,18 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 		XE_WARN_ON(xe_walk->va_curs_start != addr);
 
 		pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
-						 xe_res_dma(curs) + xe_walk->dma_offset,
-						 xe_walk->vma, pat_index, level);
+				xe_res_dma(curs) + xe_walk->dma_offset,
+				xe_walk->vma, pat_index, level);
 		pte |= xe_walk->default_pte;
 
+		if (xe_walk->clear_pt)
+			pte = 0;
+
 		/*
 		 * Set the XE_PTE_PS64 hint if possible, otherwise if
 		 * this device *requires* 64K PTE size for VRAM, fail.
 		 */
-		if (level == 0 && !xe_parent->is_compact) {
+		if (level == 0 && !xe_parent->is_compact && !xe_walk->clear_pt) {
 			if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
 				xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K;
 				pte |= XE_PTE_PS64;
@@ -519,7 +528,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 		if (unlikely(ret))
 			return ret;
 
-		if (!is_null)
+		if (!is_null && !xe_walk->clear_pt)
 			xe_res_next(curs, next - addr);
 		xe_walk->va_curs_start = next;
 		xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level);
@@ -589,6 +598,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
  * @vma: The vma indicating the address range.
  * @entries: Storage for the update entries used for connecting the tree to
  * the main tree at commit time.
+ * @clear_pt: Clear the page table entries.
  * @num_entries: On output contains the number of @entries used.
  *
  * This function builds a disconnected page-table tree for a given address
@@ -602,7 +612,8 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
  */
 static int
 xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
-		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
+		 struct xe_vm_pgtable_update *entries,
+		 bool clear_pt, u32 *num_entries)
 {
 	struct xe_device *xe = tile_to_xe(tile);
 	struct xe_bo *bo = xe_vma_bo(vma);
@@ -622,10 +633,19 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		.vma = vma,
 		.wupd.entries = entries,
 		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem,
+		.clear_pt = clear_pt,
 	};
 	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
 	int ret;
 
+	if (clear_pt) {
+		ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma),
+				       xe_vma_end(vma), &xe_walk.base);
+
+		*num_entries = xe_walk.wupd.num_used_entries;
+		return ret;
+	}
+
 	/**
 	 * Default atomic expectations for different allocation scenarios are as follows:
 	 *
@@ -981,12 +1001,14 @@ static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries,
 
 static int
 xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
-		   struct xe_vm_pgtable_update *entries, u32 *num_entries)
+		   struct xe_vm_pgtable_update *entries,
+		   bool invalidate_on_bind, u32 *num_entries)
 {
 	int err;
 
 	*num_entries = 0;
-	err = xe_pt_stage_bind(tile, vma, entries, num_entries);
+	err = xe_pt_stage_bind(tile, vma, entries, invalidate_on_bind,
+			       num_entries);
 	if (!err)
 		xe_tile_assert(tile, *num_entries);
 
@@ -1661,6 +1683,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
 		return err;
 
 	err = xe_pt_prepare_bind(tile, vma, pt_op->entries,
+				 pt_update_ops->invalidate_on_bind,
 				 &pt_op->num_entries);
 	if (!err) {
 		xe_tile_assert(tile, pt_op->num_entries <=
@@ -1685,7 +1708,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
 		 * it needs to be done here.
 		 */
 		if ((!pt_op->rebind && xe_vm_has_scratch(vm) &&
-		     xe_vm_in_preempt_fence_mode(vm)))
+		     xe_vm_in_preempt_fence_mode(vm)) || pt_update_ops->invalidate_on_bind)
 			pt_update_ops->needs_invalidation = true;
 		else if (pt_op->rebind && !xe_vm_in_lr_mode(vm))
 			/* We bump also if batch_invalidate_tlb is true */
@@ -1759,9 +1782,13 @@ static int op_prepare(struct xe_vm *vm,
 
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
-		if (!op->map.immediate && xe_vm_in_fault_mode(vm))
+		if (!op->map.immediate && xe_vm_in_fault_mode(vm) &&
+		    !op->map.invalidate_on_bind)
 			break;
 
+		if (op->map.invalidate_on_bind)
+			pt_update_ops->invalidate_on_bind = true;
+
 		err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma);
 		pt_update_ops->wait_vm_kernel = true;
 		break;
@@ -1871,6 +1898,8 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
 	}
 	vma->tile_present |= BIT(tile->id);
 	vma->tile_staged &= ~BIT(tile->id);
+	if (pt_update_ops->invalidate_on_bind)
+		vma->tile_invalidated |= BIT(tile->id);
 	if (xe_vma_is_userptr(vma)) {
 		lockdep_assert_held_read(&vm->userptr.notifier_lock);
 		to_userptr_vma(vma)->userptr.initial_bind = true;
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index 384cc04de719..3d0aa2a5102e 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -108,6 +108,8 @@ struct xe_vm_pgtable_update_ops {
 	bool needs_userptr_lock;
 	/** @needs_invalidation: Needs invalidation */
 	bool needs_invalidation;
+	/** @invalidate_on_bind: Invalidate the range before bind */
+	bool invalidate_on_bind;
 	/**
 	 * @wait_vm_bookkeep: PT operations need to wait until VM is idle
 	 * (bookkeep dma-resv slots are idle) and stage all future VM activity
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d664f2e418b2..813d893d9b63 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1921,6 +1921,23 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
 }
 #endif
 
+static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
+{
+	if (!xe_vm_in_fault_mode(vm))
+		return false;
+
+	if (!NEEDS_SCRATCH(vm->xe))
+		return false;
+
+	if (!xe_vm_has_scratch(vm))
+		return false;
+
+	if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
+		return false;
+
+	return true;
+}
+
 /*
  * Create operations list from IOCTL arguments, setup operations fields so parse
  * and commit steps are decoupled from IOCTL arguments. This step can fail.
@@ -1991,6 +2008,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
 			op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
 			op->map.pat_index = pat_index;
+			op->map.invalidate_on_bind =
+				__xe_vm_needs_clear_scratch_pages(vm, flags);
 		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
 			op->prefetch.region = prefetch_region;
 		}
@@ -2188,7 +2207,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 				return PTR_ERR(vma);
 
 			op->map.vma = vma;
-			if (op->map.immediate || !xe_vm_in_fault_mode(vm))
+			if (op->map.immediate || !xe_vm_in_fault_mode(vm) ||
+			    op->map.invalidate_on_bind)
 				xe_vma_ops_incr_pt_update_ops(vops,
 							      op->tile_mask);
 			break;
@@ -2416,9 +2436,10 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
-		err = vma_lock_and_validate(exec, op->map.vma,
-					    !xe_vm_in_fault_mode(vm) ||
-					    op->map.immediate);
+		if (!op->map.invalidate_on_bind)
+			err = vma_lock_and_validate(exec, op->map.vma,
+						    !xe_vm_in_fault_mode(vm) ||
+						    op->map.immediate);
 		break;
 	case DRM_GPUVA_OP_REMAP:
 		err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 52467b9b5348..dace04f4ea5e 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -297,6 +297,8 @@ struct xe_vma_op_map {
 	bool is_null;
 	/** @dumpable: whether BO is dumped on GPU hang */
 	bool dumpable;
+	/** @invalidate_on_bind: invalidate the VMA before bind */
+	bool invalidate_on_bind;
 	/** @pat_index: The pat index to use for this operation. */
 	u16 pat_index;
 };
-- 
2.26.3


  reply	other threads:[~2025-02-06  1:56 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-06  2:11 [PATCH 1/3] drm/xe: Introduced needs_scratch bit in device descriptor Oak Zeng
2025-02-06  2:11 ` Oak Zeng [this message]
2025-02-06  2:11 ` [PATCH 3/3] drm/xe: Allow scratch page under fault mode for certain platform Oak Zeng
2025-02-06  2:12 ` ✓ CI.Patch_applied: success for series starting with [1/3] drm/xe: Introduced needs_scratch bit in device descriptor Patchwork
2025-02-06  2:13 ` ✗ CI.checkpatch: warning " Patchwork
2025-02-06  2:14 ` ✓ CI.KUnit: success " Patchwork
2025-02-06  2:30 ` ✓ CI.Build: " Patchwork
2025-02-06  2:33 ` ✓ CI.Hooks: " Patchwork
2025-02-06  2:34 ` ✓ CI.checksparse: " Patchwork
2025-02-06  2:53 ` ✓ Xe.CI.BAT: " Patchwork
2025-02-06  3:48 ` ✗ Xe.CI.Full: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2025-02-06 21:38 [PATCH 1/3] " Oak Zeng
2025-02-06 21:38 ` [PATCH 2/3] drm/xe: Clear scratch page on vm_bind Oak Zeng
2025-02-13  2:23 [PATCH 1/3] drm/xe: Introduced needs_scratch bit in device descriptor Oak Zeng
2025-02-13  2:23 ` [PATCH 2/3] drm/xe: Clear scratch page on vm_bind Oak Zeng
2025-02-19 17:47   ` Matthew Brost
2025-02-19 20:19     ` Zeng, Oak
2025-02-19 20:46       ` Matthew Brost
2025-02-20 21:09         ` Matthew Brost
2025-02-25 22:54   ` Matthew Brost
2025-02-26 18:49     ` Zeng, Oak
2025-02-26 21:44       ` Matthew Brost

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250206021126.3420-2-oak.zeng@intel.com \
    --to=oak.zeng@intel.com \
    --cc=Thomas.Hellstrom@linux.intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=jonathan.cavitt@intel.com \
    --cc=matthew.brost@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox