Intel-XE Archive on lore.kernel.org
From: Oak Zeng <oak.zeng@intel.com>
To: intel-xe@lists.freedesktop.org
Subject: [CI 28/44] drm/xe: Moving to range based vma invalidation
Date: Fri, 14 Jun 2024 17:58:01 -0400
Message-ID: <20240614215817.1097633-28-oak.zeng@intel.com>
In-Reply-To: <20240614215817.1097633-1-oak.zeng@intel.com>

New parameters are introduced to the vma invalidation function to allow
partial invalidation of a vma.
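
The updated prototypes, taken from the diff below, are:

    bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma,
                        u64 start, u64 end);
    int xe_vm_invalidate_vma(struct xe_vma *vma, u64 start, u64 end);

Callers that still want whole-vma semantics simply pass xe_vma_start(vma)
and xe_vma_end(vma) as the range.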

For userptr invalidation, we now invalidate only the range reported by the
mmu notifier instead of the whole vma. This is more efficient than
whole-vma invalidation.
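
Conceptually, the invalidation range is derived by clamping the notifier
range to the userptr and translating the result into the vma's GPU address
space. A simplified sketch of the logic added to vma_userptr_invalidate()
(see the xe_vm.c hunk below):

    /* Clamp the notifier range to the part overlapping this userptr. */
    u64 range_start = max_t(u64, xe_vma_userptr(vma), range->start);
    u64 range_end = min_t(u64, xe_vma_userptr_end(vma), range->end);

    /* Translate the CPU-side offset into the vma's GPU address range. */
    u64 start = xe_vma_start(vma) + (range_start - xe_vma_userptr(vma));
    u64 end = start + (range_end - range_start);

    if (xe_vm_in_fault_mode(vm))
            xe_vm_invalidate_vma(vma, start, end);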

All other cases keep the whole-vma invalidation scheme.

One consequence of this change is that we no longer know whether a vma has
been fully invalidated, because a vma can now be partially invalidated. The
tile_invalidated member is deleted for this reason, along with the
vma_is_valid() short-circuit in the page fault handler that relied on it.

This is preparation work for the system allocator, where we want to apply
range-based vma invalidation. It is also reasonable to apply the same
scheme to userptr invalidation.

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Brian Welty <brian.welty@intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Oak Zeng <oak.zeng@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c           |  2 +-
 drivers/gpu/drm/xe/xe_gt_pagefault.c | 11 ---------
 drivers/gpu/drm/xe/xe_pt.c           | 16 +++++++++---
 drivers/gpu/drm/xe/xe_pt.h           |  2 +-
 drivers/gpu/drm/xe/xe_vm.c           | 37 +++++++++++++++++++---------
 drivers/gpu/drm/xe/xe_vm.h           |  2 +-
 drivers/gpu/drm/xe/xe_vm_types.h     |  3 ---
 7 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 371ea9a5dd16..71670e7397fc 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -514,7 +514,7 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 			struct xe_vma *vma = gpuva_to_vma(gpuva);
 
 			trace_xe_vma_evict(vma);
-			ret = xe_vm_invalidate_vma(vma);
+			ret = xe_vm_invalidate_vma(vma, xe_vma_start(vma), xe_vma_end(vma));
 			if (XE_WARN_ON(ret))
 				return ret;
 		}
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 9292d5468868..9e84cff964b8 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -65,12 +65,6 @@ static bool access_is_atomic(enum access_type access_type)
 	return access_type == ACCESS_TYPE_ATOMIC;
 }
 
-static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma)
-{
-	return BIT(tile->id) & vma->tile_present &&
-		!(BIT(tile->id) & vma->tile_invalidated);
-}
-
 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
 {
 	if (page_addr > xe_vma_end(vma) - 1 ||
@@ -138,10 +132,6 @@ static int handle_vma_pagefault(struct xe_tile *tile, struct pagefault *pf,
 	trace_xe_vma_pagefault(vma);
 	atomic = access_is_atomic(pf->access_type);
 
-	/* Check if VMA is valid */
-	if (vma_is_valid(tile, vma) && !atomic)
-		return 0;
-
 retry_userptr:
 	if (xe_vma_is_userptr(vma) &&
 	    xe_vma_userptr_check_repin(to_userptr_vma(vma))) {
@@ -175,7 +165,6 @@ static int handle_vma_pagefault(struct xe_tile *tile, struct pagefault *pf,
 
 	dma_fence_wait(fence, false);
 	dma_fence_put(fence);
-	vma->tile_invalidated &= ~BIT(tile->id);
 
 unlock_dma_resv:
 	drm_exec_fini(&exec);
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index e4b42aa2517f..415386852e3b 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -792,6 +792,8 @@ static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {
  * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
  * @tile: The tile we're zapping for.
  * @vma: GPU VMA detailing address range.
+ * @start: start of the range.
+ * @end: end of the range.
  *
  * Eviction and Userptr invalidation needs to be able to zap the
  * gpu ptes of a given address range in pagefaulting mode.
@@ -803,8 +805,11 @@ static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {
  *
  * Return: Whether ptes were actually updated and a TLB invalidation is
  * required.
+ *
+ * FIXME: double-check that xe_pt_walk_shared supports walking a sub-range
+ * of a vma (vs the whole vma)
  */
-bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
+bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma, u64 start, u64 end)
 {
 	struct xe_pt_zap_ptes_walk xe_walk = {
 		.base = {
@@ -815,13 +820,16 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
 		.tile = tile,
 	};
 	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
-	u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated);
+	u8 pt_mask = vma->tile_present;
+
+	xe_assert(tile_to_xe(tile), start >= xe_vma_start(vma));
+	xe_assert(tile_to_xe(tile), end <= xe_vma_end(vma));
 
 	if (!(pt_mask & BIT(tile->id)))
 		return false;
 
-	(void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma),
-				xe_vma_end(vma), &xe_walk.base);
+	(void)xe_pt_walk_shared(&pt->base, pt->level, start,
+				end, &xe_walk.base);
 
 	return xe_walk.needs_invalidate;
 }
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index 9ab386431cad..aa8ff28e75b0 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -41,6 +41,6 @@ struct dma_fence *xe_pt_update_ops_run(struct xe_tile *tile,
 void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops);
 void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops);
 
-bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
+bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma, u64 start, u64 end);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 97c6f25760ac..b0da8821fc9e 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -617,21 +617,31 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
 {
 	struct xe_userptr *userptr = container_of(mni, typeof(*userptr), hmmptr.notifier);
 	struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
+	u64 range_start, range_end, range_size, range_offset;
 	struct xe_vma *vma = &uvma->vma;
 	struct xe_vm *vm = xe_vma_vm(vma);
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
+	u64 start, end;
 	long err;
 
+	range_start = max_t(u64, xe_vma_userptr(vma), range->start);
+	range_end = min_t(u64, xe_vma_userptr_end(vma), range->end);
+	range_size = range_end - range_start;
+	range_offset = range_start - xe_vma_userptr(vma);
+
 	xe_assert(vm->xe, xe_vma_is_userptr(vma));
 	trace_xe_vma_userptr_invalidate(vma);
 
+	start = xe_vma_start(vma) + range_offset;
+	end = start + range_size;
+
 	if (!mmu_notifier_range_blockable(range))
 		return false;
 
 	vm_dbg(&xe_vma_vm(vma)->xe->drm,
 	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
-		xe_vma_start(vma), xe_vma_size(vma));
+		start, range_size);
 
 	down_write(&vm->userptr.notifier_lock);
 	mmu_interval_set_seq(mni, cur_seq);
@@ -674,11 +684,11 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
 	XE_WARN_ON(err <= 0);
 
 	if (xe_vm_in_fault_mode(vm)) {
-		err = xe_vm_invalidate_vma(vma);
+		err = xe_vm_invalidate_vma(vma, start, end);
 		XE_WARN_ON(err);
 	}
 
-	xe_vma_userptr_dma_unmap_pages(uvma, xe_vma_userptr(vma), xe_vma_userptr_end(vma));
+	xe_vma_userptr_dma_unmap_pages(uvma, range_start, range_end);
 
 	trace_xe_vma_userptr_invalidate_complete(vma);
 
@@ -721,7 +731,8 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
 					      DMA_RESV_USAGE_BOOKKEEP,
 					      false, MAX_SCHEDULE_TIMEOUT);
 
-			err = xe_vm_invalidate_vma(&uvma->vma);
+			err = xe_vm_invalidate_vma(&uvma->vma, xe_vma_start(&uvma->vma),
+						   xe_vma_end(&uvma->vma));
 			xe_vm_unlock(vm);
 			if (err)
 				return err;
@@ -3207,8 +3218,10 @@ void xe_vm_unlock(struct xe_vm *vm)
 }
 
 /**
- * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
+ * xe_vm_invalidate_vma - invalidate GPU mappings for a range of a VMA without a lock
  * @vma: VMA to invalidate
+ * @start: start of the range.
+ * @end: end of the range.
  *
  * Walks a list of page tables leaves which it memset the entries owned by this
  * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is
@@ -3216,7 +3229,7 @@ void xe_vm_unlock(struct xe_vm *vm)
  *
  * Returns 0 for success, negative error code otherwise.
  */
-int xe_vm_invalidate_vma(struct xe_vma *vma)
+int xe_vm_invalidate_vma(struct xe_vma *vma, u64 start, u64 end)
 {
 	struct xe_device *xe = xe_vma_vm(vma)->xe;
 	struct xe_tile *tile;
@@ -3227,11 +3240,13 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 
 	xe_assert(xe, !xe_vma_is_null(vma));
 	xe_assert(xe, !xe_vma_is_system_allocator(vma));
+	xe_assert(xe, start >= xe_vma_start(vma));
+	xe_assert(xe, end <= xe_vma_end(vma));
 	trace_xe_vma_invalidate(vma);
 
 	vm_dbg(&xe_vma_vm(vma)->xe->drm,
 	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
-		xe_vma_start(vma), xe_vma_size(vma));
+		start, end - start);
 
 	/* Check that we don't race with page-table updates */
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
@@ -3248,14 +3263,16 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 	}
 
 	for_each_tile(tile, xe, id) {
-		if (xe_pt_zap_ptes(tile, vma)) {
+		if (xe_pt_zap_ptes(tile, vma, start, end)) {
 			tile_needs_invalidate |= BIT(id);
 			xe_device_wmb(xe);
 			/*
 			 * FIXME: We potentially need to invalidate multiple
 			 * GTs within the tile
 			 */
-			seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
+			seqno[id] = xe_gt_tlb_invalidation_range(tile->primary_gt, NULL,
+								 start, end,
+								 xe_vma_vm(vma)->usm.asid);
 			if (seqno[id] < 0)
 				return seqno[id];
 		}
@@ -3269,8 +3286,6 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 		}
 	}
 
-	vma->tile_invalidated = vma->tile_mask;
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 89f3306561ad..7c10f6c60b63 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -223,7 +223,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
 				u8 tile_mask);
 
-int xe_vm_invalidate_vma(struct xe_vma *vma);
+int xe_vm_invalidate_vma(struct xe_vma *vma, u64 start, u64 end);
 
 static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
 {
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 976982972a06..4d9707c19031 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -75,9 +75,6 @@ struct xe_vma {
 		struct work_struct destroy_work;
 	};
 
-	/** @tile_invalidated: VMA has been invalidated */
-	u8 tile_invalidated;
-
 	/** @tile_mask: Tile mask of where to create binding for this VMA */
 	u8 tile_mask;
 
-- 
2.26.3


Thread overview: 60+ messages
2024-06-14 21:57 [CI 01/44] mm/hmm: let users to tag specific PFNs Oak Zeng
2024-06-14 21:57 ` [CI 02/44] dma-mapping: provide an interface to allocate IOVA Oak Zeng
2024-06-14 21:57 ` [CI 03/44] dma-mapping: provide callbacks to link/unlink pages to specific IOVA Oak Zeng
2024-06-14 21:57 ` [CI 04/44] iommu/dma: Provide an interface to allow preallocate IOVA Oak Zeng
2024-06-14 21:57 ` [CI 05/44] iommu/dma: Prepare map/unmap page functions to receive IOVA Oak Zeng
2024-06-14 21:57 ` [CI 06/44] iommu/dma: Implement link/unlink page callbacks Oak Zeng
2024-06-14 21:57 ` [CI 07/44] mm: Add an empty implementation of zone_device_page_init Oak Zeng
2024-06-14 21:57 ` [CI 08/44] drm: Move GPUVA_START/LAST to drm_gpuvm.h Oak Zeng
2024-06-14 21:57 ` [CI 09/44] drm/svm: Mark drm_gpuvm to participate SVM Oak Zeng
2024-06-14 21:57 ` [CI 10/44] drm/svm: introduce drm_mem_region concept Oak Zeng
2024-06-14 21:57 ` [CI 11/44] drm/svm: introduce hmmptr and helper functions Oak Zeng
2024-06-14 21:57 ` [CI 12/44] drm/svm: Introduce helper to remap drm memory region Oak Zeng
2024-06-14 21:57 ` [CI 13/44] drm/svm: handle CPU page fault Oak Zeng
2024-06-14 21:57 ` [CI 14/44] drm/svm: Migrate a range of hmmptr to vram Oak Zeng
2024-06-14 21:57 ` [CI 15/44] drm/svm: Add DRM SVM documentation Oak Zeng
2024-06-14 21:57 ` [CI 16/44] drm/svm: Introduce DRM_SVM kernel config Oak Zeng
2024-06-14 21:57 ` [CI 17/44] drm/xe: s/xe_tile_migrate_engine/xe_tile_migrate_exec_queue Oak Zeng
2024-06-14 21:57 ` [CI 18/44] drm/xe: Add xe_vm_pgtable_update_op to xe_vma_ops Oak Zeng
2024-06-14 21:57 ` [CI 19/44] drm/xe: Convert multiple bind ops into single job Oak Zeng
2024-06-14 21:57 ` [CI 20/44] drm/xe: Update VM trace events Oak Zeng
2024-06-14 21:57 ` [CI 21/44] drm/xe: Update PT layer with better error handling Oak Zeng
2024-06-14 21:57 ` [CI 22/44] drm/xe: Retry BO allocation Oak Zeng
2024-06-14 21:57 ` [CI 23/44] drm/xe/uapi: Add DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR flag Oak Zeng
2024-06-14 21:57 ` [CI 24/44] drm/xe: Add a helper to calculate userptr end address Oak Zeng
2024-06-14 21:57 ` [CI 25/44] drm/xe: Add dma_addr res cursor Oak Zeng
2024-06-14 21:57 ` [CI 26/44] drm/xe: Use drm_mem_region for xe Oak Zeng
2024-06-14 21:58 ` [CI 27/44] drm/xe: use drm_hmmptr in xe Oak Zeng
2024-06-14 21:58 ` Oak Zeng [this message]
2024-06-14 21:58 ` [CI 29/44] drm/xe: Support range based page table update Oak Zeng
2024-06-14 21:58 ` [CI 30/44] drm/xe/uapi: Add DRM_XE_VM_CREATE_FLAG_PARTICIPATE_SVM flag Oak Zeng
2024-06-14 21:58 ` [CI 31/44] drm/xe/svm: Create userptr if page fault occurs on system_allocator VMA Oak Zeng
2024-06-14 21:58 ` [CI 32/44] drm/xe/svm: Add faulted userptr VMA garbage collector Oak Zeng
2024-06-14 21:58 ` [CI 33/44] drm/xe: Introduce helper to get tile from memory region Oak Zeng
2024-06-14 21:58 ` [CI 34/44] drm/xe/svm: implement functions to allocate and free device memory Oak Zeng
2024-06-14 21:58 ` [CI 35/44] drm/xe/svm: Get drm device from drm memory region Oak Zeng
2024-06-14 21:58 ` [CI 36/44] drm/xe/svm: Get page map owner of a " Oak Zeng
2024-06-14 21:58 ` [CI 37/44] drm/xe/svm: Add migrate layer functions for SVM support Oak Zeng
2024-06-14 21:58 ` [CI 38/44] drm/xe/svm: introduce svm migration function Oak Zeng
2024-06-14 21:58 ` [CI 39/44] drm/xe/svm: Register xe memory region to drm layer Oak Zeng
2024-06-14 21:58 ` [CI 40/44] drm/xe/svm: Introduce DRM_XE_SVM kernel config Oak Zeng
2024-06-14 21:58 ` [CI 41/44] drm/xe/svm: Migration from sram to vram for system allocator Oak Zeng
2024-06-14 21:58 ` [CI 42/44] drm/xe/svm: Determine a vma is backed by device memory Oak Zeng
2024-06-14 21:58 ` [CI 43/44] drm/xe/svm: Introduce hmm_pfn array based resource cursor Oak Zeng
2024-06-14 21:58 ` [CI 44/44] drm/xe: Enable system allocator uAPI Oak Zeng
2024-06-14 22:41 ` ✓ CI.Patch_applied: success for series starting with [CI,01/44] mm/hmm: let users to tag specific PFNs Patchwork
2024-06-14 22:42 ` ✗ CI.checkpatch: warning " Patchwork
2024-06-14 22:43 ` ✓ CI.KUnit: success " Patchwork
2024-06-14 22:55 ` ✓ CI.Build: " Patchwork
2024-06-14 22:57 ` ✗ CI.Hooks: failure " Patchwork
2024-06-14 22:58 ` ✗ CI.checksparse: warning " Patchwork
2024-06-14 23:21 ` ✓ CI.BAT: success " Patchwork
2024-06-15  2:56 ` ✓ CI.Patch_applied: success for series starting with [CI,01/44] mm/hmm: let users to tag specific PFNs (rev2) Patchwork
2024-06-15  2:57 ` ✗ CI.checkpatch: warning " Patchwork
2024-06-15  2:58 ` ✓ CI.KUnit: success " Patchwork
2024-06-15  3:10 ` ✓ CI.Build: " Patchwork
2024-06-15  3:12 ` ✗ CI.Hooks: failure " Patchwork
2024-06-15  3:14 ` ✗ CI.checksparse: warning " Patchwork
2024-06-15  3:36 ` ✓ CI.BAT: success " Patchwork
2024-06-15 13:59 ` ✗ CI.FULL: failure for series starting with [CI,01/44] mm/hmm: let users to tag specific PFNs Patchwork
2024-06-18  8:53 ` ✓ CI.FULL: success for series starting with [CI,01/44] mm/hmm: let users to tag specific PFNs (rev2) Patchwork
