Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: thomas.hellstrom@linux.intel.com, himal.prasad.ghimiray@intel.com
Subject: [PATCH] drm/xe: Thread prefetch of SVM ranges
Date: Wed, 28 May 2025 10:27:25 -0700	[thread overview]
Message-ID: <20250528172725.1669802-1-matthew.brost@intel.com> (raw)

The migrate_vma_* functions are very CPU-intensive; thus, prefetching of
SVM ranges is limited by the CPU rather than the paging copy engine
bandwidth. In an effort to speed up the prefetching of SVM ranges, the
step that calls migrate_vma_* is now threaded. This utilizes the
existing page fault work queue for threading.

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 111 +++++++++++++++++++++++++++----------
 1 file changed, 83 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 5a978da411b0..18e5a36c6c21 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2878,53 +2878,108 @@ static int check_ufence(struct xe_vma *vma)
 	return 0;
 }
 
-static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
+struct prefetch_thread {
+	struct work_struct work;	/* work item; handler is prefetch_work_func() */
+	struct drm_gpusvm_ctx *ctx;	/* points at the ctx on prefetch_ranges()' stack */
+	struct xe_vma *vma;	/* VMA of the prefetch op; the worker derives the VM from it */
+	struct xe_svm_range *svm_range;	/* the one range this work item prefetches */
+	u32 region;	/* 0: migrate to SMEM; otherwise used to pick the VRAM tile */
+	int err;	/* written by the worker; read by the submitter after flush_work() */
+};
+
+static void prefetch_work_func(struct work_struct *w)	/* prefetches one SVM range */
 {
-	bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
-	struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+	struct prefetch_thread *thread =
+		container_of(w, struct prefetch_thread, work);
+	struct xe_vma *vma = thread->vma;
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_svm_range *svm_range = thread->svm_range;
+	u32 region = thread->region;
+	struct xe_tile *tile =
+		&vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];	/* NOTE(review): also computed for region == 0, where tile is unused - confirm index is valid */
 	int err = 0;
 
-	struct xe_svm_range *svm_range;
+	if (!region) {	/* region 0: move the range to system memory */
+		xe_svm_range_migrate_to_smem(vm, svm_range);
+	} else if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
+		err = xe_svm_alloc_vram(vm, tile, svm_range, thread->ctx);
+		if (err) {
+			drm_dbg(&vm->xe->drm,
+				"VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
+				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+			thread->err = -ENODATA;	/* original errno is only logged; -ENODATA is reported */
+			return;
+		}
+		xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
+	}
+
+	err = xe_svm_range_get_pages(vm, svm_range, thread->ctx);	/* runs for every range, migrated or not */
+	if (err) {
+		if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
+			err = -ENODATA;	/* these three errnos are folded into -ENODATA */
+		drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
+			vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+		thread->err = err;	/* result is handed back through the work item */
+		return;
+	}
+
+	xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
+}
+
+static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)	/* one work item per range, then wait for all */
+{
+	struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+	u32 j, region = op->prefetch_range.region;	/* j only receives the index chosen by xa_alloc() */
 	struct drm_gpusvm_ctx ctx = {};
-	struct xe_tile *tile;
+	struct prefetch_thread *thread;
+	struct xe_svm_range *svm_range;
+	struct xarray prefetches;
+	struct xe_tile *tile =
+		&vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];	/* NOTE(review): derived before region == 0 is considered, yet dereferenced at queue_work() - confirm index is valid for region 0 */
 	unsigned long i;
-	u32 region;
+	bool devmem_possible = IS_DGFX(vm->xe) &&
+		IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
+	int err = 0;
 
 	if (!xe_vma_is_cpu_addr_mirror(vma))
 		return 0;
 
-	region = op->prefetch_range.region;
+	xa_init_flags(&prefetches, XA_FLAGS_ALLOC);	/* holds every queued work item for the flush/free pass */
 
 	ctx.read_only = xe_vma_read_only(vma);
 	ctx.devmem_possible = devmem_possible;
 	ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
 
-	/* TODO: Threading the migration */
 	xa_for_each(&op->prefetch_range.range, i, svm_range) {
-		if (!region)
-			xe_svm_range_migrate_to_smem(vm, svm_range);
+		thread = kmalloc(sizeof(*thread), GFP_KERNEL);
+		if (!thread)
+			goto wait_threads;	/* NOTE(review): err is still 0 here, so this returns success unless a worker failed - -ENOMEM intended? */
 
-		if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
-			tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
-			err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx);
-			if (err) {
-				drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
-					vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
-				return -ENODATA;
-			}
-			xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
-		}
-
-		err = xe_svm_range_get_pages(vm, svm_range, &ctx);
+		err = xa_alloc(&prefetches, &j, thread, xa_limit_32b,
+			       GFP_KERNEL);
 		if (err) {
-			if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
-				err = -ENODATA;
-			drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
-				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
-			return err;
+			kfree(thread);	/* never stored in the xarray, so free it here */
+			goto wait_threads;
 		}
-		xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
+
+		INIT_WORK(&thread->work, prefetch_work_func);
+		thread->ctx = &ctx;	/* stack object; every work item is flushed below before returning */
+		thread->vma = vma;
+		thread->svm_range = svm_range;
+		thread->region = region;
+		thread->err = 0;
+
+		queue_work(tile->primary_gt->usm.pf_wq, &thread->work);
+	}
+
+wait_threads:
+	xa_for_each(&prefetches, i, thread) {
+		flush_work(&thread->work);	/* wait for this work item to finish */
+		if (thread->err && (!err || err == -ENODATA))
+			err = thread->err;	/* a worker error replaces no error or an earlier -ENODATA */
+		kfree(thread);
 	}
+	xa_destroy(&prefetches);
 
 	return err;
 }
-- 
2.34.1


             reply	other threads:[~2025-05-28 17:25 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-28 17:27 Matthew Brost [this message]
2025-05-28 19:17 ` ✓ CI.Patch_applied: success for drm/xe: Thread prefetch of SVM ranges Patchwork
2025-05-28 19:17 ` ✓ CI.checkpatch: " Patchwork
2025-05-28 19:18 ` ✓ CI.KUnit: " Patchwork
2025-05-28 19:29 ` ✓ CI.Build: " Patchwork
2025-05-28 19:31 ` ✓ CI.Hooks: " Patchwork
2025-05-28 19:33 ` ✓ CI.checksparse: " Patchwork
2025-05-28 20:01 ` ✓ Xe.CI.BAT: " Patchwork
2025-05-28 22:43 ` ✓ Xe.CI.Full: " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2025-06-16  6:47 [PATCH] " Matthew Brost
2025-06-16  8:28 ` Thomas Hellström
2025-06-16  8:58   ` Matthew Brost
2025-06-16  9:24     ` Thomas Hellström
2025-06-16 12:06       ` Mrozek, Michal
2025-06-17 14:30         ` Matthew Brost
2025-06-16 11:20     ` Thomas Hellström
2025-06-17 17:10       ` Matthew Brost
2025-06-17 12:43 ` Thomas Hellström
2025-06-17 14:43   ` Matthew Brost

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250528172725.1669802-1-matthew.brost@intel.com \
    --to=matthew.brost@intel.com \
    --cc=himal.prasad.ghimiray@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox