* [PATCH] drm/xe: Thread prefetch of SVM ranges
@ 2025-05-28 17:27 Matthew Brost
0 siblings, 0 replies; 14+ messages in thread
From: Matthew Brost @ 2025-05-28 17:27 UTC (permalink / raw)
To: intel-xe; +Cc: thomas.hellstrom, himal.prasad.ghimiray
The migrate_vma_* functions are very CPU-intensive; thus, prefetching of
SVM ranges is limited by the CPU rather than the paging copy engine
bandwidth. In an effort to speed up the prefetching of SVM ranges, the
step that calls migrate_vma_* is now threaded. This utilizes the
existing page fault work queue for threading.
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/xe_vm.c | 111 +++++++++++++++++++++++++++----------
1 file changed, 83 insertions(+), 28 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 5a978da411b0..18e5a36c6c21 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2878,53 +2878,108 @@ static int check_ufence(struct xe_vma *vma)
return 0;
}
-static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
+struct prefetch_thread {
+ struct work_struct work;
+ struct drm_gpusvm_ctx *ctx;
+ struct xe_vma *vma;
+ struct xe_svm_range *svm_range;
+ u32 region;
+ int err;
+};
+
+static void prefetch_work_func(struct work_struct *w)
{
- bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
- struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+ struct prefetch_thread *thread =
+ container_of(w, struct prefetch_thread, work);
+ struct xe_vma *vma = thread->vma;
+ struct xe_vm *vm = xe_vma_vm(vma);
+ struct xe_svm_range *svm_range = thread->svm_range;
+ u32 region = thread->region;
+ struct xe_tile *tile =
+ &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
int err = 0;
- struct xe_svm_range *svm_range;
+ if (!region) {
+ xe_svm_range_migrate_to_smem(vm, svm_range);
+ } else if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
+ err = xe_svm_alloc_vram(vm, tile, svm_range, thread->ctx);
+ if (err) {
+ drm_dbg(&vm->xe->drm,
+ "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
+ vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+ thread->err = -ENODATA;
+ return;
+ }
+ xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
+ }
+
+ err = xe_svm_range_get_pages(vm, svm_range, thread->ctx);
+ if (err) {
+ if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
+ err = -ENODATA;
+ drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
+ vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+ thread->err = err;
+ return;
+ }
+
+ xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
+}
+
+static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
+{
+ struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+ u32 j, region = op->prefetch_range.region;
struct drm_gpusvm_ctx ctx = {};
- struct xe_tile *tile;
+ struct prefetch_thread *thread;
+ struct xe_svm_range *svm_range;
+ struct xarray prefetches;
+ struct xe_tile *tile =
+ &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
unsigned long i;
- u32 region;
+ bool devmem_possible = IS_DGFX(vm->xe) &&
+ IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
+ int err = 0;
if (!xe_vma_is_cpu_addr_mirror(vma))
return 0;
- region = op->prefetch_range.region;
+ xa_init_flags(&prefetches, XA_FLAGS_ALLOC);
ctx.read_only = xe_vma_read_only(vma);
ctx.devmem_possible = devmem_possible;
ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
- /* TODO: Threading the migration */
xa_for_each(&op->prefetch_range.range, i, svm_range) {
- if (!region)
- xe_svm_range_migrate_to_smem(vm, svm_range);
+ thread = kmalloc(sizeof(*thread), GFP_KERNEL);
+ if (!thread)
+ goto wait_threads;
- if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
- tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
- err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx);
- if (err) {
- drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
- vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
- return -ENODATA;
- }
- xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
- }
-
- err = xe_svm_range_get_pages(vm, svm_range, &ctx);
+ err = xa_alloc(&prefetches, &j, thread, xa_limit_32b,
+ GFP_KERNEL);
if (err) {
- if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
- err = -ENODATA;
- drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
- vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
- return err;
+ kfree(thread);
+ goto wait_threads;
}
- xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
+
+ INIT_WORK(&thread->work, prefetch_work_func);
+ thread->ctx = &ctx;
+ thread->vma = vma;
+ thread->svm_range = svm_range;
+ thread->region = region;
+ thread->err = 0;
+
+ queue_work(tile->primary_gt->usm.pf_wq, &thread->work);
+ }
+
+wait_threads:
+ xa_for_each(&prefetches, i, thread) {
+ flush_work(&thread->work);
+ if (thread->err && (!err || err == -ENODATA))
+ err = thread->err;
+ kfree(thread);
}
+ xa_destroy(&prefetches);
return err;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH] drm/xe: Thread prefetch of SVM ranges
@ 2025-06-16 6:47 Matthew Brost
2025-06-16 8:28 ` Thomas Hellström
` (4 more replies)
0 siblings, 5 replies; 14+ messages in thread
From: Matthew Brost @ 2025-06-16 6:47 UTC (permalink / raw)
To: intel-xe; +Cc: himal.prasad.ghimiray, thomas.hellstrom, michal.mrozek
The migrate_vma_* functions are very CPU-intensive; as a result,
prefetching SVM ranges is limited by CPU performance rather than paging
copy engine bandwidth. To accelerate SVM range prefetching, the step
that calls migrate_vma_* is now threaded. This uses a dedicated
workqueue, as the page fault workqueue cannot be shared without risking
deadlocks—due to the prefetch IOCTL holding the VM lock in write mode
while work items in the page fault workqueue also require the VM lock.
The prefetch workqueue is currently allocated in GT, similar to the page
fault workqueue. While this is likely not the ideal location for either,
refactoring will be deferred to a later patch.
Running xe_exec_system_allocator --r prefetch-benchmark, which tests
64MB prefetches, shows an increase from ~4.35 GB/s to 12.25 GB/s with
this patch on drm-tip. Enabling high SLPC further increases throughput
to ~15.25 GB/s, and combining SLPC with ULLS raises it to ~16 GB/s. Both
of these optimizations are upcoming.
v2:
- Use dedicated prefetch workqueue
- Pick dedicated prefetch thread count based on profiling
- Skip threaded prefetch for only 1 range or if prefetching to SRAM
- Fully tested
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/xe_gt_pagefault.c | 31 ++++++-
drivers/gpu/drm/xe/xe_gt_types.h | 2 +
drivers/gpu/drm/xe/xe_vm.c | 128 +++++++++++++++++++++------
3 files changed, 135 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index e2d975b2fddb..941cca3371f2 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -400,6 +400,8 @@ static void pagefault_fini(void *arg)
destroy_workqueue(gt->usm.acc_wq);
destroy_workqueue(gt->usm.pf_wq);
+ if (gt->usm.prefetch_wq)
+ destroy_workqueue(gt->usm.prefetch_wq);
}
static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
@@ -438,10 +440,24 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
return 0;
}
+static int prefetch_thread_count(struct xe_device *xe)
+{
+ if (!IS_DGFX(xe))
+ return 0;
+
+ /*
+	 * Based on profiling large aligned 2M prefetches, this is the optimal
+	 * number of threads on BMG (only platform currently supported). This
+	 * should be tuned for each supported platform and can change on a
+	 * per-platform basis as optimizations land (e.g., large device pages).
+ */
+ return 5;
+}
+
int xe_gt_pagefault_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, ret = 0;
+ int i, count, ret = 0;
if (!xe->info.has_usm)
return 0;
@@ -462,10 +478,23 @@ int xe_gt_pagefault_init(struct xe_gt *gt)
if (!gt->usm.pf_wq)
return -ENOMEM;
+ count = prefetch_thread_count(xe);
+ if (count) {
+ gt->usm.prefetch_wq = alloc_workqueue("xe_gt_prefetch_work_queue",
+ WQ_UNBOUND | WQ_HIGHPRI,
+ count);
+ if (!gt->usm.prefetch_wq) {
+ destroy_workqueue(gt->usm.pf_wq);
+ return -ENOMEM;
+ }
+ }
+
gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
WQ_UNBOUND | WQ_HIGHPRI,
NUM_ACC_QUEUE);
if (!gt->usm.acc_wq) {
+ if (gt->usm.prefetch_wq)
+ destroy_workqueue(gt->usm.prefetch_wq);
destroy_workqueue(gt->usm.pf_wq);
return -ENOMEM;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 7def0959da35..d9ba4921b8ce 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -239,6 +239,8 @@ struct xe_gt {
u16 reserved_bcs_instance;
/** @usm.pf_wq: page fault work queue, unbound, high priority */
struct workqueue_struct *pf_wq;
+ /** @usm.prefetch_wq: prefetch work queue, unbound, high priority */
+ struct workqueue_struct *prefetch_wq;
/** @usm.acc_wq: access counter work queue, unbound, high priority */
struct workqueue_struct *acc_wq;
/**
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 6ef8c4dab647..1ae8e03aead6 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2885,52 +2885,130 @@ static int check_ufence(struct xe_vma *vma)
return 0;
}
-static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
+struct prefetch_thread {
+ struct work_struct work;
+ struct drm_gpusvm_ctx *ctx;
+ struct xe_vma *vma;
+ struct xe_svm_range *svm_range;
+ struct xe_tile *tile;
+ u32 region;
+ int err;
+};
+
+static void prefetch_work_func(struct work_struct *w)
{
- bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
- struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+ struct prefetch_thread *thread =
+ container_of(w, struct prefetch_thread, work);
+ struct xe_vma *vma = thread->vma;
+ struct xe_vm *vm = xe_vma_vm(vma);
+ struct xe_svm_range *svm_range = thread->svm_range;
+ u32 region = thread->region;
+ struct xe_tile *tile = thread->tile;
int err = 0;
- struct xe_svm_range *svm_range;
+ if (!region) {
+ xe_svm_range_migrate_to_smem(vm, svm_range);
+ } else if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
+ err = xe_svm_alloc_vram(vm, tile, svm_range, thread->ctx);
+ if (err) {
+ drm_dbg(&vm->xe->drm,
+ "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
+ vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+ thread->err = -ENODATA;
+ return;
+ }
+ xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
+ }
+
+ err = xe_svm_range_get_pages(vm, svm_range, thread->ctx);
+ if (err) {
+ drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
+ vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+ if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
+ err = -ENODATA;
+ thread->err = err;
+ return;
+ }
+
+ xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
+}
+
+static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
+{
+ struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+ u32 j, region = op->prefetch_range.region;
struct drm_gpusvm_ctx ctx = {};
- struct xe_tile *tile;
+ struct prefetch_thread stack_thread;
+ struct xe_svm_range *svm_range;
+ struct xarray prefetches;
+ bool sram = region_to_mem_type[region] == XE_PL_TT;
+ struct xe_tile *tile = sram ? xe_device_get_root_tile(vm->xe) :
+ &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
unsigned long i;
- u32 region;
+ bool devmem_possible = IS_DGFX(vm->xe) &&
+ IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
+ bool skip_threads = op->prefetch_range.ranges_count == 1 || sram;
+ struct prefetch_thread *thread = skip_threads ? &stack_thread : NULL;
+ int err = 0;
if (!xe_vma_is_cpu_addr_mirror(vma))
return 0;
- region = op->prefetch_range.region;
+ if (!skip_threads)
+ xa_init_flags(&prefetches, XA_FLAGS_ALLOC);
ctx.read_only = xe_vma_read_only(vma);
ctx.devmem_possible = devmem_possible;
ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
- /* TODO: Threading the migration */
xa_for_each(&op->prefetch_range.range, i, svm_range) {
- if (!region)
- xe_svm_range_migrate_to_smem(vm, svm_range);
+ if (!skip_threads) {
+ thread = kmalloc(sizeof(*thread), GFP_KERNEL);
+ if (!thread)
+ goto wait_threads;
- if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
- tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
- err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx);
+ err = xa_alloc(&prefetches, &j, thread, xa_limit_32b,
+ GFP_KERNEL);
if (err) {
- drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
- vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
- return -ENODATA;
+ kfree(thread);
+ goto wait_threads;
}
- xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
}
- err = xe_svm_range_get_pages(vm, svm_range, &ctx);
- if (err) {
- drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
- vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
- if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
- err = -ENODATA;
- return err;
+ INIT_WORK(&thread->work, prefetch_work_func);
+ thread->ctx = &ctx;
+ thread->vma = vma;
+ thread->svm_range = svm_range;
+ thread->tile = tile;
+ thread->region = region;
+ thread->err = 0;
+
+ if (skip_threads) {
+ prefetch_work_func(&thread->work);
+ if (thread->err)
+ return thread->err;
+ } else {
+ /*
+ * Prefetch uses a dedicated workqueue, as the page
+ * fault workqueue cannot be shared without risking
+ * deadlocks—due to holding the VM lock in write mode
+ * here while work items in the page fault workqueue
+ * also require the VM lock.
+ */
+ queue_work(tile->primary_gt->usm.prefetch_wq,
+ &thread->work);
+ }
+ }
+
+wait_threads:
+ if (!skip_threads) {
+ xa_for_each(&prefetches, i, thread) {
+ flush_work(&thread->work);
+ if (thread->err && (!err || err == -ENODATA))
+ err = thread->err;
+ kfree(thread);
}
- xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
+ xa_destroy(&prefetches);
}
return err;
--
2.34.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/xe: Thread prefetch of SVM ranges
2025-06-16 6:47 [PATCH] drm/xe: Thread prefetch of SVM ranges Matthew Brost
@ 2025-06-16 8:28 ` Thomas Hellström
2025-06-16 8:58 ` Matthew Brost
2025-06-16 11:51 ` ✓ CI.KUnit: success for drm/xe: Thread prefetch of SVM ranges (rev2) Patchwork
` (3 subsequent siblings)
4 siblings, 1 reply; 14+ messages in thread
From: Thomas Hellström @ 2025-06-16 8:28 UTC (permalink / raw)
To: Matthew Brost, intel-xe; +Cc: himal.prasad.ghimiray, michal.mrozek
On Sun, 2025-06-15 at 23:47 -0700, Matthew Brost wrote:
> The migrate_vma_* functions are very CPU-intensive; as a result,
> prefetching SVM ranges is limited by CPU performance rather than
> paging
> copy engine bandwidth. To accelerate SVM range prefetching, the step
> that calls migrate_vma_* is now threaded. This uses a dedicated
> workqueue, as the page fault workqueue cannot be shared without
> risking
> deadlocks—due to the prefetch IOCTL holding the VM lock in write mode
> while work items in the page fault workqueue also require the VM
> lock.
>
> The prefetch workqueue is currently allocated in GT, similar to the
> page
> fault workqueue. While this is likely not the ideal location for
> either,
> refactoring will be deferred to a later patch.
>
> Running xe_exec_system_allocator --r prefetch-benchmark, which tests
> 64MB prefetches, shows an increase from ~4.35 GB/s to 12.25 GB/s with
> this patch on drm-tip. Enabling high SLPC further increases
> throughput
> to ~15.25 GB/s, and combining SLPC with ULLS raises it to ~16 GB/s.
> Both
> of these optimizations are upcoming.
>
> v2:
> - Use dedicated prefetch workqueue
> - Pick dedicated prefetch thread count based on profiling
> - Skip threaded prefetch for only 1 range or if prefetching to SRAM
> - Fully tested
>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Hi,
Is this really the right place to do optimizations like this?
The migration takes place in xe_svm_alloc_vram() and is being moved to
drm_pagemap_populate_mm(). If those functions are considered to be slow
then they should be optimized, rather than calling them multiple times
in parallel from an outer layer?
Before doing something like this I think we need to consider
1) Why are the migrate functions so cpu consuming? Do we have a
performance profile for it?
2) Do we actually *want* to use 5 CPU cores for this?
3) Isn't a single CPU write-combined non-temporal CPU memcopy enough to
saturate the system->VRAM bandwidth?
Thanks,
Thomas
> ---
> drivers/gpu/drm/xe/xe_gt_pagefault.c | 31 ++++++-
> drivers/gpu/drm/xe/xe_gt_types.h | 2 +
> drivers/gpu/drm/xe/xe_vm.c | 128 +++++++++++++++++++++----
> --
> 3 files changed, 135 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index e2d975b2fddb..941cca3371f2 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -400,6 +400,8 @@ static void pagefault_fini(void *arg)
>
> destroy_workqueue(gt->usm.acc_wq);
> destroy_workqueue(gt->usm.pf_wq);
> + if (gt->usm.prefetch_wq)
> + destroy_workqueue(gt->usm.prefetch_wq);
> }
>
> static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue
> *pf_queue)
> @@ -438,10 +440,24 @@ static int xe_alloc_pf_queue(struct xe_gt *gt,
> struct pf_queue *pf_queue)
> return 0;
> }
>
> +static int prefetch_thread_count(struct xe_device *xe)
> +{
> + if (!IS_DGFX(xe))
> + return 0;
> +
> + /*
> + * Based on profiling large aligned 2M prefetches, this is
> the optimial
> + * number of threads on BMG (only platform currently
> supported). This
> + * should be tuned for each supported platform and can
> change on per
> + * platform basis as optimizations land (e.g., large device
> pages).
> + */
> + return 5;
> +}
> +
> int xe_gt_pagefault_init(struct xe_gt *gt)
> {
> struct xe_device *xe = gt_to_xe(gt);
> - int i, ret = 0;
> + int i, count, ret = 0;
>
> if (!xe->info.has_usm)
> return 0;
> @@ -462,10 +478,23 @@ int xe_gt_pagefault_init(struct xe_gt *gt)
> if (!gt->usm.pf_wq)
> return -ENOMEM;
>
> + count = prefetch_thread_count(xe);
> + if (count) {
> + gt->usm.prefetch_wq =
> alloc_workqueue("xe_gt_prefetch_work_queue",
> + WQ_UNBOUND |
> WQ_HIGHPRI,
> + count);
> + if (!gt->usm.prefetch_wq) {
> + destroy_workqueue(gt->usm.pf_wq);
> + return -ENOMEM;
> + }
> + }
> +
> gt->usm.acc_wq =
> alloc_workqueue("xe_gt_access_counter_work_queue",
> WQ_UNBOUND | WQ_HIGHPRI,
> NUM_ACC_QUEUE);
> if (!gt->usm.acc_wq) {
> + if (gt->usm.prefetch_wq)
> + destroy_workqueue(gt->usm.prefetch_wq);
> destroy_workqueue(gt->usm.pf_wq);
> return -ENOMEM;
> }
> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h
> b/drivers/gpu/drm/xe/xe_gt_types.h
> index 7def0959da35..d9ba4921b8ce 100644
> --- a/drivers/gpu/drm/xe/xe_gt_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> @@ -239,6 +239,8 @@ struct xe_gt {
> u16 reserved_bcs_instance;
> /** @usm.pf_wq: page fault work queue, unbound, high
> priority */
> struct workqueue_struct *pf_wq;
> + /** @usm.prefetch_wq: prefetch work queue, unbound,
> high priority */
> + struct workqueue_struct *prefetch_wq;
> /** @usm.acc_wq: access counter work queue, unbound,
> high priority */
> struct workqueue_struct *acc_wq;
> /**
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 6ef8c4dab647..1ae8e03aead6 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -2885,52 +2885,130 @@ static int check_ufence(struct xe_vma *vma)
> return 0;
> }
>
> -static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> +struct prefetch_thread {
> + struct work_struct work;
> + struct drm_gpusvm_ctx *ctx;
> + struct xe_vma *vma;
> + struct xe_svm_range *svm_range;
> + struct xe_tile *tile;
> + u32 region;
> + int err;
> +};
> +
> +static void prefetch_work_func(struct work_struct *w)
> {
> - bool devmem_possible = IS_DGFX(vm->xe) &&
> IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> - struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> + struct prefetch_thread *thread =
> + container_of(w, struct prefetch_thread, work);
> + struct xe_vma *vma = thread->vma;
> + struct xe_vm *vm = xe_vma_vm(vma);
> + struct xe_svm_range *svm_range = thread->svm_range;
> + u32 region = thread->region;
> + struct xe_tile *tile = thread->tile;
> int err = 0;
>
> - struct xe_svm_range *svm_range;
> + if (!region) {
> + xe_svm_range_migrate_to_smem(vm, svm_range);
> + } else if (xe_svm_range_needs_migrate_to_vram(svm_range,
> vma, region)) {
> + err = xe_svm_alloc_vram(vm, tile, svm_range, thread-
> >ctx);
> + if (err) {
> + drm_dbg(&vm->xe->drm,
> + "VRAM allocation failed, retry from
> userspace, asid=%u, gpusvm=%p, errno=%pe\n",
> + vm->usm.asid, &vm->svm.gpusvm,
> ERR_PTR(err));
> + thread->err = -ENODATA;
> + return;
> + }
> + xe_svm_range_debug(svm_range, "PREFETCH - RANGE
> MIGRATED TO VRAM");
> + }
> +
> + err = xe_svm_range_get_pages(vm, svm_range, thread->ctx);
> + if (err) {
> + drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u,
> gpusvm=%p, errno=%pe\n",
> + vm->usm.asid, &vm->svm.gpusvm,
> ERR_PTR(err));
> + if (err == -EOPNOTSUPP || err == -EFAULT || err == -
> EPERM)
> + err = -ENODATA;
> + thread->err = err;
> + return;
> + }
> +
> + xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES
> DONE");
> +}
> +
> +static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> +{
> + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> + u32 j, region = op->prefetch_range.region;
> struct drm_gpusvm_ctx ctx = {};
> - struct xe_tile *tile;
> + struct prefetch_thread stack_thread;
> + struct xe_svm_range *svm_range;
> + struct xarray prefetches;
> + bool sram = region_to_mem_type[region] == XE_PL_TT;
> + struct xe_tile *tile = sram ? xe_device_get_root_tile(vm-
> >xe) :
> + &vm->xe->tiles[region_to_mem_type[region] -
> XE_PL_VRAM0];
> unsigned long i;
> - u32 region;
> + bool devmem_possible = IS_DGFX(vm->xe) &&
> + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> + bool skip_threads = op->prefetch_range.ranges_count == 1 ||
> sram;
> + struct prefetch_thread *thread = skip_threads ?
> &stack_thread : NULL;
> + int err = 0;
>
> if (!xe_vma_is_cpu_addr_mirror(vma))
> return 0;
>
> - region = op->prefetch_range.region;
> + if (!skip_threads)
> + xa_init_flags(&prefetches, XA_FLAGS_ALLOC);
>
> ctx.read_only = xe_vma_read_only(vma);
> ctx.devmem_possible = devmem_possible;
> ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
>
> - /* TODO: Threading the migration */
> xa_for_each(&op->prefetch_range.range, i, svm_range) {
> - if (!region)
> - xe_svm_range_migrate_to_smem(vm, svm_range);
> + if (!skip_threads) {
> + thread = kmalloc(sizeof(*thread),
> GFP_KERNEL);
> + if (!thread)
> + goto wait_threads;
>
> - if (xe_svm_range_needs_migrate_to_vram(svm_range,
> vma, region)) {
> - tile = &vm->xe-
> >tiles[region_to_mem_type[region] - XE_PL_VRAM0];
> - err = xe_svm_alloc_vram(vm, tile, svm_range,
> &ctx);
> + err = xa_alloc(&prefetches, &j, thread,
> xa_limit_32b,
> + GFP_KERNEL);
> if (err) {
> - drm_dbg(&vm->xe->drm, "VRAM
> allocation failed, retry from userspace, asid=%u, gpusvm=%p,
> errno=%pe\n",
> - vm->usm.asid, &vm-
> >svm.gpusvm, ERR_PTR(err));
> - return -ENODATA;
> + kfree(thread);
> + goto wait_threads;
> }
> - xe_svm_range_debug(svm_range, "PREFETCH -
> RANGE MIGRATED TO VRAM");
> }
>
> - err = xe_svm_range_get_pages(vm, svm_range, &ctx);
> - if (err) {
> - drm_dbg(&vm->xe->drm, "Get pages failed,
> asid=%u, gpusvm=%p, errno=%pe\n",
> - vm->usm.asid, &vm->svm.gpusvm,
> ERR_PTR(err));
> - if (err == -EOPNOTSUPP || err == -EFAULT ||
> err == -EPERM)
> - err = -ENODATA;
> - return err;
> + INIT_WORK(&thread->work, prefetch_work_func);
> + thread->ctx = &ctx;
> + thread->vma = vma;
> + thread->svm_range = svm_range;
> + thread->tile = tile;
> + thread->region = region;
> + thread->err = 0;
> +
> + if (skip_threads) {
> + prefetch_work_func(&thread->work);
> + if (thread->err)
> + return thread->err;
> + } else {
> + /*
> + * Prefetch uses a dedicated workqueue, as
> the page
> + * fault workqueue cannot be shared without
> risking
> + * deadlocks—due to holding the VM lock in
> write mode
> + * here while work items in the page fault
> workqueue
> + * also require the VM lock.
> + */
> + queue_work(tile->primary_gt-
> >usm.prefetch_wq,
> + &thread->work);
> + }
> + }
> +
> +wait_threads:
> + if (!skip_threads) {
> + xa_for_each(&prefetches, i, thread) {
> + flush_work(&thread->work);
> + if (thread->err && (!err || err == -
> ENODATA))
> + err = thread->err;
> + kfree(thread);
> }
> - xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET
> PAGES DONE");
> + xa_destroy(&prefetches);
> }
>
> return err;
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/xe: Thread prefetch of SVM ranges
2025-06-16 8:28 ` Thomas Hellström
@ 2025-06-16 8:58 ` Matthew Brost
2025-06-16 9:24 ` Thomas Hellström
2025-06-16 11:20 ` Thomas Hellström
0 siblings, 2 replies; 14+ messages in thread
From: Matthew Brost @ 2025-06-16 8:58 UTC (permalink / raw)
To: Thomas Hellström; +Cc: intel-xe, himal.prasad.ghimiray, michal.mrozek
On Mon, Jun 16, 2025 at 10:28:16AM +0200, Thomas Hellström wrote:
> On Sun, 2025-06-15 at 23:47 -0700, Matthew Brost wrote:
> > The migrate_vma_* functions are very CPU-intensive; as a result,
> > prefetching SVM ranges is limited by CPU performance rather than
> > paging
> > copy engine bandwidth. To accelerate SVM range prefetching, the step
> > that calls migrate_vma_* is now threaded. This uses a dedicated
> > workqueue, as the page fault workqueue cannot be shared without
> > risking
> > deadlocks—due to the prefetch IOCTL holding the VM lock in write mode
> > while work items in the page fault workqueue also require the VM
> > lock.
> >
> > The prefetch workqueue is currently allocated in GT, similar to the
> > page
> > fault workqueue. While this is likely not the ideal location for
> > either,
> > refactoring will be deferred to a later patch.
> >
> > Running xe_exec_system_allocator --r prefetch-benchmark, which tests
> > 64MB prefetches, shows an increase from ~4.35 GB/s to 12.25 GB/s with
> > this patch on drm-tip. Enabling high SLPC further increases
> > throughput
> > to ~15.25 GB/s, and combining SLPC with ULLS raises it to ~16 GB/s.
> > Both
> > of these optimizations are upcoming.
> >
> > v2:
> > - Use dedicated prefetch workqueue
> > - Pick dedicated prefetch thread count based on profiling
> > - Skip threaded prefetch for only 1 range or if prefetching to SRAM
> > - Fully tested
> >
> > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>
> Hi,
> Is this really the right place to do optimizations like this?
>
Yes, for now.
> The migration takes place in xe_svm_alloc_vram() and is being moved to
> drm_pagemap_populate_mm(). If those functions are considered to be slow
> then they should be optimized, rather than calling them multiple times
> in parallel from an outer layer?
>
Shared code with an already-parallel fault handler... Prefetch is just
adding parallelism too.
> Before doing something like this I think we need to consider
>
> 1) Why are the migrate functions so cpu consuming? Do we have a
> performance profile for it?
Yes, I have profiled this. On BMG, a 2MB migrate takes approximately
300µs of CPU overhead in the migrate_vma_* functions, while a copy job
takes around 130µs. The copy must complete between setup and finalize,
which serializes this flow.
Thus, as of now, the only way to saturate the copy engine is to use
threads so that CPU cycles can overlap.
Have you caught up on Nvidia's series [1] and what Francois is working
on? I'd guess we'll go from ~300µs to ~7µs once that lands.
I don't know why the migrate_vma_* functions take so long—the core MM
code is tough to read. I suppose I could hack it to find out.
[1] https://lore.kernel.org/linux-mm/20250306044239.3874247-1-balbirs@nvidia.com/
> 2) Do we actually *want* to use 5 CPU cores for this?
Yes, I profiled this with a test issuing 64MB prefetches—5 threads was
ideal. I have a comment in the code about this. Once [1] lands, we’ll
likely only need 2 threads on BMG. That would probably get us to a bus
8× faster than BMG; for 16×, we might need more threads. But I think
we’ll always want at least 2, as there will always be some CPU overhead
that limits copy bandwidth due to serialization.
> 3) Isn't a single CPU write-combined non-temporal CPU memcopy enough to
> saturate the system->VRAM bandwith?
>
I'm not entirely following (see above), but almost certainly not.
Matt
> Thanks,
> Thomas
>
>
>
> > ---
> > drivers/gpu/drm/xe/xe_gt_pagefault.c | 31 ++++++-
> > drivers/gpu/drm/xe/xe_gt_types.h | 2 +
> > drivers/gpu/drm/xe/xe_vm.c | 128 +++++++++++++++++++++----
> > --
> > 3 files changed, 135 insertions(+), 26 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > index e2d975b2fddb..941cca3371f2 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > @@ -400,6 +400,8 @@ static void pagefault_fini(void *arg)
> >
> > destroy_workqueue(gt->usm.acc_wq);
> > destroy_workqueue(gt->usm.pf_wq);
> > + if (gt->usm.prefetch_wq)
> > + destroy_workqueue(gt->usm.prefetch_wq);
> > }
> >
> > static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue
> > *pf_queue)
> > @@ -438,10 +440,24 @@ static int xe_alloc_pf_queue(struct xe_gt *gt,
> > struct pf_queue *pf_queue)
> > return 0;
> > }
> >
> > +static int prefetch_thread_count(struct xe_device *xe)
> > +{
> > + if (!IS_DGFX(xe))
> > + return 0;
> > +
> > + /*
> > + * Based on profiling large aligned 2M prefetches, this is
> > the optimial
> > + * number of threads on BMG (only platform currently
> > supported). This
> > + * should be tuned for each supported platform and can
> > change on per
> > + * platform basis as optimizations land (e.g., large device
> > pages).
> > + */
> > + return 5;
> > +}
> > +
> > int xe_gt_pagefault_init(struct xe_gt *gt)
> > {
> > struct xe_device *xe = gt_to_xe(gt);
> > - int i, ret = 0;
> > + int i, count, ret = 0;
> >
> > if (!xe->info.has_usm)
> > return 0;
> > @@ -462,10 +478,23 @@ int xe_gt_pagefault_init(struct xe_gt *gt)
> > if (!gt->usm.pf_wq)
> > return -ENOMEM;
> >
> > + count = prefetch_thread_count(xe);
> > + if (count) {
> > + gt->usm.prefetch_wq =
> > alloc_workqueue("xe_gt_prefetch_work_queue",
> > + WQ_UNBOUND |
> > WQ_HIGHPRI,
> > + count);
> > + if (!gt->usm.prefetch_wq) {
> > + destroy_workqueue(gt->usm.pf_wq);
> > + return -ENOMEM;
> > + }
> > + }
> > +
> > gt->usm.acc_wq =
> > alloc_workqueue("xe_gt_access_counter_work_queue",
> > WQ_UNBOUND | WQ_HIGHPRI,
> > NUM_ACC_QUEUE);
> > if (!gt->usm.acc_wq) {
> > + if (gt->usm.prefetch_wq)
> > + destroy_workqueue(gt->usm.prefetch_wq);
> > destroy_workqueue(gt->usm.pf_wq);
> > return -ENOMEM;
> > }
> > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h
> > b/drivers/gpu/drm/xe/xe_gt_types.h
> > index 7def0959da35..d9ba4921b8ce 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > @@ -239,6 +239,8 @@ struct xe_gt {
> > u16 reserved_bcs_instance;
> > /** @usm.pf_wq: page fault work queue, unbound, high
> > priority */
> > struct workqueue_struct *pf_wq;
> > + /** @usm.prefetch_wq: prefetch work queue, unbound,
> > high priority */
> > + struct workqueue_struct *prefetch_wq;
> > /** @usm.acc_wq: access counter work queue, unbound,
> > high priority */
> > struct workqueue_struct *acc_wq;
> > /**
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 6ef8c4dab647..1ae8e03aead6 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -2885,52 +2885,130 @@ static int check_ufence(struct xe_vma *vma)
> > return 0;
> > }
> >
> > -static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> > +struct prefetch_thread {
> > + struct work_struct work;
> > + struct drm_gpusvm_ctx *ctx;
> > + struct xe_vma *vma;
> > + struct xe_svm_range *svm_range;
> > + struct xe_tile *tile;
> > + u32 region;
> > + int err;
> > +};
> > +
> > +static void prefetch_work_func(struct work_struct *w)
> > {
> > - bool devmem_possible = IS_DGFX(vm->xe) &&
> > IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> > - struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > + struct prefetch_thread *thread =
> > + container_of(w, struct prefetch_thread, work);
> > + struct xe_vma *vma = thread->vma;
> > + struct xe_vm *vm = xe_vma_vm(vma);
> > + struct xe_svm_range *svm_range = thread->svm_range;
> > + u32 region = thread->region;
> > + struct xe_tile *tile = thread->tile;
> > int err = 0;
> >
> > - struct xe_svm_range *svm_range;
> > + if (!region) {
> > + xe_svm_range_migrate_to_smem(vm, svm_range);
> > + } else if (xe_svm_range_needs_migrate_to_vram(svm_range,
> > vma, region)) {
> > + err = xe_svm_alloc_vram(vm, tile, svm_range, thread-
> > >ctx);
> > + if (err) {
> > + drm_dbg(&vm->xe->drm,
> > + "VRAM allocation failed, retry from
> > userspace, asid=%u, gpusvm=%p, errno=%pe\n",
> > + vm->usm.asid, &vm->svm.gpusvm,
> > ERR_PTR(err));
> > + thread->err = -ENODATA;
> > + return;
> > + }
> > + xe_svm_range_debug(svm_range, "PREFETCH - RANGE
> > MIGRATED TO VRAM");
> > + }
> > +
> > + err = xe_svm_range_get_pages(vm, svm_range, thread->ctx);
> > + if (err) {
> > + drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u,
> > gpusvm=%p, errno=%pe\n",
> > + vm->usm.asid, &vm->svm.gpusvm,
> > ERR_PTR(err));
> > + if (err == -EOPNOTSUPP || err == -EFAULT || err == -
> > EPERM)
> > + err = -ENODATA;
> > + thread->err = err;
> > + return;
> > + }
> > +
> > + xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES
> > DONE");
> > +}
> > +
> > +static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> > +{
> > + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > + u32 j, region = op->prefetch_range.region;
> > struct drm_gpusvm_ctx ctx = {};
> > - struct xe_tile *tile;
> > + struct prefetch_thread stack_thread;
> > + struct xe_svm_range *svm_range;
> > + struct xarray prefetches;
> > + bool sram = region_to_mem_type[region] == XE_PL_TT;
> > + struct xe_tile *tile = sram ? xe_device_get_root_tile(vm-
> > >xe) :
> > + &vm->xe->tiles[region_to_mem_type[region] -
> > XE_PL_VRAM0];
> > unsigned long i;
> > - u32 region;
> > + bool devmem_possible = IS_DGFX(vm->xe) &&
> > + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> > + bool skip_threads = op->prefetch_range.ranges_count == 1 ||
> > sram;
> > + struct prefetch_thread *thread = skip_threads ?
> > &stack_thread : NULL;
> > + int err = 0;
> >
> > if (!xe_vma_is_cpu_addr_mirror(vma))
> > return 0;
> >
> > - region = op->prefetch_range.region;
> > + if (!skip_threads)
> > + xa_init_flags(&prefetches, XA_FLAGS_ALLOC);
> >
> > ctx.read_only = xe_vma_read_only(vma);
> > ctx.devmem_possible = devmem_possible;
> > ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
> >
> > - /* TODO: Threading the migration */
> > xa_for_each(&op->prefetch_range.range, i, svm_range) {
> > - if (!region)
> > - xe_svm_range_migrate_to_smem(vm, svm_range);
> > + if (!skip_threads) {
> > + thread = kmalloc(sizeof(*thread),
> > GFP_KERNEL);
> > + if (!thread)
> > + goto wait_threads;
> >
> > - if (xe_svm_range_needs_migrate_to_vram(svm_range,
> > vma, region)) {
> > - tile = &vm->xe-
> > >tiles[region_to_mem_type[region] - XE_PL_VRAM0];
> > - err = xe_svm_alloc_vram(vm, tile, svm_range,
> > &ctx);
> > + err = xa_alloc(&prefetches, &j, thread,
> > xa_limit_32b,
> > + GFP_KERNEL);
> > if (err) {
> > - drm_dbg(&vm->xe->drm, "VRAM
> > allocation failed, retry from userspace, asid=%u, gpusvm=%p,
> > errno=%pe\n",
> > - vm->usm.asid, &vm-
> > >svm.gpusvm, ERR_PTR(err));
> > - return -ENODATA;
> > + kfree(thread);
> > + goto wait_threads;
> > }
> > - xe_svm_range_debug(svm_range, "PREFETCH -
> > RANGE MIGRATED TO VRAM");
> > }
> >
> > - err = xe_svm_range_get_pages(vm, svm_range, &ctx);
> > - if (err) {
> > - drm_dbg(&vm->xe->drm, "Get pages failed,
> > asid=%u, gpusvm=%p, errno=%pe\n",
> > - vm->usm.asid, &vm->svm.gpusvm,
> > ERR_PTR(err));
> > - if (err == -EOPNOTSUPP || err == -EFAULT ||
> > err == -EPERM)
> > - err = -ENODATA;
> > - return err;
> > + INIT_WORK(&thread->work, prefetch_work_func);
> > + thread->ctx = &ctx;
> > + thread->vma = vma;
> > + thread->svm_range = svm_range;
> > + thread->tile = tile;
> > + thread->region = region;
> > + thread->err = 0;
> > +
> > + if (skip_threads) {
> > + prefetch_work_func(&thread->work);
> > + if (thread->err)
> > + return thread->err;
> > + } else {
> > + /*
> > + * Prefetch uses a dedicated workqueue, as
> > the page
> > + * fault workqueue cannot be shared without
> > risking
> > + * deadlocks—due to holding the VM lock in
> > write mode
> > + * here while work items in the page fault
> > workqueue
> > + * also require the VM lock.
> > + */
> > + queue_work(tile->primary_gt-
> > >usm.prefetch_wq,
> > + &thread->work);
> > + }
> > + }
> > +
> > +wait_threads:
> > + if (!skip_threads) {
> > + xa_for_each(&prefetches, i, thread) {
> > + flush_work(&thread->work);
> > + if (thread->err && (!err || err == -
> > ENODATA))
> > + err = thread->err;
> > + kfree(thread);
> > }
> > - xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET
> > PAGES DONE");
> > + xa_destroy(&prefetches);
> > }
> >
> > return err;
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/xe: Thread prefetch of SVM ranges
2025-06-16 8:58 ` Matthew Brost
@ 2025-06-16 9:24 ` Thomas Hellström
2025-06-16 12:06 ` Mrozek, Michal
2025-06-16 11:20 ` Thomas Hellström
1 sibling, 1 reply; 14+ messages in thread
From: Thomas Hellström @ 2025-06-16 9:24 UTC (permalink / raw)
To: Matthew Brost; +Cc: intel-xe, himal.prasad.ghimiray, michal.mrozek
On Mon, 2025-06-16 at 01:58 -0700, Matthew Brost wrote:
> On Mon, Jun 16, 2025 at 10:28:16AM +0200, Thomas Hellström wrote:
> > On Sun, 2025-06-15 at 23:47 -0700, Matthew Brost wrote:
> > > The migrate_vma_* functions are very CPU-intensive; as a result,
> > > prefetching SVM ranges is limited by CPU performance rather than
> > > paging
> > > copy engine bandwidth. To accelerate SVM range prefetching, the
> > > step
> > > that calls migrate_vma_* is now threaded. This uses a dedicated
> > > workqueue, as the page fault workqueue cannot be shared without
> > > risking
> > > deadlocks—due to the prefetch IOCTL holding the VM lock in write
> > > mode
> > > while work items in the page fault workqueue also require the VM
> > > lock.
> > >
> > > The prefetch workqueue is currently allocated in GT, similar to
> > > the
> > > page
> > > fault workqueue. While this is likely not the ideal location for
> > > either,
> > > refactoring will be deferred to a later patch.
> > >
> > > Running xe_exec_system_allocator --r prefetch-benchmark, which
> > > tests
> > > 64MB prefetches, shows an increase from ~4.35 GB/s to 12.25 GB/s
> > > with
> > > this patch on drm-tip. Enabling high SLPC further increases
> > > throughput
> > > to ~15.25 GB/s, and combining SLPC with ULLS raises it to ~16
> > > GB/s.
> > > Both
> > > of these optimizations are upcoming.
> > >
> > > v2:
> > > - Use dedicated prefetch workqueue
> > > - Pick dedicated prefetch thread count based on profiling
> > > - Skip threaded prefetch for only 1 range or if prefetching to
> > > SRAM
> > > - Fully tested
> > >
> > > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
> > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> >
> > Hi,
> > Is this really the right place to do optimizations like this?
> >
>
> Yes, for now.
I think it's pretty dangerous to rush in optimizations that add
technical debt. That was exactly what made i915 into what it was.
>
> > The migration takes place in xe_svm_alloc_vram() and is being moved
> > to
> > drm_pagemap_populate_mm(). If those functions are considered to be
> > slow
> > then they should be optimized, rather than calling them multiple
> > times
> > in parallel from an outer layer?
> >
>
> Shared code with an already-parallel fault handler... Prefetch is
> just
> adding parallelism too.
What I meant was thinking from a higher perspective. If we call
drm_pagemap_populate_mm() (that in theory is designed to be available
from another driver) we'd expect that function to perform in an optimal
way. If anywhere, this parallelization should be placed in what is now
xe_svm_alloc_vram().
>
> > Before doing something like this I think we need to consider
> >
> > 1) Why are the migrate functions so cpu consuming? Do we have a
> > performance profile for it?
>
> Yes, I have profiled this. On BMG, a 2MB migrate takes approximately
> 300µs of CPU overhead in the migrate_vma_* functions, while a copy
> job
> takes around 130µs. The copy must complete between setup and
> finalize,
> which serializes this flow.
>
> Thus, as of now, the only way to saturate the copy engine is to use
> threads so that CPU cycles can overlap.
So we're actually not using 100% CPU-time all the time but rather
spending time waiting for the GPU copy to finish? If so, the effect
comes more from being able to start the finalize() step early. Couldn't
that be made within a single thread using an alternative
synchronization strategy?
>
> Have you caught up on Nvidia's series [1] and what Francois is
> working
> on? I'd guess we'll go from ~300µs to ~7µs once that lands.
Yes, been discussing that a lot with Francois, but that's orthogonal to
this, right?
>
> I don't know why the migrate_vma_* functions take so long—the core MM
> code is tough to read. I suppose I could hack it to find out.
>
> [1]
> https://lore.kernel.org/linux-mm/20250306044239.3874247-1-balbirs@nvidia.com/
>
>
> > 2) Do we actually *want* to use 5 CPU cores for this?
>
> Yes, I profiled this with a test issuing 64MB prefetches—5 threads
> was
> ideal. I have a comment in the code about this. Once [1] lands, we’ll
> likely only need 2 threads on BMG. That would probably get us to a
> bus
> 8× faster than BMG; for 16×, we might need more threads. But I think
> we’ll always want at least 2, as there will always be some CPU
> overhead
> that limits copy bandwidth due to serialization.
What I meant was IIRC NEO has previously been picky about starting
threads. Perhaps Michal can enlighten us here?
>
> > 3) Isn't a single CPU write-combined non-temporal CPU memcopy
> > enough to
> > saturate the system->VRAM bandwith?
> >
>
> I'm not entirely following (see above), but almost certainly not.
What I meant was instead of migrating using GPU with 5 threads to
mitigate GPU latencies, wouldn't a CPU optimized memcopy from system to
VRAM be able to saturate the PCIe bus at 16GiB/s without any latencies
to be considered at all.
/Thomas
>
> Matt
>
> > Thanks,
> > Thomas
> >
> >
> >
> > > ---
> > > drivers/gpu/drm/xe/xe_gt_pagefault.c | 31 ++++++-
> > > drivers/gpu/drm/xe/xe_gt_types.h | 2 +
> > > drivers/gpu/drm/xe/xe_vm.c | 128
> > > +++++++++++++++++++++----
> > > --
> > > 3 files changed, 135 insertions(+), 26 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > index e2d975b2fddb..941cca3371f2 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > @@ -400,6 +400,8 @@ static void pagefault_fini(void *arg)
> > >
> > > destroy_workqueue(gt->usm.acc_wq);
> > > destroy_workqueue(gt->usm.pf_wq);
> > > + if (gt->usm.prefetch_wq)
> > > + destroy_workqueue(gt->usm.prefetch_wq);
> > > }
> > >
> > > static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue
> > > *pf_queue)
> > > @@ -438,10 +440,24 @@ static int xe_alloc_pf_queue(struct xe_gt
> > > *gt,
> > > struct pf_queue *pf_queue)
> > > return 0;
> > > }
> > >
> > > +static int prefetch_thread_count(struct xe_device *xe)
> > > +{
> > > + if (!IS_DGFX(xe))
> > > + return 0;
> > > +
> > > + /*
> > > + * Based on profiling large aligned 2M prefetches, this
> > > is
> > > the optimal
> > > + * number of threads on BMG (only platform currently
> > > supported). This
> > > + * should be tuned for each supported platform and can
> > > change on per
> > > + * platform basis as optimizations land (e.g., large
> > > device
> > > pages).
> > > + */
> > > + return 5;
> > > +}
> > > +
> > > int xe_gt_pagefault_init(struct xe_gt *gt)
> > > {
> > > struct xe_device *xe = gt_to_xe(gt);
> > > - int i, ret = 0;
> > > + int i, count, ret = 0;
> > >
> > > if (!xe->info.has_usm)
> > > return 0;
> > > @@ -462,10 +478,23 @@ int xe_gt_pagefault_init(struct xe_gt *gt)
> > > if (!gt->usm.pf_wq)
> > > return -ENOMEM;
> > >
> > > + count = prefetch_thread_count(xe);
> > > + if (count) {
> > > + gt->usm.prefetch_wq =
> > > alloc_workqueue("xe_gt_prefetch_work_queue",
> > > + WQ_UNBOUND
> > > |
> > > WQ_HIGHPRI,
> > > + count);
> > > + if (!gt->usm.prefetch_wq) {
> > > + destroy_workqueue(gt->usm.pf_wq);
> > > + return -ENOMEM;
> > > + }
> > > + }
> > > +
> > > gt->usm.acc_wq =
> > > alloc_workqueue("xe_gt_access_counter_work_queue",
> > > WQ_UNBOUND |
> > > WQ_HIGHPRI,
> > > NUM_ACC_QUEUE);
> > > if (!gt->usm.acc_wq) {
> > > + if (gt->usm.prefetch_wq)
> > > + destroy_workqueue(gt->usm.prefetch_wq);
> > > destroy_workqueue(gt->usm.pf_wq);
> > > return -ENOMEM;
> > > }
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h
> > > b/drivers/gpu/drm/xe/xe_gt_types.h
> > > index 7def0959da35..d9ba4921b8ce 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > > @@ -239,6 +239,8 @@ struct xe_gt {
> > > u16 reserved_bcs_instance;
> > > /** @usm.pf_wq: page fault work queue, unbound,
> > > high
> > > priority */
> > > struct workqueue_struct *pf_wq;
> > > + /** @usm.prefetch_wq: prefetch work queue,
> > > unbound,
> > > high priority */
> > > + struct workqueue_struct *prefetch_wq;
> > > /** @usm.acc_wq: access counter work queue,
> > > unbound,
> > > high priority */
> > > struct workqueue_struct *acc_wq;
> > > /**
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c
> > > b/drivers/gpu/drm/xe/xe_vm.c
> > > index 6ef8c4dab647..1ae8e03aead6 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -2885,52 +2885,130 @@ static int check_ufence(struct xe_vma
> > > *vma)
> > > return 0;
> > > }
> > >
> > > -static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op
> > > *op)
> > > +struct prefetch_thread {
> > > + struct work_struct work;
> > > + struct drm_gpusvm_ctx *ctx;
> > > + struct xe_vma *vma;
> > > + struct xe_svm_range *svm_range;
> > > + struct xe_tile *tile;
> > > + u32 region;
> > > + int err;
> > > +};
> > > +
> > > +static void prefetch_work_func(struct work_struct *w)
> > > {
> > > - bool devmem_possible = IS_DGFX(vm->xe) &&
> > > IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> > > - struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > > + struct prefetch_thread *thread =
> > > + container_of(w, struct prefetch_thread, work);
> > > + struct xe_vma *vma = thread->vma;
> > > + struct xe_vm *vm = xe_vma_vm(vma);
> > > + struct xe_svm_range *svm_range = thread->svm_range;
> > > + u32 region = thread->region;
> > > + struct xe_tile *tile = thread->tile;
> > > int err = 0;
> > >
> > > - struct xe_svm_range *svm_range;
> > > + if (!region) {
> > > + xe_svm_range_migrate_to_smem(vm, svm_range);
> > > + } else if (xe_svm_range_needs_migrate_to_vram(svm_range,
> > > vma, region)) {
> > > + err = xe_svm_alloc_vram(vm, tile, svm_range,
> > > thread-
> > > > ctx);
> > > + if (err) {
> > > + drm_dbg(&vm->xe->drm,
> > > + "VRAM allocation failed, retry
> > > from
> > > userspace, asid=%u, gpusvm=%p, errno=%pe\n",
> > > + vm->usm.asid, &vm->svm.gpusvm,
> > > ERR_PTR(err));
> > > + thread->err = -ENODATA;
> > > + return;
> > > + }
> > > + xe_svm_range_debug(svm_range, "PREFETCH - RANGE
> > > MIGRATED TO VRAM");
> > > + }
> > > +
> > > + err = xe_svm_range_get_pages(vm, svm_range, thread-
> > > >ctx);
> > > + if (err) {
> > > + drm_dbg(&vm->xe->drm, "Get pages failed,
> > > asid=%u,
> > > gpusvm=%p, errno=%pe\n",
> > > + vm->usm.asid, &vm->svm.gpusvm,
> > > ERR_PTR(err));
> > > + if (err == -EOPNOTSUPP || err == -EFAULT || err
> > > == -
> > > EPERM)
> > > + err = -ENODATA;
> > > + thread->err = err;
> > > + return;
> > > + }
> > > +
> > > + xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET
> > > PAGES
> > > DONE");
> > > +}
> > > +
> > > +static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op
> > > *op)
> > > +{
> > > + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > > + u32 j, region = op->prefetch_range.region;
> > > struct drm_gpusvm_ctx ctx = {};
> > > - struct xe_tile *tile;
> > > + struct prefetch_thread stack_thread;
> > > + struct xe_svm_range *svm_range;
> > > + struct xarray prefetches;
> > > + bool sram = region_to_mem_type[region] == XE_PL_TT;
> > > + struct xe_tile *tile = sram ?
> > > xe_device_get_root_tile(vm-
> > > > xe) :
> > > + &vm->xe->tiles[region_to_mem_type[region] -
> > > XE_PL_VRAM0];
> > > unsigned long i;
> > > - u32 region;
> > > + bool devmem_possible = IS_DGFX(vm->xe) &&
> > > + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> > > + bool skip_threads = op->prefetch_range.ranges_count == 1
> > > ||
> > > sram;
> > > + struct prefetch_thread *thread = skip_threads ?
> > > &stack_thread : NULL;
> > > + int err = 0;
> > >
> > > if (!xe_vma_is_cpu_addr_mirror(vma))
> > > return 0;
> > >
> > > - region = op->prefetch_range.region;
> > > + if (!skip_threads)
> > > + xa_init_flags(&prefetches, XA_FLAGS_ALLOC);
> > >
> > > ctx.read_only = xe_vma_read_only(vma);
> > > ctx.devmem_possible = devmem_possible;
> > > ctx.check_pages_threshold = devmem_possible ? SZ_64K :
> > > 0;
> > >
> > > - /* TODO: Threading the migration */
> > > xa_for_each(&op->prefetch_range.range, i, svm_range) {
> > > - if (!region)
> > > - xe_svm_range_migrate_to_smem(vm,
> > > svm_range);
> > > + if (!skip_threads) {
> > > + thread = kmalloc(sizeof(*thread),
> > > GFP_KERNEL);
> > > + if (!thread)
> > > + goto wait_threads;
> > >
> > > - if
> > > (xe_svm_range_needs_migrate_to_vram(svm_range,
> > > vma, region)) {
> > > - tile = &vm->xe-
> > > > tiles[region_to_mem_type[region] - XE_PL_VRAM0];
> > > - err = xe_svm_alloc_vram(vm, tile,
> > > svm_range,
> > > &ctx);
> > > + err = xa_alloc(&prefetches, &j, thread,
> > > xa_limit_32b,
> > > + GFP_KERNEL);
> > > if (err) {
> > > - drm_dbg(&vm->xe->drm, "VRAM
> > > allocation failed, retry from userspace, asid=%u, gpusvm=%p,
> > > errno=%pe\n",
> > > - vm->usm.asid, &vm-
> > > > svm.gpusvm, ERR_PTR(err));
> > > - return -ENODATA;
> > > + kfree(thread);
> > > + goto wait_threads;
> > > }
> > > - xe_svm_range_debug(svm_range, "PREFETCH
> > > -
> > > RANGE MIGRATED TO VRAM");
> > > }
> > >
> > > - err = xe_svm_range_get_pages(vm, svm_range,
> > > &ctx);
> > > - if (err) {
> > > - drm_dbg(&vm->xe->drm, "Get pages failed,
> > > asid=%u, gpusvm=%p, errno=%pe\n",
> > > - vm->usm.asid, &vm->svm.gpusvm,
> > > ERR_PTR(err));
> > > - if (err == -EOPNOTSUPP || err == -EFAULT
> > > ||
> > > err == -EPERM)
> > > - err = -ENODATA;
> > > - return err;
> > > + INIT_WORK(&thread->work, prefetch_work_func);
> > > + thread->ctx = &ctx;
> > > + thread->vma = vma;
> > > + thread->svm_range = svm_range;
> > > + thread->tile = tile;
> > > + thread->region = region;
> > > + thread->err = 0;
> > > +
> > > + if (skip_threads) {
> > > + prefetch_work_func(&thread->work);
> > > + if (thread->err)
> > > + return thread->err;
> > > + } else {
> > > + /*
> > > + * Prefetch uses a dedicated workqueue,
> > > as
> > > the page
> > > + * fault workqueue cannot be shared
> > > without
> > > risking
> > > + * deadlocks—due to holding the VM lock
> > > in
> > > write mode
> > > + * here while work items in the page
> > > fault
> > > workqueue
> > > + * also require the VM lock.
> > > + */
> > > + queue_work(tile->primary_gt-
> > > > usm.prefetch_wq,
> > > + &thread->work);
> > > + }
> > > + }
> > > +
> > > +wait_threads:
> > > + if (!skip_threads) {
> > > + xa_for_each(&prefetches, i, thread) {
> > > + flush_work(&thread->work);
> > > + if (thread->err && (!err || err == -
> > > ENODATA))
> > > + err = thread->err;
> > > + kfree(thread);
> > > }
> > > - xe_svm_range_debug(svm_range, "PREFETCH - RANGE
> > > GET
> > > PAGES DONE");
> > > + xa_destroy(&prefetches);
> > > }
> > >
> > > return err;
> >
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/xe: Thread prefetch of SVM ranges
2025-06-16 8:58 ` Matthew Brost
2025-06-16 9:24 ` Thomas Hellström
@ 2025-06-16 11:20 ` Thomas Hellström
2025-06-17 17:10 ` Matthew Brost
1 sibling, 1 reply; 14+ messages in thread
From: Thomas Hellström @ 2025-06-16 11:20 UTC (permalink / raw)
To: Matthew Brost; +Cc: intel-xe, himal.prasad.ghimiray, michal.mrozek
Hi,
Wait, let me take a closer look. I got the impression from the commit
message that parallelization was done on a lower level than it actually
was.
Thomas.
On Mon, 2025-06-16 at 01:58 -0700, Matthew Brost wrote:
> On Mon, Jun 16, 2025 at 10:28:16AM +0200, Thomas Hellström wrote:
> > On Sun, 2025-06-15 at 23:47 -0700, Matthew Brost wrote:
> > > The migrate_vma_* functions are very CPU-intensive; as a result,
> > > prefetching SVM ranges is limited by CPU performance rather than
> > > paging
> > > copy engine bandwidth. To accelerate SVM range prefetching, the
> > > step
> > > that calls migrate_vma_* is now threaded. This uses a dedicated
> > > workqueue, as the page fault workqueue cannot be shared without
> > > risking
> > > deadlocks—due to the prefetch IOCTL holding the VM lock in write
> > > mode
> > > while work items in the page fault workqueue also require the VM
> > > lock.
> > >
> > > The prefetch workqueue is currently allocated in GT, similar to
> > > the
> > > page
> > > fault workqueue. While this is likely not the ideal location for
> > > either,
> > > refactoring will be deferred to a later patch.
> > >
> > > Running xe_exec_system_allocator --r prefetch-benchmark, which
> > > tests
> > > 64MB prefetches, shows an increase from ~4.35 GB/s to 12.25 GB/s
> > > with
> > > this patch on drm-tip. Enabling high SLPC further increases
> > > throughput
> > > to ~15.25 GB/s, and combining SLPC with ULLS raises it to ~16
> > > GB/s.
> > > Both
> > > of these optimizations are upcoming.
> > >
> > > v2:
> > > - Use dedicated prefetch workqueue
> > > - Pick dedicated prefetch thread count based on profiling
> > > - Skip threaded prefetch for only 1 range or if prefetching to
> > > SRAM
> > > - Fully tested
> > >
> > > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
> > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> >
> > Hi,
> > Is this really the right place to do optimizations like this?
> >
>
> Yes, for now.
>
> > The migration takes place in xe_svm_alloc_vram() and is being moved
> > to
> > drm_pagemap_populate_mm(). If those functions are considered to be
> > slow
> > then they should be optimized, rather than calling them multiple
> > times
> > in parallel from an outer layer?
> >
>
> Shared code with an already-parallel fault handler... Prefetch is
> just
> adding parallelism too.
>
> > Before doing something like this I think we need to consider
> >
> > 1) Why are the migrate functions so cpu consuming? Do we have a
> > performance profile for it?
>
> Yes, I have profiled this. On BMG, a 2MB migrate takes approximately
> 300µs of CPU overhead in the migrate_vma_* functions, while a copy
> job
> takes around 130µs. The copy must complete between setup and
> finalize,
> which serializes this flow.
>
> Thus, as of now, the only way to saturate the copy engine is to use
> threads so that CPU cycles can overlap.
>
> Have you caught up on Nvidia's series [1] and what Francois is
> working
> on? I'd guess we'll go from ~300µs to ~7µs once that lands.
>
> I don't know why the migrate_vma_* functions take so long—the core MM
> code is tough to read. I suppose I could hack it to find out.
>
> [1]
> https://lore.kernel.org/linux-mm/20250306044239.3874247-1-balbirs@nvidia.com/
>
>
> > 2) Do we actually *want* to use 5 CPU cores for this?
>
> Yes, I profiled this with a test issuing 64MB prefetches—5 threads
> was
> ideal. I have a comment in the code about this. Once [1] lands, we’ll
> likely only need 2 threads on BMG. That would probably get us to a
> bus
> 8× faster than BMG; for 16×, we might need more threads. But I think
> we’ll always want at least 2, as there will always be some CPU
> overhead
> that limits copy bandwidth due to serialization.
>
> > 3) Isn't a single CPU write-combined non-temporal CPU memcopy
> > enough to
> > saturate the system->VRAM bandwith?
> >
>
> I'm not entirely following (see above), but almost certainly not.
>
> Matt
>
> > Thanks,
> > Thomas
> >
> >
> >
> > > ---
> > > drivers/gpu/drm/xe/xe_gt_pagefault.c | 31 ++++++-
> > > drivers/gpu/drm/xe/xe_gt_types.h | 2 +
> > > drivers/gpu/drm/xe/xe_vm.c | 128
> > > +++++++++++++++++++++----
> > > --
> > > 3 files changed, 135 insertions(+), 26 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > index e2d975b2fddb..941cca3371f2 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > @@ -400,6 +400,8 @@ static void pagefault_fini(void *arg)
> > >
> > > destroy_workqueue(gt->usm.acc_wq);
> > > destroy_workqueue(gt->usm.pf_wq);
> > > + if (gt->usm.prefetch_wq)
> > > + destroy_workqueue(gt->usm.prefetch_wq);
> > > }
> > >
> > > static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue
> > > *pf_queue)
> > > @@ -438,10 +440,24 @@ static int xe_alloc_pf_queue(struct xe_gt
> > > *gt,
> > > struct pf_queue *pf_queue)
> > > return 0;
> > > }
> > >
> > > +static int prefetch_thread_count(struct xe_device *xe)
> > > +{
> > > + if (!IS_DGFX(xe))
> > > + return 0;
> > > +
> > > + /*
> > > + * Based on profiling large aligned 2M prefetches, this
> > > is
> > > the optimal
> > > + * number of threads on BMG (only platform currently
> > > supported). This
> > > + * should be tuned for each supported platform and can
> > > change on per
> > > + * platform basis as optimizations land (e.g., large
> > > device
> > > pages).
> > > + */
> > > + return 5;
> > > +}
> > > +
> > > int xe_gt_pagefault_init(struct xe_gt *gt)
> > > {
> > > struct xe_device *xe = gt_to_xe(gt);
> > > - int i, ret = 0;
> > > + int i, count, ret = 0;
> > >
> > > if (!xe->info.has_usm)
> > > return 0;
> > > @@ -462,10 +478,23 @@ int xe_gt_pagefault_init(struct xe_gt *gt)
> > > if (!gt->usm.pf_wq)
> > > return -ENOMEM;
> > >
> > > + count = prefetch_thread_count(xe);
> > > + if (count) {
> > > + gt->usm.prefetch_wq =
> > > alloc_workqueue("xe_gt_prefetch_work_queue",
> > > + WQ_UNBOUND
> > > |
> > > WQ_HIGHPRI,
> > > + count);
> > > + if (!gt->usm.prefetch_wq) {
> > > + destroy_workqueue(gt->usm.pf_wq);
> > > + return -ENOMEM;
> > > + }
> > > + }
> > > +
> > > gt->usm.acc_wq =
> > > alloc_workqueue("xe_gt_access_counter_work_queue",
> > > WQ_UNBOUND |
> > > WQ_HIGHPRI,
> > > NUM_ACC_QUEUE);
> > > if (!gt->usm.acc_wq) {
> > > + if (gt->usm.prefetch_wq)
> > > + destroy_workqueue(gt->usm.prefetch_wq);
> > > destroy_workqueue(gt->usm.pf_wq);
> > > return -ENOMEM;
> > > }
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h
> > > b/drivers/gpu/drm/xe/xe_gt_types.h
> > > index 7def0959da35..d9ba4921b8ce 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > > @@ -239,6 +239,8 @@ struct xe_gt {
> > > u16 reserved_bcs_instance;
> > > /** @usm.pf_wq: page fault work queue, unbound,
> > > high
> > > priority */
> > > struct workqueue_struct *pf_wq;
> > > + /** @usm.prefetch_wq: prefetch work queue,
> > > unbound,
> > > high priority */
> > > + struct workqueue_struct *prefetch_wq;
> > > /** @usm.acc_wq: access counter work queue,
> > > unbound,
> > > high priority */
> > > struct workqueue_struct *acc_wq;
> > > /**
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c
> > > b/drivers/gpu/drm/xe/xe_vm.c
> > > index 6ef8c4dab647..1ae8e03aead6 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -2885,52 +2885,130 @@ static int check_ufence(struct xe_vma
> > > *vma)
> > > return 0;
> > > }
> > >
> > > -static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op
> > > *op)
> > > +struct prefetch_thread {
> > > + struct work_struct work;
> > > + struct drm_gpusvm_ctx *ctx;
> > > + struct xe_vma *vma;
> > > + struct xe_svm_range *svm_range;
> > > + struct xe_tile *tile;
> > > + u32 region;
> > > + int err;
> > > +};
> > > +
> > > +static void prefetch_work_func(struct work_struct *w)
> > > {
> > > - bool devmem_possible = IS_DGFX(vm->xe) &&
> > > IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> > > - struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > > + struct prefetch_thread *thread =
> > > + container_of(w, struct prefetch_thread, work);
> > > + struct xe_vma *vma = thread->vma;
> > > + struct xe_vm *vm = xe_vma_vm(vma);
> > > + struct xe_svm_range *svm_range = thread->svm_range;
> > > + u32 region = thread->region;
> > > + struct xe_tile *tile = thread->tile;
> > > int err = 0;
> > >
> > > - struct xe_svm_range *svm_range;
> > > + if (!region) {
> > > + xe_svm_range_migrate_to_smem(vm, svm_range);
> > > + } else if (xe_svm_range_needs_migrate_to_vram(svm_range,
> > > vma, region)) {
> > > + err = xe_svm_alloc_vram(vm, tile, svm_range,
> > > thread-
> > > > ctx);
> > > + if (err) {
> > > + drm_dbg(&vm->xe->drm,
> > > + "VRAM allocation failed, retry
> > > from
> > > userspace, asid=%u, gpusvm=%p, errno=%pe\n",
> > > + vm->usm.asid, &vm->svm.gpusvm,
> > > ERR_PTR(err));
> > > + thread->err = -ENODATA;
> > > + return;
> > > + }
> > > + xe_svm_range_debug(svm_range, "PREFETCH - RANGE
> > > MIGRATED TO VRAM");
> > > + }
> > > +
> > > + err = xe_svm_range_get_pages(vm, svm_range, thread-
> > > >ctx);
> > > + if (err) {
> > > + drm_dbg(&vm->xe->drm, "Get pages failed,
> > > asid=%u,
> > > gpusvm=%p, errno=%pe\n",
> > > + vm->usm.asid, &vm->svm.gpusvm,
> > > ERR_PTR(err));
> > > + if (err == -EOPNOTSUPP || err == -EFAULT || err
> > > == -
> > > EPERM)
> > > + err = -ENODATA;
> > > + thread->err = err;
> > > + return;
> > > + }
> > > +
> > > + xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET
> > > PAGES
> > > DONE");
> > > +}
> > > +
> > > +static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op
> > > *op)
> > > +{
> > > + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > > + u32 j, region = op->prefetch_range.region;
> > > struct drm_gpusvm_ctx ctx = {};
> > > - struct xe_tile *tile;
> > > + struct prefetch_thread stack_thread;
> > > + struct xe_svm_range *svm_range;
> > > + struct xarray prefetches;
> > > + bool sram = region_to_mem_type[region] == XE_PL_TT;
> > > + struct xe_tile *tile = sram ?
> > > xe_device_get_root_tile(vm-
> > > > xe) :
> > > + &vm->xe->tiles[region_to_mem_type[region] -
> > > XE_PL_VRAM0];
> > > unsigned long i;
> > > - u32 region;
> > > + bool devmem_possible = IS_DGFX(vm->xe) &&
> > > + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> > > + bool skip_threads = op->prefetch_range.ranges_count == 1
> > > ||
> > > sram;
> > > + struct prefetch_thread *thread = skip_threads ?
> > > &stack_thread : NULL;
> > > + int err = 0;
> > >
> > > if (!xe_vma_is_cpu_addr_mirror(vma))
> > > return 0;
> > >
> > > - region = op->prefetch_range.region;
> > > + if (!skip_threads)
> > > + xa_init_flags(&prefetches, XA_FLAGS_ALLOC);
> > >
> > > ctx.read_only = xe_vma_read_only(vma);
> > > ctx.devmem_possible = devmem_possible;
> > > ctx.check_pages_threshold = devmem_possible ? SZ_64K :
> > > 0;
> > >
> > > - /* TODO: Threading the migration */
> > > xa_for_each(&op->prefetch_range.range, i, svm_range) {
> > > - if (!region)
> > > - xe_svm_range_migrate_to_smem(vm,
> > > svm_range);
> > > + if (!skip_threads) {
> > > + thread = kmalloc(sizeof(*thread),
> > > GFP_KERNEL);
> > > + if (!thread)
> > > + goto wait_threads;
> > >
> > > - if
> > > (xe_svm_range_needs_migrate_to_vram(svm_range,
> > > vma, region)) {
> > > - tile = &vm->xe-
> > > > tiles[region_to_mem_type[region] - XE_PL_VRAM0];
> > > - err = xe_svm_alloc_vram(vm, tile,
> > > svm_range,
> > > &ctx);
> > > + err = xa_alloc(&prefetches, &j, thread,
> > > xa_limit_32b,
> > > + GFP_KERNEL);
> > > if (err) {
> > > - drm_dbg(&vm->xe->drm, "VRAM
> > > allocation failed, retry from userspace, asid=%u, gpusvm=%p,
> > > errno=%pe\n",
> > > - vm->usm.asid, &vm-
> > > > svm.gpusvm, ERR_PTR(err));
> > > - return -ENODATA;
> > > + kfree(thread);
> > > + goto wait_threads;
> > > }
> > > - xe_svm_range_debug(svm_range, "PREFETCH
> > > -
> > > RANGE MIGRATED TO VRAM");
> > > }
> > >
> > > - err = xe_svm_range_get_pages(vm, svm_range,
> > > &ctx);
> > > - if (err) {
> > > - drm_dbg(&vm->xe->drm, "Get pages failed,
> > > asid=%u, gpusvm=%p, errno=%pe\n",
> > > - vm->usm.asid, &vm->svm.gpusvm,
> > > ERR_PTR(err));
> > > - if (err == -EOPNOTSUPP || err == -EFAULT
> > > ||
> > > err == -EPERM)
> > > - err = -ENODATA;
> > > - return err;
> > > + INIT_WORK(&thread->work, prefetch_work_func);
> > > + thread->ctx = &ctx;
> > > + thread->vma = vma;
> > > + thread->svm_range = svm_range;
> > > + thread->tile = tile;
> > > + thread->region = region;
> > > + thread->err = 0;
> > > +
> > > + if (skip_threads) {
> > > + prefetch_work_func(&thread->work);
> > > + if (thread->err)
> > > + return thread->err;
> > > + } else {
> > > + /*
> > > + * Prefetch uses a dedicated workqueue,
> > > as
> > > the page
> > > + * fault workqueue cannot be shared
> > > without
> > > risking
> > > + * deadlocks—due to holding the VM lock
> > > in
> > > write mode
> > > + * here while work items in the page
> > > fault
> > > workqueue
> > > + * also require the VM lock.
> > > + */
> > > + queue_work(tile->primary_gt-
> > > > usm.prefetch_wq,
> > > + &thread->work);
> > > + }
> > > + }
> > > +
> > > +wait_threads:
> > > + if (!skip_threads) {
> > > + xa_for_each(&prefetches, i, thread) {
> > > + flush_work(&thread->work);
> > > + if (thread->err && (!err || err == -
> > > ENODATA))
> > > + err = thread->err;
> > > + kfree(thread);
> > > }
> > > - xe_svm_range_debug(svm_range, "PREFETCH - RANGE
> > > GET
> > > PAGES DONE");
> > > + xa_destroy(&prefetches);
> > > }
> > >
> > > return err;
> >
^ permalink raw reply [flat|nested] 14+ messages in thread
* ✓ CI.KUnit: success for drm/xe: Thread prefetch of SVM ranges (rev2)
2025-06-16 6:47 [PATCH] drm/xe: Thread prefetch of SVM ranges Matthew Brost
2025-06-16 8:28 ` Thomas Hellström
@ 2025-06-16 11:51 ` Patchwork
2025-06-16 12:32 ` ✓ Xe.CI.BAT: " Patchwork
` (2 subsequent siblings)
4 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2025-06-16 11:51 UTC (permalink / raw)
To: Matthew Brost; +Cc: intel-xe
== Series Details ==
Series: drm/xe: Thread prefetch of SVM ranges (rev2)
URL : https://patchwork.freedesktop.org/series/149605/
State : success
== Summary ==
+ trap cleanup EXIT
+ /kernel/tools/testing/kunit/kunit.py run --kunitconfig /kernel/drivers/gpu/drm/xe/.kunitconfig
[11:50:03] Configuring KUnit Kernel ...
Generating .config ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
[11:50:08] Building KUnit Kernel ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
Building with:
$ make all compile_commands.json scripts_gdb ARCH=um O=.kunit --jobs=48
[11:50:34] Starting KUnit Kernel (1/1)...
[11:50:34] ============================================================
Running tests with:
$ .kunit/linux kunit.enable=1 mem=1G console=tty kunit_shutdown=halt
[11:50:35] ================== guc_buf (11 subtests) ===================
[11:50:35] [PASSED] test_smallest
[11:50:35] [PASSED] test_largest
[11:50:35] [PASSED] test_granular
[11:50:35] [PASSED] test_unique
[11:50:35] [PASSED] test_overlap
[11:50:35] [PASSED] test_reusable
[11:50:35] [PASSED] test_too_big
[11:50:35] [PASSED] test_flush
[11:50:35] [PASSED] test_lookup
[11:50:35] [PASSED] test_data
[11:50:35] [PASSED] test_class
[11:50:35] ===================== [PASSED] guc_buf =====================
[11:50:35] =================== guc_dbm (7 subtests) ===================
[11:50:35] [PASSED] test_empty
[11:50:35] [PASSED] test_default
[11:50:35] ======================== test_size ========================
[11:50:35] [PASSED] 4
[11:50:35] [PASSED] 8
[11:50:35] [PASSED] 32
[11:50:35] [PASSED] 256
[11:50:35] ==================== [PASSED] test_size ====================
[11:50:35] ======================= test_reuse ========================
[11:50:35] [PASSED] 4
[11:50:35] [PASSED] 8
[11:50:35] [PASSED] 32
[11:50:35] [PASSED] 256
[11:50:35] =================== [PASSED] test_reuse ====================
[11:50:35] =================== test_range_overlap ====================
[11:50:35] [PASSED] 4
[11:50:35] [PASSED] 8
[11:50:35] [PASSED] 32
[11:50:35] [PASSED] 256
[11:50:35] =============== [PASSED] test_range_overlap ================
[11:50:35] =================== test_range_compact ====================
[11:50:35] [PASSED] 4
[11:50:35] [PASSED] 8
[11:50:35] [PASSED] 32
[11:50:35] [PASSED] 256
[11:50:35] =============== [PASSED] test_range_compact ================
[11:50:35] ==================== test_range_spare =====================
[11:50:35] [PASSED] 4
[11:50:35] [PASSED] 8
[11:50:35] [PASSED] 32
[11:50:35] [PASSED] 256
[11:50:35] ================ [PASSED] test_range_spare =================
[11:50:35] ===================== [PASSED] guc_dbm =====================
[11:50:35] =================== guc_idm (6 subtests) ===================
[11:50:35] [PASSED] bad_init
[11:50:35] [PASSED] no_init
[11:50:35] [PASSED] init_fini
[11:50:35] [PASSED] check_used
[11:50:35] [PASSED] check_quota
[11:50:35] [PASSED] check_all
[11:50:35] ===================== [PASSED] guc_idm =====================
[11:50:35] ================== no_relay (3 subtests) ===================
[11:50:35] [PASSED] xe_drops_guc2pf_if_not_ready
[11:50:35] [PASSED] xe_drops_guc2vf_if_not_ready
[11:50:35] [PASSED] xe_rejects_send_if_not_ready
[11:50:35] ==================== [PASSED] no_relay =====================
[11:50:35] ================== pf_relay (14 subtests) ==================
[11:50:35] [PASSED] pf_rejects_guc2pf_too_short
[11:50:35] [PASSED] pf_rejects_guc2pf_too_long
[11:50:35] [PASSED] pf_rejects_guc2pf_no_payload
[11:50:35] [PASSED] pf_fails_no_payload
[11:50:35] [PASSED] pf_fails_bad_origin
[11:50:35] [PASSED] pf_fails_bad_type
[11:50:35] [PASSED] pf_txn_reports_error
[11:50:35] [PASSED] pf_txn_sends_pf2guc
[11:50:35] [PASSED] pf_sends_pf2guc
[11:50:35] [SKIPPED] pf_loopback_nop
[11:50:35] [SKIPPED] pf_loopback_echo
[11:50:35] [SKIPPED] pf_loopback_fail
[11:50:35] [SKIPPED] pf_loopback_busy
[11:50:35] [SKIPPED] pf_loopback_retry
[11:50:35] ==================== [PASSED] pf_relay =====================
[11:50:35] ================== vf_relay (3 subtests) ===================
[11:50:35] [PASSED] vf_rejects_guc2vf_too_short
[11:50:35] [PASSED] vf_rejects_guc2vf_too_long
[11:50:35] [PASSED] vf_rejects_guc2vf_no_payload
[11:50:35] ==================== [PASSED] vf_relay =====================
[11:50:35] ================= pf_service (11 subtests) =================
[11:50:35] [PASSED] pf_negotiate_any
[11:50:35] [PASSED] pf_negotiate_base_match
[11:50:35] [PASSED] pf_negotiate_base_newer
[11:50:35] [PASSED] pf_negotiate_base_next
[11:50:35] [SKIPPED] pf_negotiate_base_older
[11:50:35] [PASSED] pf_negotiate_base_prev
[11:50:35] [PASSED] pf_negotiate_latest_match
[11:50:35] [PASSED] pf_negotiate_latest_newer
[11:50:35] [PASSED] pf_negotiate_latest_next
[11:50:35] [SKIPPED] pf_negotiate_latest_older
[11:50:35] [SKIPPED] pf_negotiate_latest_prev
[11:50:35] =================== [PASSED] pf_service ====================
[11:50:35] ===================== lmtt (1 subtest) =====================
[11:50:35] ======================== test_ops =========================
[11:50:35] [PASSED] 2-level
[11:50:35] [PASSED] multi-level
[11:50:35] ==================== [PASSED] test_ops =====================
[11:50:35] ====================== [PASSED] lmtt =======================
[11:50:35] =================== xe_mocs (2 subtests) ===================
[11:50:35] ================ xe_live_mocs_kernel_kunit ================
[11:50:35] =========== [SKIPPED] xe_live_mocs_kernel_kunit ============
[11:50:35] ================ xe_live_mocs_reset_kunit =================
[11:50:35] ============ [SKIPPED] xe_live_mocs_reset_kunit ============
[11:50:35] ==================== [SKIPPED] xe_mocs =====================
[11:50:35] ================= xe_migrate (2 subtests) ==================
[11:50:35] ================= xe_migrate_sanity_kunit =================
[11:50:35] ============ [SKIPPED] xe_migrate_sanity_kunit =============
[11:50:35] ================== xe_validate_ccs_kunit ==================
[11:50:35] ============= [SKIPPED] xe_validate_ccs_kunit ==============
[11:50:35] =================== [SKIPPED] xe_migrate ===================
[11:50:35] ================== xe_dma_buf (1 subtest) ==================
[11:50:35] ==================== xe_dma_buf_kunit =====================
[11:50:35] ================ [SKIPPED] xe_dma_buf_kunit ================
[11:50:35] =================== [SKIPPED] xe_dma_buf ===================
[11:50:35] ================= xe_bo_shrink (1 subtest) =================
[11:50:35] =================== xe_bo_shrink_kunit ====================
[11:50:35] =============== [SKIPPED] xe_bo_shrink_kunit ===============
[11:50:35] ================== [SKIPPED] xe_bo_shrink ==================
[11:50:35] ==================== xe_bo (2 subtests) ====================
[11:50:35] ================== xe_ccs_migrate_kunit ===================
[11:50:35] ============== [SKIPPED] xe_ccs_migrate_kunit ==============
[11:50:35] ==================== xe_bo_evict_kunit ====================
[11:50:35] =============== [SKIPPED] xe_bo_evict_kunit ================
[11:50:35] ===================== [SKIPPED] xe_bo ======================
[11:50:35] ==================== args (11 subtests) ====================
[11:50:35] [PASSED] count_args_test
[11:50:35] [PASSED] call_args_example
[11:50:35] [PASSED] call_args_test
[11:50:35] [PASSED] drop_first_arg_example
[11:50:35] [PASSED] drop_first_arg_test
[11:50:35] [PASSED] first_arg_example
[11:50:35] [PASSED] first_arg_test
[11:50:35] [PASSED] last_arg_example
[11:50:35] [PASSED] last_arg_test
[11:50:35] [PASSED] pick_arg_example
[11:50:35] [PASSED] sep_comma_example
[11:50:35] ====================== [PASSED] args =======================
[11:50:35] =================== xe_pci (2 subtests) ====================
[11:50:35] [PASSED] xe_gmdid_graphics_ip
[11:50:35] [PASSED] xe_gmdid_media_ip
[11:50:35] ===================== [PASSED] xe_pci ======================
[11:50:35] =================== xe_rtp (2 subtests) ====================
[11:50:35] =============== xe_rtp_process_to_sr_tests ================
[11:50:35] [PASSED] coalesce-same-reg
[11:50:35] [PASSED] no-match-no-add
[11:50:35] [PASSED] match-or
[11:50:35] [PASSED] match-or-xfail
[11:50:35] [PASSED] no-match-no-add-multiple-rules
[11:50:35] [PASSED] two-regs-two-entries
[11:50:35] [PASSED] clr-one-set-other
[11:50:35] [PASSED] set-field
[11:50:35] [PASSED] conflict-duplicate
[11:50:35] [PASSED] conflict-not-disjoint
stty: 'standard input': Inappropriate ioctl for device
[11:50:35] [PASSED] conflict-reg-type
[11:50:35] =========== [PASSED] xe_rtp_process_to_sr_tests ============
[11:50:35] ================== xe_rtp_process_tests ===================
[11:50:35] [PASSED] active1
[11:50:35] [PASSED] active2
[11:50:35] [PASSED] active-inactive
[11:50:35] [PASSED] inactive-active
[11:50:35] [PASSED] inactive-1st_or_active-inactive
[11:50:35] [PASSED] inactive-2nd_or_active-inactive
[11:50:35] [PASSED] inactive-last_or_active-inactive
[11:50:35] [PASSED] inactive-no_or_active-inactive
[11:50:35] ============== [PASSED] xe_rtp_process_tests ===============
[11:50:35] ===================== [PASSED] xe_rtp ======================
[11:50:35] ==================== xe_wa (1 subtest) =====================
[11:50:35] ======================== xe_wa_gt =========================
[11:50:35] [PASSED] TIGERLAKE (B0)
[11:50:35] [PASSED] DG1 (A0)
[11:50:35] [PASSED] DG1 (B0)
[11:50:35] [PASSED] ALDERLAKE_S (A0)
[11:50:35] [PASSED] ALDERLAKE_S (B0)
[11:50:35] [PASSED] ALDERLAKE_S (C0)
[11:50:35] [PASSED] ALDERLAKE_S (D0)
[11:50:35] [PASSED] ALDERLAKE_P (A0)
[11:50:35] [PASSED] ALDERLAKE_P (B0)
[11:50:35] [PASSED] ALDERLAKE_P (C0)
[11:50:35] [PASSED] ALDERLAKE_S_RPLS (D0)
[11:50:35] [PASSED] ALDERLAKE_P_RPLU (E0)
[11:50:35] [PASSED] DG2_G10 (C0)
[11:50:35] [PASSED] DG2_G11 (B1)
[11:50:35] [PASSED] DG2_G12 (A1)
[11:50:35] [PASSED] METEORLAKE (g:A0, m:A0)
[11:50:35] [PASSED] METEORLAKE (g:A0, m:A0)
[11:50:35] [PASSED] METEORLAKE (g:A0, m:A0)
[11:50:35] [PASSED] LUNARLAKE (g:A0, m:A0)
[11:50:35] [PASSED] LUNARLAKE (g:B0, m:A0)
[11:50:35] [PASSED] BATTLEMAGE (g:A0, m:A1)
[11:50:35] ==================== [PASSED] xe_wa_gt =====================
[11:50:35] ====================== [PASSED] xe_wa ======================
[11:50:35] ============================================================
[11:50:35] Testing complete. Ran 133 tests: passed: 117, skipped: 16
[11:50:35] Elapsed time: 32.015s total, 4.781s configuring, 26.917s building, 0.289s running
+ /kernel/tools/testing/kunit/kunit.py run --kunitconfig /kernel/drivers/gpu/drm/tests/.kunitconfig
[11:50:35] Configuring KUnit Kernel ...
Regenerating .config ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
[11:50:37] Building KUnit Kernel ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
Building with:
$ make all compile_commands.json scripts_gdb ARCH=um O=.kunit --jobs=48
[11:50:58] Starting KUnit Kernel (1/1)...
[11:50:58] ============================================================
Running tests with:
$ .kunit/linux kunit.enable=1 mem=1G console=tty kunit_shutdown=halt
[11:50:58] == drm_test_atomic_get_connector_for_encoder (1 subtest) ===
[11:50:58] [PASSED] drm_test_drm_atomic_get_connector_for_encoder
[11:50:58] ==== [PASSED] drm_test_atomic_get_connector_for_encoder ====
[11:50:58] =========== drm_validate_clone_mode (2 subtests) ===========
[11:50:58] ============== drm_test_check_in_clone_mode ===============
[11:50:58] [PASSED] in_clone_mode
[11:50:58] [PASSED] not_in_clone_mode
[11:50:58] ========== [PASSED] drm_test_check_in_clone_mode ===========
[11:50:58] =============== drm_test_check_valid_clones ===============
[11:50:58] [PASSED] not_in_clone_mode
[11:50:58] [PASSED] valid_clone
[11:50:58] [PASSED] invalid_clone
[11:50:58] =========== [PASSED] drm_test_check_valid_clones ===========
[11:50:58] ============= [PASSED] drm_validate_clone_mode =============
[11:50:58] ============= drm_validate_modeset (1 subtest) =============
[11:50:58] [PASSED] drm_test_check_connector_changed_modeset
[11:50:58] ============== [PASSED] drm_validate_modeset ===============
[11:50:58] ====== drm_test_bridge_get_current_state (2 subtests) ======
[11:50:58] [PASSED] drm_test_drm_bridge_get_current_state_atomic
[11:50:58] [PASSED] drm_test_drm_bridge_get_current_state_legacy
[11:50:58] ======== [PASSED] drm_test_bridge_get_current_state ========
[11:50:58] ====== drm_test_bridge_helper_reset_crtc (3 subtests) ======
[11:50:58] [PASSED] drm_test_drm_bridge_helper_reset_crtc_atomic
[11:50:58] [PASSED] drm_test_drm_bridge_helper_reset_crtc_atomic_disabled
[11:50:58] [PASSED] drm_test_drm_bridge_helper_reset_crtc_legacy
[11:50:58] ======== [PASSED] drm_test_bridge_helper_reset_crtc ========
[11:50:58] ============== drm_bridge_alloc (2 subtests) ===============
[11:50:58] [PASSED] drm_test_drm_bridge_alloc_basic
[11:50:58] [PASSED] drm_test_drm_bridge_alloc_get_put
[11:50:58] ================ [PASSED] drm_bridge_alloc =================
[11:50:58] ================== drm_buddy (7 subtests) ==================
[11:50:58] [PASSED] drm_test_buddy_alloc_limit
[11:50:58] [PASSED] drm_test_buddy_alloc_optimistic
[11:50:58] [PASSED] drm_test_buddy_alloc_pessimistic
[11:50:58] [PASSED] drm_test_buddy_alloc_pathological
[11:50:58] [PASSED] drm_test_buddy_alloc_contiguous
[11:50:58] [PASSED] drm_test_buddy_alloc_clear
[11:50:58] [PASSED] drm_test_buddy_alloc_range_bias
[11:50:58] ==================== [PASSED] drm_buddy ====================
[11:50:58] ============= drm_cmdline_parser (40 subtests) =============
[11:50:58] [PASSED] drm_test_cmdline_force_d_only
[11:50:58] [PASSED] drm_test_cmdline_force_D_only_dvi
[11:50:58] [PASSED] drm_test_cmdline_force_D_only_hdmi
[11:50:58] [PASSED] drm_test_cmdline_force_D_only_not_digital
[11:50:58] [PASSED] drm_test_cmdline_force_e_only
[11:50:58] [PASSED] drm_test_cmdline_res
[11:50:58] [PASSED] drm_test_cmdline_res_vesa
[11:50:58] [PASSED] drm_test_cmdline_res_vesa_rblank
[11:50:58] [PASSED] drm_test_cmdline_res_rblank
[11:50:58] [PASSED] drm_test_cmdline_res_bpp
[11:50:58] [PASSED] drm_test_cmdline_res_refresh
[11:50:58] [PASSED] drm_test_cmdline_res_bpp_refresh
[11:50:58] [PASSED] drm_test_cmdline_res_bpp_refresh_interlaced
[11:50:58] [PASSED] drm_test_cmdline_res_bpp_refresh_margins
[11:50:58] [PASSED] drm_test_cmdline_res_bpp_refresh_force_off
[11:50:58] [PASSED] drm_test_cmdline_res_bpp_refresh_force_on
[11:50:58] [PASSED] drm_test_cmdline_res_bpp_refresh_force_on_analog
[11:50:58] [PASSED] drm_test_cmdline_res_bpp_refresh_force_on_digital
[11:50:58] [PASSED] drm_test_cmdline_res_bpp_refresh_interlaced_margins_force_on
[11:50:58] [PASSED] drm_test_cmdline_res_margins_force_on
[11:50:58] [PASSED] drm_test_cmdline_res_vesa_margins
[11:50:58] [PASSED] drm_test_cmdline_name
[11:50:58] [PASSED] drm_test_cmdline_name_bpp
[11:50:58] [PASSED] drm_test_cmdline_name_option
[11:50:58] [PASSED] drm_test_cmdline_name_bpp_option
[11:50:58] [PASSED] drm_test_cmdline_rotate_0
[11:50:58] [PASSED] drm_test_cmdline_rotate_90
[11:50:58] [PASSED] drm_test_cmdline_rotate_180
[11:50:58] [PASSED] drm_test_cmdline_rotate_270
[11:50:58] [PASSED] drm_test_cmdline_hmirror
[11:50:58] [PASSED] drm_test_cmdline_vmirror
[11:50:58] [PASSED] drm_test_cmdline_margin_options
[11:50:58] [PASSED] drm_test_cmdline_multiple_options
[11:50:58] [PASSED] drm_test_cmdline_bpp_extra_and_option
[11:50:58] [PASSED] drm_test_cmdline_extra_and_option
[11:50:58] [PASSED] drm_test_cmdline_freestanding_options
[11:50:58] [PASSED] drm_test_cmdline_freestanding_force_e_and_options
[11:50:58] [PASSED] drm_test_cmdline_panel_orientation
[11:50:58] ================ drm_test_cmdline_invalid =================
[11:50:58] [PASSED] margin_only
[11:50:58] [PASSED] interlace_only
[11:50:58] [PASSED] res_missing_x
[11:50:58] [PASSED] res_missing_y
[11:50:58] [PASSED] res_bad_y
[11:50:58] [PASSED] res_missing_y_bpp
[11:50:58] [PASSED] res_bad_bpp
[11:50:58] [PASSED] res_bad_refresh
[11:50:58] [PASSED] res_bpp_refresh_force_on_off
[11:50:58] [PASSED] res_invalid_mode
[11:50:58] [PASSED] res_bpp_wrong_place_mode
[11:50:58] [PASSED] name_bpp_refresh
[11:50:58] [PASSED] name_refresh
[11:50:58] [PASSED] name_refresh_wrong_mode
[11:50:58] [PASSED] name_refresh_invalid_mode
[11:50:58] [PASSED] rotate_multiple
[11:50:58] [PASSED] rotate_invalid_val
[11:50:58] [PASSED] rotate_truncated
[11:50:58] [PASSED] invalid_option
[11:50:58] [PASSED] invalid_tv_option
[11:50:58] [PASSED] truncated_tv_option
[11:50:58] ============ [PASSED] drm_test_cmdline_invalid =============
[11:50:58] =============== drm_test_cmdline_tv_options ===============
[11:50:58] [PASSED] NTSC
[11:50:58] [PASSED] NTSC_443
[11:50:58] [PASSED] NTSC_J
[11:50:58] [PASSED] PAL
[11:50:58] [PASSED] PAL_M
[11:50:58] [PASSED] PAL_N
[11:50:58] [PASSED] SECAM
[11:50:58] [PASSED] MONO_525
[11:50:58] [PASSED] MONO_625
[11:50:58] =========== [PASSED] drm_test_cmdline_tv_options ===========
[11:50:58] =============== [PASSED] drm_cmdline_parser ================
[11:50:58] ========== drmm_connector_hdmi_init (20 subtests) ==========
[11:50:58] [PASSED] drm_test_connector_hdmi_init_valid
[11:50:58] [PASSED] drm_test_connector_hdmi_init_bpc_8
[11:50:58] [PASSED] drm_test_connector_hdmi_init_bpc_10
[11:50:58] [PASSED] drm_test_connector_hdmi_init_bpc_12
[11:50:58] [PASSED] drm_test_connector_hdmi_init_bpc_invalid
[11:50:58] [PASSED] drm_test_connector_hdmi_init_bpc_null
[11:50:58] [PASSED] drm_test_connector_hdmi_init_formats_empty
[11:50:58] [PASSED] drm_test_connector_hdmi_init_formats_no_rgb
[11:50:58] === drm_test_connector_hdmi_init_formats_yuv420_allowed ===
[11:50:58] [PASSED] supported_formats=0x9 yuv420_allowed=1
[11:50:58] [PASSED] supported_formats=0x9 yuv420_allowed=0
[11:50:58] [PASSED] supported_formats=0x3 yuv420_allowed=1
[11:50:58] [PASSED] supported_formats=0x3 yuv420_allowed=0
[11:50:58] === [PASSED] drm_test_connector_hdmi_init_formats_yuv420_allowed ===
[11:50:58] [PASSED] drm_test_connector_hdmi_init_null_ddc
[11:50:58] [PASSED] drm_test_connector_hdmi_init_null_product
[11:50:58] [PASSED] drm_test_connector_hdmi_init_null_vendor
[11:50:58] [PASSED] drm_test_connector_hdmi_init_product_length_exact
[11:50:58] [PASSED] drm_test_connector_hdmi_init_product_length_too_long
[11:50:58] [PASSED] drm_test_connector_hdmi_init_product_valid
[11:50:58] [PASSED] drm_test_connector_hdmi_init_vendor_length_exact
[11:50:58] [PASSED] drm_test_connector_hdmi_init_vendor_length_too_long
[11:50:58] [PASSED] drm_test_connector_hdmi_init_vendor_valid
[11:50:58] ========= drm_test_connector_hdmi_init_type_valid =========
[11:50:58] [PASSED] HDMI-A
[11:50:58] [PASSED] HDMI-B
[11:50:58] ===== [PASSED] drm_test_connector_hdmi_init_type_valid =====
[11:50:58] ======== drm_test_connector_hdmi_init_type_invalid ========
[11:50:58] [PASSED] Unknown
[11:50:58] [PASSED] VGA
[11:50:58] [PASSED] DVI-I
[11:50:58] [PASSED] DVI-D
[11:50:58] [PASSED] DVI-A
[11:50:58] [PASSED] Composite
[11:50:58] [PASSED] SVIDEO
[11:50:58] [PASSED] LVDS
[11:50:58] [PASSED] Component
[11:50:58] [PASSED] DIN
[11:50:58] [PASSED] DP
[11:50:58] [PASSED] TV
[11:50:58] [PASSED] eDP
[11:50:58] [PASSED] Virtual
[11:50:58] [PASSED] DSI
[11:50:58] [PASSED] DPI
[11:50:58] [PASSED] Writeback
[11:50:58] [PASSED] SPI
[11:50:58] [PASSED] USB
[11:50:58] ==== [PASSED] drm_test_connector_hdmi_init_type_invalid ====
[11:50:58] ============ [PASSED] drmm_connector_hdmi_init =============
[11:50:58] ============= drmm_connector_init (3 subtests) =============
[11:50:58] [PASSED] drm_test_drmm_connector_init
[11:50:58] [PASSED] drm_test_drmm_connector_init_null_ddc
[11:50:58] ========= drm_test_drmm_connector_init_type_valid =========
[11:50:58] [PASSED] Unknown
[11:50:58] [PASSED] VGA
[11:50:58] [PASSED] DVI-I
[11:50:58] [PASSED] DVI-D
[11:50:58] [PASSED] DVI-A
[11:50:58] [PASSED] Composite
[11:50:58] [PASSED] SVIDEO
[11:50:58] [PASSED] LVDS
[11:50:58] [PASSED] Component
[11:50:58] [PASSED] DIN
[11:50:58] [PASSED] DP
[11:50:58] [PASSED] HDMI-A
[11:50:58] [PASSED] HDMI-B
[11:50:58] [PASSED] TV
[11:50:58] [PASSED] eDP
[11:50:58] [PASSED] Virtual
[11:50:58] [PASSED] DSI
[11:50:58] [PASSED] DPI
[11:50:58] [PASSED] Writeback
[11:50:58] [PASSED] SPI
[11:50:58] [PASSED] USB
[11:50:58] ===== [PASSED] drm_test_drmm_connector_init_type_valid =====
[11:50:58] =============== [PASSED] drmm_connector_init ===============
[11:50:58] ========= drm_connector_dynamic_init (6 subtests) ==========
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_init
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_init_null_ddc
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_init_not_added
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_init_properties
[11:50:58] ===== drm_test_drm_connector_dynamic_init_type_valid ======
[11:50:58] [PASSED] Unknown
[11:50:58] [PASSED] VGA
[11:50:58] [PASSED] DVI-I
[11:50:58] [PASSED] DVI-D
[11:50:58] [PASSED] DVI-A
[11:50:58] [PASSED] Composite
[11:50:58] [PASSED] SVIDEO
[11:50:58] [PASSED] LVDS
[11:50:58] [PASSED] Component
[11:50:58] [PASSED] DIN
[11:50:58] [PASSED] DP
[11:50:58] [PASSED] HDMI-A
[11:50:58] [PASSED] HDMI-B
[11:50:58] [PASSED] TV
[11:50:58] [PASSED] eDP
[11:50:58] [PASSED] Virtual
[11:50:58] [PASSED] DSI
[11:50:58] [PASSED] DPI
[11:50:58] [PASSED] Writeback
[11:50:58] [PASSED] SPI
[11:50:58] [PASSED] USB
[11:50:58] = [PASSED] drm_test_drm_connector_dynamic_init_type_valid ==
[11:50:58] ======== drm_test_drm_connector_dynamic_init_name =========
[11:50:58] [PASSED] Unknown
[11:50:58] [PASSED] VGA
[11:50:58] [PASSED] DVI-I
[11:50:58] [PASSED] DVI-D
[11:50:58] [PASSED] DVI-A
[11:50:58] [PASSED] Composite
[11:50:58] [PASSED] SVIDEO
[11:50:58] [PASSED] LVDS
[11:50:58] [PASSED] Component
[11:50:58] [PASSED] DIN
[11:50:58] [PASSED] DP
[11:50:58] [PASSED] HDMI-A
[11:50:58] [PASSED] HDMI-B
[11:50:58] [PASSED] TV
[11:50:58] [PASSED] eDP
[11:50:58] [PASSED] Virtual
[11:50:58] [PASSED] DSI
[11:50:58] [PASSED] DPI
[11:50:58] [PASSED] Writeback
[11:50:58] [PASSED] SPI
[11:50:58] [PASSED] USB
[11:50:58] ==== [PASSED] drm_test_drm_connector_dynamic_init_name =====
[11:50:58] =========== [PASSED] drm_connector_dynamic_init ============
[11:50:58] ==== drm_connector_dynamic_register_early (4 subtests) =====
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_register_early_on_list
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_register_early_defer
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_register_early_no_init
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_register_early_no_mode_object
[11:50:58] ====== [PASSED] drm_connector_dynamic_register_early =======
[11:50:58] ======= drm_connector_dynamic_register (7 subtests) ========
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_register_on_list
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_register_no_defer
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_register_no_init
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_register_mode_object
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_register_sysfs
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_register_sysfs_name
[11:50:58] [PASSED] drm_test_drm_connector_dynamic_register_debugfs
[11:50:58] ========= [PASSED] drm_connector_dynamic_register ==========
[11:50:58] = drm_connector_attach_broadcast_rgb_property (2 subtests) =
[11:50:58] [PASSED] drm_test_drm_connector_attach_broadcast_rgb_property
[11:50:58] [PASSED] drm_test_drm_connector_attach_broadcast_rgb_property_hdmi_connector
[11:50:58] === [PASSED] drm_connector_attach_broadcast_rgb_property ===
[11:50:58] ========== drm_get_tv_mode_from_name (2 subtests) ==========
[11:50:58] ========== drm_test_get_tv_mode_from_name_valid ===========
[11:50:58] [PASSED] NTSC
[11:50:58] [PASSED] NTSC-443
[11:50:58] [PASSED] NTSC-J
[11:50:58] [PASSED] PAL
[11:50:58] [PASSED] PAL-M
[11:50:58] [PASSED] PAL-N
[11:50:58] [PASSED] SECAM
[11:50:58] [PASSED] Mono
[11:50:58] ====== [PASSED] drm_test_get_tv_mode_from_name_valid =======
[11:50:58] [PASSED] drm_test_get_tv_mode_from_name_truncated
[11:50:58] ============ [PASSED] drm_get_tv_mode_from_name ============
[11:50:58] = drm_test_connector_hdmi_compute_mode_clock (12 subtests) =
[11:50:58] [PASSED] drm_test_drm_hdmi_compute_mode_clock_rgb
[11:50:58] [PASSED] drm_test_drm_hdmi_compute_mode_clock_rgb_10bpc
[11:50:58] [PASSED] drm_test_drm_hdmi_compute_mode_clock_rgb_10bpc_vic_1
[11:50:58] [PASSED] drm_test_drm_hdmi_compute_mode_clock_rgb_12bpc
[11:50:58] [PASSED] drm_test_drm_hdmi_compute_mode_clock_rgb_12bpc_vic_1
[11:50:58] [PASSED] drm_test_drm_hdmi_compute_mode_clock_rgb_double
[11:50:58] = drm_test_connector_hdmi_compute_mode_clock_yuv420_valid =
[11:50:58] [PASSED] VIC 96
[11:50:58] [PASSED] VIC 97
[11:50:58] [PASSED] VIC 101
[11:50:58] [PASSED] VIC 102
[11:50:58] [PASSED] VIC 106
[11:50:58] [PASSED] VIC 107
[11:50:58] === [PASSED] drm_test_connector_hdmi_compute_mode_clock_yuv420_valid ===
[11:50:58] [PASSED] drm_test_connector_hdmi_compute_mode_clock_yuv420_10_bpc
[11:50:58] [PASSED] drm_test_connector_hdmi_compute_mode_clock_yuv420_12_bpc
[11:50:58] [PASSED] drm_test_connector_hdmi_compute_mode_clock_yuv422_8_bpc
[11:50:58] [PASSED] drm_test_connector_hdmi_compute_mode_clock_yuv422_10_bpc
[11:50:58] [PASSED] drm_test_connector_hdmi_compute_mode_clock_yuv422_12_bpc
[11:50:58] === [PASSED] drm_test_connector_hdmi_compute_mode_clock ====
[11:50:58] == drm_hdmi_connector_get_broadcast_rgb_name (2 subtests) ==
[11:50:58] === drm_test_drm_hdmi_connector_get_broadcast_rgb_name ====
[11:50:58] [PASSED] Automatic
[11:50:58] [PASSED] Full
[11:50:58] [PASSED] Limited 16:235
[11:50:58] === [PASSED] drm_test_drm_hdmi_connector_get_broadcast_rgb_name ===
[11:50:58] [PASSED] drm_test_drm_hdmi_connector_get_broadcast_rgb_name_invalid
[11:50:58] ==== [PASSED] drm_hdmi_connector_get_broadcast_rgb_name ====
[11:50:58] == drm_hdmi_connector_get_output_format_name (2 subtests) ==
[11:50:58] === drm_test_drm_hdmi_connector_get_output_format_name ====
[11:50:58] [PASSED] RGB
[11:50:58] [PASSED] YUV 4:2:0
[11:50:58] [PASSED] YUV 4:2:2
[11:50:58] [PASSED] YUV 4:4:4
[11:50:58] === [PASSED] drm_test_drm_hdmi_connector_get_output_format_name ===
[11:50:58] [PASSED] drm_test_drm_hdmi_connector_get_output_format_name_invalid
[11:50:58] ==== [PASSED] drm_hdmi_connector_get_output_format_name ====
[11:50:58] ============= drm_damage_helper (21 subtests) ==============
[11:50:58] [PASSED] drm_test_damage_iter_no_damage
[11:50:58] [PASSED] drm_test_damage_iter_no_damage_fractional_src
[11:50:58] [PASSED] drm_test_damage_iter_no_damage_src_moved
[11:50:58] [PASSED] drm_test_damage_iter_no_damage_fractional_src_moved
[11:50:58] [PASSED] drm_test_damage_iter_no_damage_not_visible
[11:50:58] [PASSED] drm_test_damage_iter_no_damage_no_crtc
[11:50:58] [PASSED] drm_test_damage_iter_no_damage_no_fb
[11:50:58] [PASSED] drm_test_damage_iter_simple_damage
[11:50:58] [PASSED] drm_test_damage_iter_single_damage
[11:50:58] [PASSED] drm_test_damage_iter_single_damage_intersect_src
[11:50:58] [PASSED] drm_test_damage_iter_single_damage_outside_src
[11:50:58] [PASSED] drm_test_damage_iter_single_damage_fractional_src
[11:50:58] [PASSED] drm_test_damage_iter_single_damage_intersect_fractional_src
[11:50:58] [PASSED] drm_test_damage_iter_single_damage_outside_fractional_src
[11:50:58] [PASSED] drm_test_damage_iter_single_damage_src_moved
[11:50:58] [PASSED] drm_test_damage_iter_single_damage_fractional_src_moved
[11:50:58] [PASSED] drm_test_damage_iter_damage
[11:50:58] [PASSED] drm_test_damage_iter_damage_one_intersect
[11:50:58] [PASSED] drm_test_damage_iter_damage_one_outside
[11:50:58] [PASSED] drm_test_damage_iter_damage_src_moved
[11:50:58] [PASSED] drm_test_damage_iter_damage_not_visible
[11:50:58] ================ [PASSED] drm_damage_helper ================
[11:50:58] ============== drm_dp_mst_helper (3 subtests) ==============
[11:50:58] ============== drm_test_dp_mst_calc_pbn_mode ==============
[11:50:58] [PASSED] Clock 154000 BPP 30 DSC disabled
[11:50:58] [PASSED] Clock 234000 BPP 30 DSC disabled
[11:50:58] [PASSED] Clock 297000 BPP 24 DSC disabled
[11:50:58] [PASSED] Clock 332880 BPP 24 DSC enabled
[11:50:58] [PASSED] Clock 324540 BPP 24 DSC enabled
[11:50:58] ========== [PASSED] drm_test_dp_mst_calc_pbn_mode ==========
[11:50:58] ============== drm_test_dp_mst_calc_pbn_div ===============
[11:50:58] [PASSED] Link rate 2000000 lane count 4
[11:50:58] [PASSED] Link rate 2000000 lane count 2
[11:50:58] [PASSED] Link rate 2000000 lane count 1
[11:50:58] [PASSED] Link rate 1350000 lane count 4
[11:50:58] [PASSED] Link rate 1350000 lane count 2
[11:50:58] [PASSED] Link rate 1350000 lane count 1
[11:50:58] [PASSED] Link rate 1000000 lane count 4
[11:50:58] [PASSED] Link rate 1000000 lane count 2
[11:50:58] [PASSED] Link rate 1000000 lane count 1
[11:50:58] [PASSED] Link rate 810000 lane count 4
[11:50:58] [PASSED] Link rate 810000 lane count 2
[11:50:58] [PASSED] Link rate 810000 lane count 1
[11:50:58] [PASSED] Link rate 540000 lane count 4
[11:50:58] [PASSED] Link rate 540000 lane count 2
[11:50:58] [PASSED] Link rate 540000 lane count 1
[11:50:58] [PASSED] Link rate 270000 lane count 4
[11:50:58] [PASSED] Link rate 270000 lane count 2
[11:50:58] [PASSED] Link rate 270000 lane count 1
[11:50:58] [PASSED] Link rate 162000 lane count 4
[11:50:58] [PASSED] Link rate 162000 lane count 2
[11:50:58] [PASSED] Link rate 162000 lane count 1
[11:50:58] ========== [PASSED] drm_test_dp_mst_calc_pbn_div ===========
[11:50:58] ========= drm_test_dp_mst_sideband_msg_req_decode =========
[11:50:58] [PASSED] DP_ENUM_PATH_RESOURCES with port number
[11:50:58] [PASSED] DP_POWER_UP_PHY with port number
[11:50:58] [PASSED] DP_POWER_DOWN_PHY with port number
[11:50:58] [PASSED] DP_ALLOCATE_PAYLOAD with SDP stream sinks
[11:50:58] [PASSED] DP_ALLOCATE_PAYLOAD with port number
[11:50:58] [PASSED] DP_ALLOCATE_PAYLOAD with VCPI
[11:50:58] [PASSED] DP_ALLOCATE_PAYLOAD with PBN
[11:50:58] [PASSED] DP_QUERY_PAYLOAD with port number
[11:50:58] [PASSED] DP_QUERY_PAYLOAD with VCPI
[11:50:58] [PASSED] DP_REMOTE_DPCD_READ with port number
[11:50:58] [PASSED] DP_REMOTE_DPCD_READ with DPCD address
[11:50:58] [PASSED] DP_REMOTE_DPCD_READ with max number of bytes
[11:50:58] [PASSED] DP_REMOTE_DPCD_WRITE with port number
[11:50:58] [PASSED] DP_REMOTE_DPCD_WRITE with DPCD address
[11:50:58] [PASSED] DP_REMOTE_DPCD_WRITE with data array
[11:50:58] [PASSED] DP_REMOTE_I2C_READ with port number
[11:50:58] [PASSED] DP_REMOTE_I2C_READ with I2C device ID
[11:50:58] [PASSED] DP_REMOTE_I2C_READ with transactions array
[11:50:58] [PASSED] DP_REMOTE_I2C_WRITE with port number
[11:50:58] [PASSED] DP_REMOTE_I2C_WRITE with I2C device ID
[11:50:58] [PASSED] DP_REMOTE_I2C_WRITE with data array
[11:50:58] [PASSED] DP_QUERY_STREAM_ENC_STATUS with stream ID
[11:50:58] [PASSED] DP_QUERY_STREAM_ENC_STATUS with client ID
[11:50:58] [PASSED] DP_QUERY_STREAM_ENC_STATUS with stream event
[11:50:58] [PASSED] DP_QUERY_STREAM_ENC_STATUS with valid stream event
[11:50:58] [PASSED] DP_QUERY_STREAM_ENC_STATUS with stream behavior
[11:50:58] [PASSED] DP_QUERY_STREAM_ENC_STATUS with a valid stream behavior
[11:50:58] ===== [PASSED] drm_test_dp_mst_sideband_msg_req_decode =====
[11:50:58] ================ [PASSED] drm_dp_mst_helper ================
[11:50:58] ================== drm_exec (7 subtests) ===================
[11:50:58] [PASSED] sanitycheck
[11:50:58] [PASSED] test_lock
[11:50:58] [PASSED] test_lock_unlock
[11:50:58] [PASSED] test_duplicates
[11:50:58] [PASSED] test_prepare
[11:50:58] [PASSED] test_prepare_array
[11:50:58] [PASSED] test_multiple_loops
[11:50:58] ==================== [PASSED] drm_exec =====================
[11:50:58] =========== drm_format_helper_test (18 subtests) ===========
[11:50:58] ============== drm_test_fb_xrgb8888_to_gray8 ==============
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ========== [PASSED] drm_test_fb_xrgb8888_to_gray8 ==========
[11:50:58] ============= drm_test_fb_xrgb8888_to_rgb332 ==============
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ========= [PASSED] drm_test_fb_xrgb8888_to_rgb332 ==========
[11:50:58] ============= drm_test_fb_xrgb8888_to_rgb565 ==============
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ========= [PASSED] drm_test_fb_xrgb8888_to_rgb565 ==========
[11:50:58] ============ drm_test_fb_xrgb8888_to_xrgb1555 =============
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ======== [PASSED] drm_test_fb_xrgb8888_to_xrgb1555 =========
[11:50:58] ============ drm_test_fb_xrgb8888_to_argb1555 =============
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ======== [PASSED] drm_test_fb_xrgb8888_to_argb1555 =========
[11:50:58] ============ drm_test_fb_xrgb8888_to_rgba5551 =============
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ======== [PASSED] drm_test_fb_xrgb8888_to_rgba5551 =========
[11:50:58] ============= drm_test_fb_xrgb8888_to_rgb888 ==============
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ========= [PASSED] drm_test_fb_xrgb8888_to_rgb888 ==========
[11:50:58] ============= drm_test_fb_xrgb8888_to_bgr888 ==============
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ========= [PASSED] drm_test_fb_xrgb8888_to_bgr888 ==========
[11:50:58] ============ drm_test_fb_xrgb8888_to_argb8888 =============
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ======== [PASSED] drm_test_fb_xrgb8888_to_argb8888 =========
[11:50:58] =========== drm_test_fb_xrgb8888_to_xrgb2101010 ===========
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ======= [PASSED] drm_test_fb_xrgb8888_to_xrgb2101010 =======
[11:50:58] =========== drm_test_fb_xrgb8888_to_argb2101010 ===========
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ======= [PASSED] drm_test_fb_xrgb8888_to_argb2101010 =======
[11:50:58] ============== drm_test_fb_xrgb8888_to_mono ===============
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ========== [PASSED] drm_test_fb_xrgb8888_to_mono ===========
[11:50:58] ==================== drm_test_fb_swab =====================
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ================ [PASSED] drm_test_fb_swab =================
[11:50:58] ============ drm_test_fb_xrgb8888_to_xbgr8888 =============
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ======== [PASSED] drm_test_fb_xrgb8888_to_xbgr8888 =========
[11:50:58] ============ drm_test_fb_xrgb8888_to_abgr8888 =============
[11:50:58] [PASSED] single_pixel_source_buffer
[11:50:58] [PASSED] single_pixel_clip_rectangle
[11:50:58] [PASSED] well_known_colors
[11:50:58] [PASSED] destination_pitch
[11:50:58] ======== [PASSED] drm_test_fb_xrgb8888_to_abgr8888 =========
[11:50:58] ================= drm_test_fb_clip_offset =================
[11:50:58] [PASSED] pass through
[11:50:58] [PASSED] horizontal offset
[11:50:58] [PASSED] vertical offset
[11:50:58] [PASSED] horizontal and vertical offset
[11:50:58] [PASSED] horizontal offset (custom pitch)
[11:50:58] [PASSED] vertical offset (custom pitch)
[11:50:58] [PASSED] horizontal and vertical offset (custom pitch)
[11:50:58] ============= [PASSED] drm_test_fb_clip_offset =============
[11:50:58] ============== drm_test_fb_build_fourcc_list ==============
[11:50:58] [PASSED] no native formats
[11:50:58] [PASSED] XRGB8888 as native format
[11:50:58] [PASSED] remove duplicates
[11:50:58] [PASSED] convert alpha formats
[11:50:58] [PASSED] random formats
[11:50:58] ========== [PASSED] drm_test_fb_build_fourcc_list ==========
[11:50:58] =================== drm_test_fb_memcpy ====================
[11:50:58] [PASSED] single_pixel_source_buffer: XR24 little-endian (0x34325258)
[11:50:58] [PASSED] single_pixel_source_buffer: XRA8 little-endian (0x38415258)
[11:50:58] [PASSED] single_pixel_source_buffer: YU24 little-endian (0x34325559)
[11:50:58] [PASSED] single_pixel_clip_rectangle: XB24 little-endian (0x34324258)
[11:50:58] [PASSED] single_pixel_clip_rectangle: XRA8 little-endian (0x38415258)
[11:50:58] [PASSED] single_pixel_clip_rectangle: YU24 little-endian (0x34325559)
[11:50:58] [PASSED] well_known_colors: XB24 little-endian (0x34324258)
[11:50:58] [PASSED] well_known_colors: XRA8 little-endian (0x38415258)
[11:50:58] [PASSED] well_known_colors: YU24 little-endian (0x34325559)
[11:50:58] [PASSED] destination_pitch: XB24 little-endian (0x34324258)
[11:50:58] [PASSED] destination_pitch: XRA8 little-endian (0x38415258)
[11:50:58] [PASSED] destination_pitch: YU24 little-endian (0x34325559)
[11:50:58] =============== [PASSED] drm_test_fb_memcpy ================
[11:50:58] ============= [PASSED] drm_format_helper_test ==============
[11:50:58] ================= drm_format (18 subtests) =================
[11:50:58] [PASSED] drm_test_format_block_width_invalid
[11:50:58] [PASSED] drm_test_format_block_width_one_plane
[11:50:58] [PASSED] drm_test_format_block_width_two_plane
[11:50:58] [PASSED] drm_test_format_block_width_three_plane
[11:50:58] [PASSED] drm_test_format_block_width_tiled
[11:50:58] [PASSED] drm_test_format_block_height_invalid
[11:50:58] [PASSED] drm_test_format_block_height_one_plane
[11:50:58] [PASSED] drm_test_format_block_height_two_plane
[11:50:58] [PASSED] drm_test_format_block_height_three_plane
[11:50:58] [PASSED] drm_test_format_block_height_tiled
[11:50:58] [PASSED] drm_test_format_min_pitch_invalid
[11:50:58] [PASSED] drm_test_format_min_pitch_one_plane_8bpp
[11:50:58] [PASSED] drm_test_format_min_pitch_one_plane_16bpp
[11:50:58] [PASSED] drm_test_format_min_pitch_one_plane_24bpp
[11:50:58] [PASSED] drm_test_format_min_pitch_one_plane_32bpp
[11:50:58] [PASSED] drm_test_format_min_pitch_two_plane
[11:50:58] [PASSED] drm_test_format_min_pitch_three_plane_8bpp
[11:50:58] [PASSED] drm_test_format_min_pitch_tiled
[11:50:58] =================== [PASSED] drm_format ====================
[11:50:58] ============== drm_framebuffer (10 subtests) ===============
[11:50:58] ========== drm_test_framebuffer_check_src_coords ==========
[11:50:58] [PASSED] Success: source fits into fb
[11:50:58] [PASSED] Fail: overflowing fb with x-axis coordinate
[11:50:58] [PASSED] Fail: overflowing fb with y-axis coordinate
[11:50:58] [PASSED] Fail: overflowing fb with source width
[11:50:58] [PASSED] Fail: overflowing fb with source height
[11:50:58] ====== [PASSED] drm_test_framebuffer_check_src_coords ======
[11:50:58] [PASSED] drm_test_framebuffer_cleanup
[11:50:58] =============== drm_test_framebuffer_create ===============
[11:50:58] [PASSED] ABGR8888 normal sizes
[11:50:58] [PASSED] ABGR8888 max sizes
[11:50:58] [PASSED] ABGR8888 pitch greater than min required
[11:50:58] [PASSED] ABGR8888 pitch less than min required
[11:50:58] [PASSED] ABGR8888 Invalid width
[11:50:58] [PASSED] ABGR8888 Invalid buffer handle
[11:50:58] [PASSED] No pixel format
[11:50:58] [PASSED] ABGR8888 Width 0
[11:50:58] [PASSED] ABGR8888 Height 0
[11:50:58] [PASSED] ABGR8888 Out of bound height * pitch combination
[11:50:58] [PASSED] ABGR8888 Large buffer offset
[11:50:58] [PASSED] ABGR8888 Buffer offset for inexistent plane
[11:50:58] [PASSED] ABGR8888 Invalid flag
[11:50:58] [PASSED] ABGR8888 Set DRM_MODE_FB_MODIFIERS without modifiers
[11:50:58] [PASSED] ABGR8888 Valid buffer modifier
[11:50:58] [PASSED] ABGR8888 Invalid buffer modifier(DRM_FORMAT_MOD_SAMSUNG_64_32_TILE)
[11:50:58] [PASSED] ABGR8888 Extra pitches without DRM_MODE_FB_MODIFIERS
[11:50:58] [PASSED] ABGR8888 Extra pitches with DRM_MODE_FB_MODIFIERS
[11:50:58] [PASSED] NV12 Normal sizes
[11:50:58] [PASSED] NV12 Max sizes
[11:50:58] [PASSED] NV12 Invalid pitch
[11:50:58] [PASSED] NV12 Invalid modifier/missing DRM_MODE_FB_MODIFIERS flag
[11:50:58] [PASSED] NV12 different modifier per-plane
[11:50:58] [PASSED] NV12 with DRM_FORMAT_MOD_SAMSUNG_64_32_TILE
[11:50:58] [PASSED] NV12 Valid modifiers without DRM_MODE_FB_MODIFIERS
[11:50:58] [PASSED] NV12 Modifier for inexistent plane
[11:50:58] [PASSED] NV12 Handle for inexistent plane
[11:50:58] [PASSED] NV12 Handle for inexistent plane without DRM_MODE_FB_MODIFIERS
[11:50:58] [PASSED] YVU420 DRM_MODE_FB_MODIFIERS set without modifier
[11:50:58] [PASSED] YVU420 Normal sizes
[11:50:58] [PASSED] YVU420 Max sizes
[11:50:58] [PASSED] YVU420 Invalid pitch
[11:50:58] [PASSED] YVU420 Different pitches
[11:50:58] [PASSED] YVU420 Different buffer offsets/pitches
[11:50:58] [PASSED] YVU420 Modifier set just for plane 0, without DRM_MODE_FB_MODIFIERS
[11:50:58] [PASSED] YVU420 Modifier set just for planes 0, 1, without DRM_MODE_FB_MODIFIERS
[11:50:58] [PASSED] YVU420 Modifier set just for plane 0, 1, with DRM_MODE_FB_MODIFIERS
[11:50:58] [PASSED] YVU420 Valid modifier
[11:50:58] [PASSED] YVU420 Different modifiers per plane
[11:50:58] [PASSED] YVU420 Modifier for inexistent plane
[11:50:58] [PASSED] YUV420_10BIT Invalid modifier(DRM_FORMAT_MOD_LINEAR)
[11:50:58] [PASSED] X0L2 Normal sizes
[11:50:58] [PASSED] X0L2 Max sizes
[11:50:58] [PASSED] X0L2 Invalid pitch
[11:50:58] [PASSED] X0L2 Pitch greater than minimum required
[11:50:58] [PASSED] X0L2 Handle for inexistent plane
[11:50:58] [PASSED] X0L2 Offset for inexistent plane, without DRM_MODE_FB_MODIFIERS set
[11:50:58] [PASSED] X0L2 Modifier without DRM_MODE_FB_MODIFIERS set
[11:50:58] [PASSED] X0L2 Valid modifier
[11:50:58] [PASSED] X0L2 Modifier for inexistent plane
[11:50:58] =========== [PASSED] drm_test_framebuffer_create ===========
[11:50:58] [PASSED] drm_test_framebuffer_free
[11:50:58] [PASSED] drm_test_framebuffer_init
[11:50:58] [PASSED] drm_test_framebuffer_init_bad_format
[11:50:58] [PASSED] drm_test_framebuffer_init_dev_mismatch
[11:50:58] [PASSED] drm_test_framebuffer_lookup
[11:50:58] [PASSED] drm_test_framebuffer_lookup_inexistent
[11:50:58] [PASSED] drm_test_framebuffer_modifiers_not_supported
[11:50:58] ================= [PASSED] drm_framebuffer =================
[11:50:58] ================ drm_gem_shmem (8 subtests) ================
[11:50:58] [PASSED] drm_gem_shmem_test_obj_create
[11:50:58] [PASSED] drm_gem_shmem_test_obj_create_private
[11:50:58] [PASSED] drm_gem_shmem_test_pin_pages
[11:50:58] [PASSED] drm_gem_shmem_test_vmap
[11:50:58] [PASSED] drm_gem_shmem_test_get_pages_sgt
[11:50:58] [PASSED] drm_gem_shmem_test_get_sg_table
[11:50:58] [PASSED] drm_gem_shmem_test_madvise
[11:50:58] [PASSED] drm_gem_shmem_test_purge
[11:50:58] ================== [PASSED] drm_gem_shmem ==================
[11:50:58] === drm_atomic_helper_connector_hdmi_check (27 subtests) ===
[11:50:58] [PASSED] drm_test_check_broadcast_rgb_auto_cea_mode
[11:50:58] [PASSED] drm_test_check_broadcast_rgb_auto_cea_mode_vic_1
[11:50:58] [PASSED] drm_test_check_broadcast_rgb_full_cea_mode
[11:50:58] [PASSED] drm_test_check_broadcast_rgb_full_cea_mode_vic_1
[11:50:58] [PASSED] drm_test_check_broadcast_rgb_limited_cea_mode
[11:50:58] [PASSED] drm_test_check_broadcast_rgb_limited_cea_mode_vic_1
[11:50:58] ====== drm_test_check_broadcast_rgb_cea_mode_yuv420 =======
[11:50:58] [PASSED] Automatic
[11:50:58] [PASSED] Full
[11:50:58] [PASSED] Limited 16:235
[11:50:58] == [PASSED] drm_test_check_broadcast_rgb_cea_mode_yuv420 ===
[11:50:58] [PASSED] drm_test_check_broadcast_rgb_crtc_mode_changed
[11:50:58] [PASSED] drm_test_check_broadcast_rgb_crtc_mode_not_changed
[11:50:58] [PASSED] drm_test_check_disable_connector
[11:50:58] [PASSED] drm_test_check_hdmi_funcs_reject_rate
[11:50:58] [PASSED] drm_test_check_max_tmds_rate_bpc_fallback_rgb
[11:50:58] [PASSED] drm_test_check_max_tmds_rate_bpc_fallback_yuv420
[11:50:58] [PASSED] drm_test_check_max_tmds_rate_bpc_fallback_ignore_yuv422
[11:50:58] [PASSED] drm_test_check_max_tmds_rate_bpc_fallback_ignore_yuv420
[11:50:58] [PASSED] drm_test_check_driver_unsupported_fallback_yuv420
[11:50:58] [PASSED] drm_test_check_output_bpc_crtc_mode_changed
[11:50:58] [PASSED] drm_test_check_output_bpc_crtc_mode_not_changed
[11:50:58] [PASSED] drm_test_check_output_bpc_dvi
[11:50:58] [PASSED] drm_test_check_output_bpc_format_vic_1
[11:50:58] [PASSED] drm_test_check_output_bpc_format_display_8bpc_only
[11:50:58] [PASSED] drm_test_check_output_bpc_format_display_rgb_only
[11:50:58] [PASSED] drm_test_check_output_bpc_format_driver_8bpc_only
[11:50:58] [PASSED] drm_test_check_output_bpc_format_driver_rgb_only
[11:50:58] [PASSED] drm_test_check_tmds_char_rate_rgb_8bpc
[11:50:58] [PASSED] drm_test_check_tmds_char_rate_rgb_10bpc
[11:50:58] [PASSED] drm_test_check_tmds_char_rate_rgb_12bpc
[11:50:58] ===== [PASSED] drm_atomic_helper_connector_hdmi_check ======
[11:50:58] === drm_atomic_helper_connector_hdmi_reset (6 subtests) ====
[11:50:58] [PASSED] drm_test_check_broadcast_rgb_value
[11:50:58] [PASSED] drm_test_check_bpc_8_value
[11:50:58] [PASSED] drm_test_check_bpc_10_value
[11:50:58] [PASSED] drm_test_check_bpc_12_value
[11:50:58] [PASSED] drm_test_check_format_value
[11:50:58] [PASSED] drm_test_check_tmds_char_value
[11:50:58] ===== [PASSED] drm_atomic_helper_connector_hdmi_reset ======
[11:50:58] = drm_atomic_helper_connector_hdmi_mode_valid (4 subtests) =
[11:50:58] [PASSED] drm_test_check_mode_valid
[11:50:58] [PASSED] drm_test_check_mode_valid_reject
[11:50:58] [PASSED] drm_test_check_mode_valid_reject_rate
[11:50:58] [PASSED] drm_test_check_mode_valid_reject_max_clock
[11:50:58] === [PASSED] drm_atomic_helper_connector_hdmi_mode_valid ===
[11:50:58] ================= drm_managed (2 subtests) =================
[11:50:58] [PASSED] drm_test_managed_release_action
[11:50:58] [PASSED] drm_test_managed_run_action
[11:50:58] =================== [PASSED] drm_managed ===================
[11:50:58] =================== drm_mm (6 subtests) ====================
[11:50:58] [PASSED] drm_test_mm_init
[11:50:58] [PASSED] drm_test_mm_debug
[11:50:58] [PASSED] drm_test_mm_align32
[11:50:58] [PASSED] drm_test_mm_align64
[11:50:58] [PASSED] drm_test_mm_lowest
[11:50:58] [PASSED] drm_test_mm_highest
[11:50:58] ===================== [PASSED] drm_mm ======================
[11:50:58] ============= drm_modes_analog_tv (5 subtests) =============
[11:50:58] [PASSED] drm_test_modes_analog_tv_mono_576i
[11:50:58] [PASSED] drm_test_modes_analog_tv_ntsc_480i
[11:50:58] [PASSED] drm_test_modes_analog_tv_ntsc_480i_inlined
[11:50:58] [PASSED] drm_test_modes_analog_tv_pal_576i
[11:50:58] [PASSED] drm_test_modes_analog_tv_pal_576i_inlined
[11:50:58] =============== [PASSED] drm_modes_analog_tv ===============
[11:50:58] ============== drm_plane_helper (2 subtests) ===============
[11:50:58] =============== drm_test_check_plane_state ================
[11:50:58] [PASSED] clipping_simple
[11:50:58] [PASSED] clipping_rotate_reflect
[11:50:58] [PASSED] positioning_simple
[11:50:58] [PASSED] upscaling
[11:50:58] [PASSED] downscaling
[11:50:58] [PASSED] rounding1
[11:50:58] [PASSED] rounding2
[11:50:58] [PASSED] rounding3
[11:50:58] [PASSED] rounding4
[11:50:58] =========== [PASSED] drm_test_check_plane_state ============
[11:50:58] =========== drm_test_check_invalid_plane_state ============
[11:50:58] [PASSED] positioning_invalid
[11:50:58] [PASSED] upscaling_invalid
[11:50:58] [PASSED] downscaling_invalid
[11:50:58] ======= [PASSED] drm_test_check_invalid_plane_state ========
[11:50:58] ================ [PASSED] drm_plane_helper =================
[11:50:58] ====== drm_connector_helper_tv_get_modes (1 subtest) =======
[11:50:58] ====== drm_test_connector_helper_tv_get_modes_check =======
[11:50:58] [PASSED] None
[11:50:58] [PASSED] PAL
[11:50:58] [PASSED] NTSC
[11:50:58] [PASSED] Both, NTSC Default
[11:50:58] [PASSED] Both, PAL Default
[11:50:58] [PASSED] Both, NTSC Default, with PAL on command-line
[11:50:58] [PASSED] Both, PAL Default, with NTSC on command-line
[11:50:58] == [PASSED] drm_test_connector_helper_tv_get_modes_check ===
[11:50:58] ======== [PASSED] drm_connector_helper_tv_get_modes ========
[11:50:58] ================== drm_rect (9 subtests) ===================
[11:50:58] [PASSED] drm_test_rect_clip_scaled_div_by_zero
[11:50:58] [PASSED] drm_test_rect_clip_scaled_not_clipped
[11:50:58] [PASSED] drm_test_rect_clip_scaled_clipped
[11:50:58] [PASSED] drm_test_rect_clip_scaled_signed_vs_unsigned
[11:50:58] ================= drm_test_rect_intersect =================
[11:50:58] [PASSED] top-left x bottom-right: 2x2+1+1 x 2x2+0+0
[11:50:58] [PASSED] top-right x bottom-left: 2x2+0+0 x 2x2+1-1
[11:50:58] [PASSED] bottom-left x top-right: 2x2+1-1 x 2x2+0+0
[11:50:58] [PASSED] bottom-right x top-left: 2x2+0+0 x 2x2+1+1
[11:50:58] [PASSED] right x left: 2x1+0+0 x 3x1+1+0
[11:50:58] [PASSED] left x right: 3x1+1+0 x 2x1+0+0
[11:50:58] [PASSED] up x bottom: 1x2+0+0 x 1x3+0-1
[11:50:58] [PASSED] bottom x up: 1x3+0-1 x 1x2+0+0
[11:50:58] [PASSED] touching corner: 1x1+0+0 x 2x2+1+1
[11:50:58] [PASSED] touching side: 1x1+0+0 x 1x1+1+0
[11:50:58] [PASSED] equal rects: 2x2+0+0 x 2x2+0+0
[11:50:58] [PASSED] inside another: 2x2+0+0 x 1x1+1+1
[11:50:58] [PASSED] far away: 1x1+0+0 x 1x1+3+6
[11:50:58] [PASSED] points intersecting: 0x0+5+10 x 0x0+5+10
[11:50:58] [PASSED] points not intersecting: 0x0+0+0 x 0x0+5+10
[11:50:58] ============= [PASSED] drm_test_rect_intersect =============
[11:50:58] ================ drm_test_rect_calc_hscale ================
[11:50:58] [PASSED] normal use
[11:50:58] [PASSED] out of max range
[11:50:58] [PASSED] out of min range
[11:50:58] [PASSED] zero dst
[11:50:58] [PASSED] negative src
[11:50:58] [PASSED] negative dst
[11:50:58] ============ [PASSED] drm_test_rect_calc_hscale ============
[11:50:58] ================ drm_test_rect_calc_vscale ================
[11:50:58] [PASSED] normal use
[11:50:58] [PASSED] out of max range
[11:50:58] [PASSED] out of min range
[11:50:58] [PASSED] zero dst
[11:50:58] [PASSED] negative src
[11:50:58] [PASSED] negative dst
stty: 'standard input': Inappropriate ioctl for device
[11:50:58] ============ [PASSED] drm_test_rect_calc_vscale ============
[11:50:58] ================== drm_test_rect_rotate ===================
[11:50:58] [PASSED] reflect-x
[11:50:58] [PASSED] reflect-y
[11:50:58] [PASSED] rotate-0
[11:50:58] [PASSED] rotate-90
[11:50:58] [PASSED] rotate-180
[11:50:58] [PASSED] rotate-270
[11:50:58] ============== [PASSED] drm_test_rect_rotate ===============
[11:50:58] ================ drm_test_rect_rotate_inv =================
[11:50:58] [PASSED] reflect-x
[11:50:58] [PASSED] reflect-y
[11:50:58] [PASSED] rotate-0
[11:50:58] [PASSED] rotate-90
[11:50:58] [PASSED] rotate-180
[11:50:58] [PASSED] rotate-270
[11:50:58] ============ [PASSED] drm_test_rect_rotate_inv =============
[11:50:58] ==================== [PASSED] drm_rect =====================
[11:50:58] ============================================================
[11:50:58] Testing complete. Ran 616 tests: passed: 616
[11:50:58] Elapsed time: 23.479s total, 1.681s configuring, 21.575s building, 0.192s running
+ /kernel/tools/testing/kunit/kunit.py run --kunitconfig /kernel/drivers/gpu/drm/ttm/tests/.kunitconfig
[11:50:58] Configuring KUnit Kernel ...
Regenerating .config ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
[11:51:00] Building KUnit Kernel ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
Building with:
$ make all compile_commands.json scripts_gdb ARCH=um O=.kunit --jobs=48
[11:51:08] Starting KUnit Kernel (1/1)...
[11:51:08] ============================================================
Running tests with:
$ .kunit/linux kunit.enable=1 mem=1G console=tty kunit_shutdown=halt
[11:51:08] ================= ttm_device (5 subtests) ==================
[11:51:08] [PASSED] ttm_device_init_basic
[11:51:08] [PASSED] ttm_device_init_multiple
[11:51:08] [PASSED] ttm_device_fini_basic
[11:51:08] [PASSED] ttm_device_init_no_vma_man
[11:51:08] ================== ttm_device_init_pools ==================
[11:51:08] [PASSED] No DMA allocations, no DMA32 required
[11:51:08] [PASSED] DMA allocations, DMA32 required
[11:51:08] [PASSED] No DMA allocations, DMA32 required
[11:51:08] [PASSED] DMA allocations, no DMA32 required
[11:51:08] ============== [PASSED] ttm_device_init_pools ==============
[11:51:08] =================== [PASSED] ttm_device ====================
[11:51:08] ================== ttm_pool (8 subtests) ===================
[11:51:08] ================== ttm_pool_alloc_basic ===================
[11:51:08] [PASSED] One page
[11:51:08] [PASSED] More than one page
[11:51:08] [PASSED] Above the allocation limit
[11:51:08] [PASSED] One page, with coherent DMA mappings enabled
[11:51:08] [PASSED] Above the allocation limit, with coherent DMA mappings enabled
[11:51:08] ============== [PASSED] ttm_pool_alloc_basic ===============
[11:51:08] ============== ttm_pool_alloc_basic_dma_addr ==============
[11:51:08] [PASSED] One page
[11:51:08] [PASSED] More than one page
[11:51:08] [PASSED] Above the allocation limit
[11:51:08] [PASSED] One page, with coherent DMA mappings enabled
[11:51:08] [PASSED] Above the allocation limit, with coherent DMA mappings enabled
[11:51:08] ========== [PASSED] ttm_pool_alloc_basic_dma_addr ==========
[11:51:08] [PASSED] ttm_pool_alloc_order_caching_match
[11:51:08] [PASSED] ttm_pool_alloc_caching_mismatch
[11:51:08] [PASSED] ttm_pool_alloc_order_mismatch
[11:51:08] [PASSED] ttm_pool_free_dma_alloc
[11:51:08] [PASSED] ttm_pool_free_no_dma_alloc
[11:51:08] [PASSED] ttm_pool_fini_basic
[11:51:08] ==================== [PASSED] ttm_pool =====================
[11:51:08] ================ ttm_resource (8 subtests) =================
[11:51:08] ================= ttm_resource_init_basic =================
[11:51:08] [PASSED] Init resource in TTM_PL_SYSTEM
[11:51:08] [PASSED] Init resource in TTM_PL_VRAM
[11:51:08] [PASSED] Init resource in a private placement
[11:51:08] [PASSED] Init resource in TTM_PL_SYSTEM, set placement flags
[11:51:08] ============= [PASSED] ttm_resource_init_basic =============
[11:51:08] [PASSED] ttm_resource_init_pinned
[11:51:08] [PASSED] ttm_resource_fini_basic
[11:51:08] [PASSED] ttm_resource_manager_init_basic
[11:51:08] [PASSED] ttm_resource_manager_usage_basic
[11:51:08] [PASSED] ttm_resource_manager_set_used_basic
[11:51:08] [PASSED] ttm_sys_man_alloc_basic
[11:51:08] [PASSED] ttm_sys_man_free_basic
[11:51:08] ================== [PASSED] ttm_resource ===================
[11:51:08] =================== ttm_tt (15 subtests) ===================
[11:51:08] ==================== ttm_tt_init_basic ====================
[11:51:08] [PASSED] Page-aligned size
[11:51:08] [PASSED] Extra pages requested
[11:51:08] ================ [PASSED] ttm_tt_init_basic ================
[11:51:08] [PASSED] ttm_tt_init_misaligned
[11:51:08] [PASSED] ttm_tt_fini_basic
[11:51:08] [PASSED] ttm_tt_fini_sg
[11:51:08] [PASSED] ttm_tt_fini_shmem
[11:51:08] [PASSED] ttm_tt_create_basic
[11:51:08] [PASSED] ttm_tt_create_invalid_bo_type
[11:51:08] [PASSED] ttm_tt_create_ttm_exists
[11:51:08] [PASSED] ttm_tt_create_failed
[11:51:08] [PASSED] ttm_tt_destroy_basic
[11:51:08] [PASSED] ttm_tt_populate_null_ttm
[11:51:08] [PASSED] ttm_tt_populate_populated_ttm
[11:51:08] [PASSED] ttm_tt_unpopulate_basic
[11:51:08] [PASSED] ttm_tt_unpopulate_empty_ttm
[11:51:08] [PASSED] ttm_tt_swapin_basic
[11:51:08] ===================== [PASSED] ttm_tt ======================
[11:51:08] =================== ttm_bo (14 subtests) ===================
[11:51:08] =========== ttm_bo_reserve_optimistic_no_ticket ===========
[11:51:08] [PASSED] Cannot be interrupted and sleeps
[11:51:08] [PASSED] Cannot be interrupted, locks straight away
[11:51:08] [PASSED] Can be interrupted, sleeps
[11:51:08] ======= [PASSED] ttm_bo_reserve_optimistic_no_ticket =======
[11:51:08] [PASSED] ttm_bo_reserve_locked_no_sleep
[11:51:08] [PASSED] ttm_bo_reserve_no_wait_ticket
[11:51:08] [PASSED] ttm_bo_reserve_double_resv
[11:51:08] [PASSED] ttm_bo_reserve_interrupted
[11:51:08] [PASSED] ttm_bo_reserve_deadlock
[11:51:08] [PASSED] ttm_bo_unreserve_basic
[11:51:08] [PASSED] ttm_bo_unreserve_pinned
[11:51:08] [PASSED] ttm_bo_unreserve_bulk
[11:51:08] [PASSED] ttm_bo_put_basic
[11:51:08] [PASSED] ttm_bo_put_shared_resv
[11:51:08] [PASSED] ttm_bo_pin_basic
[11:51:08] [PASSED] ttm_bo_pin_unpin_resource
[11:51:08] [PASSED] ttm_bo_multiple_pin_one_unpin
[11:51:08] ===================== [PASSED] ttm_bo ======================
[11:51:08] ============== ttm_bo_validate (22 subtests) ===============
[11:51:08] ============== ttm_bo_init_reserved_sys_man ===============
[11:51:08] [PASSED] Buffer object for userspace
[11:51:08] [PASSED] Kernel buffer object
[11:51:08] [PASSED] Shared buffer object
[11:51:08] ========== [PASSED] ttm_bo_init_reserved_sys_man ===========
[11:51:08] ============== ttm_bo_init_reserved_mock_man ==============
[11:51:08] [PASSED] Buffer object for userspace
[11:51:08] [PASSED] Kernel buffer object
[11:51:08] [PASSED] Shared buffer object
[11:51:08] ========== [PASSED] ttm_bo_init_reserved_mock_man ==========
[11:51:08] [PASSED] ttm_bo_init_reserved_resv
[11:51:08] ================== ttm_bo_validate_basic ==================
[11:51:08] [PASSED] Buffer object for userspace
[11:51:08] [PASSED] Kernel buffer object
[11:51:08] [PASSED] Shared buffer object
[11:51:08] ============== [PASSED] ttm_bo_validate_basic ==============
[11:51:08] [PASSED] ttm_bo_validate_invalid_placement
[11:51:08] ============= ttm_bo_validate_same_placement ==============
[11:51:08] [PASSED] System manager
[11:51:08] [PASSED] VRAM manager
[11:51:08] ========= [PASSED] ttm_bo_validate_same_placement ==========
[11:51:08] [PASSED] ttm_bo_validate_failed_alloc
[11:51:08] [PASSED] ttm_bo_validate_pinned
[11:51:08] [PASSED] ttm_bo_validate_busy_placement
[11:51:08] ================ ttm_bo_validate_multihop =================
[11:51:08] [PASSED] Buffer object for userspace
[11:51:08] [PASSED] Kernel buffer object
[11:51:08] [PASSED] Shared buffer object
[11:51:08] ============ [PASSED] ttm_bo_validate_multihop =============
[11:51:08] ========== ttm_bo_validate_no_placement_signaled ==========
[11:51:08] [PASSED] Buffer object in system domain, no page vector
[11:51:08] [PASSED] Buffer object in system domain with an existing page vector
[11:51:08] ====== [PASSED] ttm_bo_validate_no_placement_signaled ======
[11:51:08] ======== ttm_bo_validate_no_placement_not_signaled ========
[11:51:08] [PASSED] Buffer object for userspace
[11:51:08] [PASSED] Kernel buffer object
[11:51:08] [PASSED] Shared buffer object
[11:51:08] ==== [PASSED] ttm_bo_validate_no_placement_not_signaled ====
[11:51:08] [PASSED] ttm_bo_validate_move_fence_signaled
[11:51:08] ========= ttm_bo_validate_move_fence_not_signaled =========
[11:51:08] [PASSED] Waits for GPU
[11:51:08] [PASSED] Tries to lock straight away
[11:51:08] ===== [PASSED] ttm_bo_validate_move_fence_not_signaled =====
[11:51:08] [PASSED] ttm_bo_validate_swapout
[11:51:08] [PASSED] ttm_bo_validate_happy_evict
[11:51:08] [PASSED] ttm_bo_validate_all_pinned_evict
[11:51:08] [PASSED] ttm_bo_validate_allowed_only_evict
[11:51:08] [PASSED] ttm_bo_validate_deleted_evict
[11:51:08] [PASSED] ttm_bo_validate_busy_domain_evict
[11:51:08] [PASSED] ttm_bo_validate_evict_gutting
[11:51:08] [PASSED] ttm_bo_validate_recrusive_evict
stty: 'standard input': Inappropriate ioctl for device
[11:51:08] ================= [PASSED] ttm_bo_validate =================
[11:51:08] ============================================================
[11:51:08] Testing complete. Ran 102 tests: passed: 102
[11:51:08] Elapsed time: 10.027s total, 1.607s configuring, 7.753s building, 0.567s running
+ cleanup
++ stat -c %u:%g /kernel
+ chown -R 1003:1003 /kernel
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] drm/xe: Thread prefetch of SVM ranges
2025-06-16 9:24 ` Thomas Hellström
@ 2025-06-16 12:06 ` Mrozek, Michal
2025-06-17 14:30 ` Matthew Brost
0 siblings, 1 reply; 14+ messages in thread
From: Mrozek, Michal @ 2025-06-16 12:06 UTC (permalink / raw)
To: Thomas Hellström, Brost, Matthew
Cc: intel-xe@lists.freedesktop.org, Ghimiray, Himal Prasad
> >>> > 2) Do we actually *want* to use 5 CPU cores for this?
> >>>
> >>> Yes, I profiled this with a test issuing 64MB prefetches—5 threads was
> >>> ideal. I have a comment in the code about this. Once [1] lands, we’ll
> >>> likely only need 2 threads on BMG. That would probably get us to a bus
> >>> 8× faster than BMG; for 16×, we might need more threads. But I think
> >>> we’ll always want at least 2, as there will always be some CPU
> >>> overhead that limits copy bandwidth due to serialization.
> >>
> >>What I meant was IIRC NEO has previously been picky about starting
> >>threads. Perhaps Michal can enlighten us here?
Multiple threads only give benefits if we are able to overlap things that would otherwise keep the hardware idle.
i.e. if a single CPU thread is able to saturate the system -> VRAM bandwidth, then there is no point in having multiple threads doing the same, as
all of them, due to link sharing, would finish later, and we would actually increase latencies instead of reducing them.
Simple example: if a single thread saturates a link and the whole copy operation takes 1ms, then if you:
- run 5 copies concurrently, all of those finish at 5ms mark due to link sharing
- run 5 copies sequentially, one at a time, then the first finishes at 1ms, the second at 2ms, the third at 3ms and so on, which allows consumers to be unblocked much faster
Hence I would be very careful to use 5 threads to do CPU copies concurrently.
Also you may explore vector intrinsics to do the transfers, sample -> https://github.com/pmodels/mpich/blob/27229e089554fee8ac0ac9da28e56fa7dc648a45/src/mpl/src/gpu/mpl_gpu_ze.c#L3345
In general I would advise doing at most 2 copies concurrently, to overlap the ramp-up / ramp-down between copies where the machine can potentially go idle.
Too much copy parallelism may give diminishing returns, especially for larger 2MB pages.
For 4KB transfers I agree we may be bottlenecked more by copy engine inefficiency, and running multiple (2) small copies may give some nice results.
And also we should be pretty conservative in using CPU threads, especially in higher numbers, as at scale, if we take too many threads, we may introduce imbalance in the system, which would create bubbles and compromise performance due to a butterfly effect.
^ permalink raw reply [flat|nested] 14+ messages in thread
* ✓ Xe.CI.BAT: success for drm/xe: Thread prefetch of SVM ranges (rev2)
2025-06-16 6:47 [PATCH] drm/xe: Thread prefetch of SVM ranges Matthew Brost
2025-06-16 8:28 ` Thomas Hellström
2025-06-16 11:51 ` ✓ CI.KUnit: success for drm/xe: Thread prefetch of SVM ranges (rev2) Patchwork
@ 2025-06-16 12:32 ` Patchwork
2025-06-16 17:53 ` ✗ Xe.CI.Full: failure " Patchwork
2025-06-17 12:43 ` [PATCH] drm/xe: Thread prefetch of SVM ranges Thomas Hellström
4 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2025-06-16 12:32 UTC (permalink / raw)
To: Matthew Brost; +Cc: intel-xe
[-- Attachment #1: Type: text/plain, Size: 956 bytes --]
== Series Details ==
Series: drm/xe: Thread prefetch of SVM ranges (rev2)
URL : https://patchwork.freedesktop.org/series/149605/
State : success
== Summary ==
CI Bug Log - changes from xe-3254-6e474d767e318b98cc45d4b90095290879085741_BAT -> xe-pw-149605v2_BAT
====================================================
Summary
-------
**SUCCESS**
No regressions found.
Participating hosts (8 -> 8)
------------------------------
No changes in participating hosts
Changes
-------
No changes found
Build changes
-------------
* Linux: xe-3254-6e474d767e318b98cc45d4b90095290879085741 -> xe-pw-149605v2
IGT_8411: d5b5d2bb4f8795a98ea58376a128b74f654b7ec1 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
xe-3254-6e474d767e318b98cc45d4b90095290879085741: 6e474d767e318b98cc45d4b90095290879085741
xe-pw-149605v2: 149605v2
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/index.html
[-- Attachment #2: Type: text/html, Size: 1504 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread
* ✗ Xe.CI.Full: failure for drm/xe: Thread prefetch of SVM ranges (rev2)
2025-06-16 6:47 [PATCH] drm/xe: Thread prefetch of SVM ranges Matthew Brost
` (2 preceding siblings ...)
2025-06-16 12:32 ` ✓ Xe.CI.BAT: " Patchwork
@ 2025-06-16 17:53 ` Patchwork
2025-06-17 12:43 ` [PATCH] drm/xe: Thread prefetch of SVM ranges Thomas Hellström
4 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2025-06-16 17:53 UTC (permalink / raw)
To: Matthew Brost; +Cc: intel-xe
[-- Attachment #1: Type: text/plain, Size: 101284 bytes --]
== Series Details ==
Series: drm/xe: Thread prefetch of SVM ranges (rev2)
URL : https://patchwork.freedesktop.org/series/149605/
State : failure
== Summary ==
CI Bug Log - changes from xe-3254-6e474d767e318b98cc45d4b90095290879085741_FULL -> xe-pw-149605v2_FULL
====================================================
Summary
-------
**FAILURE**
Serious unknown changes coming with xe-pw-149605v2_FULL absolutely need to be
verified manually.
If you think the reported changes have nothing to do with the changes
introduced in xe-pw-149605v2_FULL, please notify your bug team (I915-ci-infra@lists.freedesktop.org) to allow them
to document this new failure mode, which will reduce false positives in CI.
Participating hosts (4 -> 4)
------------------------------
No changes in participating hosts
Possible new issues
-------------------
Here are the unknown changes that may have been introduced in xe-pw-149605v2_FULL:
### IGT changes ###
#### Possible regressions ####
* igt@kms_flip@flip-vs-absolute-wf_vblank@d-hdmi-a1:
- shard-adlp: [PASS][1] -> [FAIL][2]
[1]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-2/igt@kms_flip@flip-vs-absolute-wf_vblank@d-hdmi-a1.html
[2]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-9/igt@kms_flip@flip-vs-absolute-wf_vblank@d-hdmi-a1.html
Known issues
------------
Here are the changes found in xe-pw-149605v2_FULL that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@core_hotunplug@hotrebind-lateclose:
- shard-adlp: NOTRUN -> [SKIP][3] ([Intel XE#4963])
[3]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@core_hotunplug@hotrebind-lateclose.html
* igt@kms_addfb_basic@bad-pitch-63:
- shard-adlp: [PASS][4] -> [SKIP][5] ([Intel XE#4950]) +10 other tests skip
[4]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_addfb_basic@bad-pitch-63.html
[5]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_addfb_basic@bad-pitch-63.html
* igt@kms_async_flips@async-flip-suspend-resume@pipe-d-dp-4:
- shard-dg2-set2: NOTRUN -> [FAIL][6] ([Intel XE#4427]) +1 other test fail
[6]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_async_flips@async-flip-suspend-resume@pipe-d-dp-4.html
* igt@kms_async_flips@async-flip-with-page-flip-events-tiled-atomic@pipe-a-hdmi-a-6-4-rc-ccs-cc:
- shard-dg2-set2: NOTRUN -> [SKIP][7] ([Intel XE#3767]) +15 other tests skip
[7]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_async_flips@async-flip-with-page-flip-events-tiled-atomic@pipe-a-hdmi-a-6-4-rc-ccs-cc.html
* igt@kms_big_fb@4-tiled-64bpp-rotate-270:
- shard-dg2-set2: NOTRUN -> [SKIP][8] ([Intel XE#316])
[8]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@kms_big_fb@4-tiled-64bpp-rotate-270.html
* igt@kms_big_fb@4-tiled-8bpp-rotate-0:
- shard-adlp: NOTRUN -> [SKIP][9] ([Intel XE#1124]) +1 other test skip
[9]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_big_fb@4-tiled-8bpp-rotate-0.html
* igt@kms_big_fb@linear-64bpp-rotate-90:
- shard-adlp: NOTRUN -> [SKIP][10] ([Intel XE#316])
[10]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_big_fb@linear-64bpp-rotate-90.html
* igt@kms_big_fb@x-tiled-32bpp-rotate-270:
- shard-bmg: NOTRUN -> [SKIP][11] ([Intel XE#2327]) +1 other test skip
[11]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_big_fb@x-tiled-32bpp-rotate-270.html
* igt@kms_big_fb@y-tiled-max-hw-stride-32bpp-rotate-180-async-flip:
- shard-bmg: NOTRUN -> [SKIP][12] ([Intel XE#1124]) +8 other tests skip
[12]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_big_fb@y-tiled-max-hw-stride-32bpp-rotate-180-async-flip.html
* igt@kms_big_fb@yf-tiled-addfb-size-offset-overflow:
- shard-bmg: NOTRUN -> [SKIP][13] ([Intel XE#607])
[13]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_big_fb@yf-tiled-addfb-size-offset-overflow.html
* igt@kms_big_fb@yf-tiled-max-hw-stride-32bpp-rotate-0-async-flip:
- shard-dg2-set2: NOTRUN -> [SKIP][14] ([Intel XE#1124]) +4 other tests skip
[14]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_big_fb@yf-tiled-max-hw-stride-32bpp-rotate-0-async-flip.html
* igt@kms_bw@connected-linear-tiling-2-displays-2560x1440p:
- shard-bmg: [PASS][15] -> [SKIP][16] ([Intel XE#2314] / [Intel XE#2894]) +1 other test skip
[15]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-2/igt@kms_bw@connected-linear-tiling-2-displays-2560x1440p.html
[16]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-5/igt@kms_bw@connected-linear-tiling-2-displays-2560x1440p.html
* igt@kms_bw@connected-linear-tiling-3-displays-1920x1080p:
- shard-dg2-set2: NOTRUN -> [SKIP][17] ([Intel XE#2191])
[17]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@kms_bw@connected-linear-tiling-3-displays-1920x1080p.html
* igt@kms_bw@connected-linear-tiling-4-displays-3840x2160p:
- shard-bmg: NOTRUN -> [SKIP][18] ([Intel XE#2314] / [Intel XE#2894]) +1 other test skip
[18]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_bw@connected-linear-tiling-4-displays-3840x2160p.html
* igt@kms_bw@linear-tiling-2-displays-1920x1080p:
- shard-bmg: NOTRUN -> [SKIP][19] ([Intel XE#367]) +1 other test skip
[19]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_bw@linear-tiling-2-displays-1920x1080p.html
- shard-dg2-set2: NOTRUN -> [SKIP][20] ([Intel XE#367])
[20]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_bw@linear-tiling-2-displays-1920x1080p.html
* igt@kms_bw@linear-tiling-4-displays-3840x2160p:
- shard-adlp: NOTRUN -> [SKIP][21] ([Intel XE#367])
[21]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_bw@linear-tiling-4-displays-3840x2160p.html
* igt@kms_ccs@bad-rotation-90-y-tiled-gen12-rc-ccs-cc:
- shard-adlp: NOTRUN -> [SKIP][22] ([Intel XE#4947]) +17 other tests skip
[22]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_ccs@bad-rotation-90-y-tiled-gen12-rc-ccs-cc.html
* igt@kms_ccs@crc-primary-basic-4-tiled-bmg-ccs:
- shard-dg2-set2: NOTRUN -> [SKIP][23] ([Intel XE#2907])
[23]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@kms_ccs@crc-primary-basic-4-tiled-bmg-ccs.html
* igt@kms_ccs@crc-primary-basic-4-tiled-mtl-mc-ccs@pipe-c-hdmi-a-1:
- shard-adlp: NOTRUN -> [SKIP][24] ([Intel XE#787]) +11 other tests skip
[24]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_ccs@crc-primary-basic-4-tiled-mtl-mc-ccs@pipe-c-hdmi-a-1.html
* igt@kms_ccs@crc-primary-basic-4-tiled-mtl-rc-ccs@pipe-b-hdmi-a-6:
- shard-dg2-set2: NOTRUN -> [SKIP][25] ([Intel XE#787]) +132 other tests skip
[25]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_ccs@crc-primary-basic-4-tiled-mtl-rc-ccs@pipe-b-hdmi-a-6.html
* igt@kms_ccs@crc-primary-rotation-180-4-tiled-dg2-rc-ccs-cc@pipe-d-hdmi-a-1:
- shard-adlp: NOTRUN -> [SKIP][26] ([Intel XE#455] / [Intel XE#787]) +7 other tests skip
[26]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_ccs@crc-primary-rotation-180-4-tiled-dg2-rc-ccs-cc@pipe-d-hdmi-a-1.html
* igt@kms_ccs@crc-primary-suspend-4-tiled-dg2-mc-ccs:
- shard-bmg: NOTRUN -> [SKIP][27] ([Intel XE#3432]) +1 other test skip
[27]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_ccs@crc-primary-suspend-4-tiled-dg2-mc-ccs.html
* igt@kms_ccs@crc-sprite-planes-basic-4-tiled-lnl-ccs@pipe-b-dp-2:
- shard-bmg: NOTRUN -> [SKIP][28] ([Intel XE#2652] / [Intel XE#787]) +3 other tests skip
[28]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_ccs@crc-sprite-planes-basic-4-tiled-lnl-ccs@pipe-b-dp-2.html
* igt@kms_ccs@missing-ccs-buffer-4-tiled-mtl-mc-ccs@pipe-d-dp-4:
- shard-dg2-set2: NOTRUN -> [SKIP][29] ([Intel XE#455] / [Intel XE#787]) +23 other tests skip
[29]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_ccs@missing-ccs-buffer-4-tiled-mtl-mc-ccs@pipe-d-dp-4.html
* igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs@pipe-b-dp-4:
- shard-dg2-set2: [PASS][30] -> [INCOMPLETE][31] ([Intel XE#3124])
[30]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-463/igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs@pipe-b-dp-4.html
[31]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-466/igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs@pipe-b-dp-4.html
* igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs@pipe-b-hdmi-a-6:
- shard-dg2-set2: [PASS][32] -> [DMESG-WARN][33] ([Intel XE#1727] / [Intel XE#3113])
[32]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-463/igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs@pipe-b-hdmi-a-6.html
[33]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-466/igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs@pipe-b-hdmi-a-6.html
* igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs-cc:
- shard-bmg: NOTRUN -> [SKIP][34] ([Intel XE#2887]) +7 other tests skip
[34]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs-cc.html
* igt@kms_cdclk@mode-transition:
- shard-dg2-set2: [PASS][35] -> [SKIP][36] ([Intel XE#2231] / [Intel XE#4208]) +4 other tests skip
[35]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@kms_cdclk@mode-transition.html
[36]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_cdclk@mode-transition.html
* igt@kms_cdclk@mode-transition-all-outputs:
- shard-bmg: NOTRUN -> [SKIP][37] ([Intel XE#2724])
[37]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_cdclk@mode-transition-all-outputs.html
- shard-dg2-set2: NOTRUN -> [SKIP][38] ([Intel XE#4418])
[38]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_cdclk@mode-transition-all-outputs.html
* igt@kms_cdclk@plane-scaling@pipe-b-dp-2:
- shard-dg2-set2: NOTRUN -> [SKIP][39] ([Intel XE#4416]) +3 other tests skip
[39]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_cdclk@plane-scaling@pipe-b-dp-2.html
* igt@kms_chamelium_color@ctm-0-50:
- shard-bmg: NOTRUN -> [SKIP][40] ([Intel XE#2325]) +1 other test skip
[40]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_chamelium_color@ctm-0-50.html
* igt@kms_chamelium_color@ctm-blue-to-red:
- shard-dg2-set2: NOTRUN -> [SKIP][41] ([Intel XE#306])
[41]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@kms_chamelium_color@ctm-blue-to-red.html
* igt@kms_chamelium_color@ctm-limited-range:
- shard-adlp: NOTRUN -> [SKIP][42] ([Intel XE#306])
[42]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_chamelium_color@ctm-limited-range.html
* igt@kms_chamelium_edid@vga-edid-read:
- shard-adlp: NOTRUN -> [SKIP][43] ([Intel XE#373])
[43]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_chamelium_edid@vga-edid-read.html
* igt@kms_chamelium_frames@dp-crc-single:
- shard-bmg: NOTRUN -> [SKIP][44] ([Intel XE#2252]) +7 other tests skip
[44]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_chamelium_frames@dp-crc-single.html
* igt@kms_chamelium_hpd@vga-hpd:
- shard-dg2-set2: NOTRUN -> [SKIP][45] ([Intel XE#373]) +2 other tests skip
[45]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_chamelium_hpd@vga-hpd.html
* igt@kms_content_protection@atomic-dpms@pipe-a-dp-2:
- shard-bmg: NOTRUN -> [FAIL][46] ([Intel XE#1178])
[46]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-4/igt@kms_content_protection@atomic-dpms@pipe-a-dp-2.html
* igt@kms_content_protection@content-type-change:
- shard-adlp: NOTRUN -> [SKIP][47] ([Intel XE#455]) +3 other tests skip
[47]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_content_protection@content-type-change.html
* igt@kms_content_protection@dp-mst-lic-type-0:
- shard-dg2-set2: NOTRUN -> [SKIP][48] ([Intel XE#307])
[48]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_content_protection@dp-mst-lic-type-0.html
- shard-bmg: NOTRUN -> [SKIP][49] ([Intel XE#2390])
[49]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_content_protection@dp-mst-lic-type-0.html
* igt@kms_content_protection@uevent:
- shard-dg2-set2: NOTRUN -> [FAIL][50] ([Intel XE#1188]) +1 other test fail
[50]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_content_protection@uevent.html
* igt@kms_cursor_crc@cursor-offscreen-256x85:
- shard-bmg: NOTRUN -> [SKIP][51] ([Intel XE#2320]) +5 other tests skip
[51]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_cursor_crc@cursor-offscreen-256x85.html
* igt@kms_cursor_crc@cursor-offscreen-512x170:
- shard-bmg: NOTRUN -> [SKIP][52] ([Intel XE#2321]) +1 other test skip
[52]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_cursor_crc@cursor-offscreen-512x170.html
* igt@kms_cursor_crc@cursor-rapid-movement-512x170:
- shard-dg2-set2: NOTRUN -> [SKIP][53] ([Intel XE#308])
[53]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_cursor_crc@cursor-rapid-movement-512x170.html
* igt@kms_cursor_crc@cursor-sliding-256x256:
- shard-adlp: NOTRUN -> [SKIP][54] ([Intel XE#4950]) +10 other tests skip
[54]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_cursor_crc@cursor-sliding-256x256.html
* igt@kms_cursor_legacy@cursorb-vs-flipb-toggle:
- shard-bmg: [PASS][55] -> [SKIP][56] ([Intel XE#2291]) +4 other tests skip
[55]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-7/igt@kms_cursor_legacy@cursorb-vs-flipb-toggle.html
[56]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-6/igt@kms_cursor_legacy@cursorb-vs-flipb-toggle.html
* igt@kms_cursor_legacy@short-busy-flip-before-cursor-atomic-transitions-varying-size:
- shard-dg2-set2: NOTRUN -> [SKIP][57] ([Intel XE#323])
[57]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@kms_cursor_legacy@short-busy-flip-before-cursor-atomic-transitions-varying-size.html
* igt@kms_dirtyfb@fbc-dirtyfb-ioctl:
- shard-bmg: NOTRUN -> [SKIP][58] ([Intel XE#4210])
[58]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_dirtyfb@fbc-dirtyfb-ioctl.html
* igt@kms_display_modes@extended-mode-basic:
- shard-bmg: [PASS][59] -> [SKIP][60] ([Intel XE#4302])
[59]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-7/igt@kms_display_modes@extended-mode-basic.html
[60]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-6/igt@kms_display_modes@extended-mode-basic.html
* igt@kms_dsc@dsc-basic:
- shard-bmg: NOTRUN -> [SKIP][61] ([Intel XE#2244])
[61]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_dsc@dsc-basic.html
* igt@kms_fbc_dirty_rect@fbc-dirty-rectangle-dirtyfb-tests:
- shard-bmg: NOTRUN -> [SKIP][62] ([Intel XE#4422])
[62]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_fbc_dirty_rect@fbc-dirty-rectangle-dirtyfb-tests.html
- shard-dg2-set2: NOTRUN -> [SKIP][63] ([Intel XE#4422])
[63]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_fbc_dirty_rect@fbc-dirty-rectangle-dirtyfb-tests.html
* igt@kms_feature_discovery@chamelium:
- shard-bmg: NOTRUN -> [SKIP][64] ([Intel XE#2372])
[64]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_feature_discovery@chamelium.html
* igt@kms_flip@2x-flip-vs-absolute-wf_vblank-interruptible:
- shard-dg2-set2: [PASS][65] -> [FAIL][66] ([Intel XE#2882] / [Intel XE#3098] / [Intel XE#886])
[65]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-466/igt@kms_flip@2x-flip-vs-absolute-wf_vblank-interruptible.html
[66]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_flip@2x-flip-vs-absolute-wf_vblank-interruptible.html
* igt@kms_flip@2x-flip-vs-absolute-wf_vblank-interruptible@ab-hdmi-a2-dp2:
- shard-dg2-set2: NOTRUN -> [FAIL][67] ([Intel XE#3098])
[67]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_flip@2x-flip-vs-absolute-wf_vblank-interruptible@ab-hdmi-a2-dp2.html
* igt@kms_flip@2x-flip-vs-absolute-wf_vblank-interruptible@ac-hdmi-a2-dp2:
- shard-dg2-set2: NOTRUN -> [FAIL][68] ([Intel XE#2882] / [Intel XE#886])
[68]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_flip@2x-flip-vs-absolute-wf_vblank-interruptible@ac-hdmi-a2-dp2.html
* igt@kms_flip@2x-flip-vs-absolute-wf_vblank-interruptible@ad-hdmi-a2-dp2:
- shard-dg2-set2: NOTRUN -> [FAIL][69] ([Intel XE#886])
[69]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_flip@2x-flip-vs-absolute-wf_vblank-interruptible@ad-hdmi-a2-dp2.html
* igt@kms_flip@2x-flip-vs-absolute-wf_vblank@ab-dp2-hdmi-a3:
- shard-bmg: NOTRUN -> [FAIL][70] ([Intel XE#2882])
[70]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-4/igt@kms_flip@2x-flip-vs-absolute-wf_vblank@ab-dp2-hdmi-a3.html
* igt@kms_flip@2x-flip-vs-dpms-off-vs-modeset-interruptible:
- shard-adlp: NOTRUN -> [SKIP][71] ([Intel XE#310])
[71]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_flip@2x-flip-vs-dpms-off-vs-modeset-interruptible.html
* igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@ab-hdmi-a6-dp4:
- shard-dg2-set2: [PASS][72] -> [FAIL][73] ([Intel XE#301] / [Intel XE#3321]) +1 other test fail
[72]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-436/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@ab-hdmi-a6-dp4.html
[73]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-436/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@ab-hdmi-a6-dp4.html
* igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@cd-hdmi-a6-dp4:
- shard-dg2-set2: [PASS][74] -> [FAIL][75] ([Intel XE#301]) +3 other tests fail
[74]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-436/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@cd-hdmi-a6-dp4.html
[75]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-436/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@cd-hdmi-a6-dp4.html
* igt@kms_flip@2x-flip-vs-expired-vblank@ab-hdmi-a6-dp4:
- shard-dg2-set2: NOTRUN -> [FAIL][76] ([Intel XE#301]) +2 other tests fail
[76]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_flip@2x-flip-vs-expired-vblank@ab-hdmi-a6-dp4.html
* igt@kms_flip@2x-nonexisting-fb:
- shard-bmg: [PASS][77] -> [SKIP][78] ([Intel XE#2316]) +7 other tests skip
[77]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-2/igt@kms_flip@2x-nonexisting-fb.html
[78]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-5/igt@kms_flip@2x-nonexisting-fb.html
* igt@kms_flip@flip-vs-absolute-wf_vblank@b-hdmi-a1:
- shard-adlp: [PASS][79] -> [FAIL][80] ([Intel XE#2882]) +2 other tests fail
[79]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-2/igt@kms_flip@flip-vs-absolute-wf_vblank@b-hdmi-a1.html
[80]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-9/igt@kms_flip@flip-vs-absolute-wf_vblank@b-hdmi-a1.html
* igt@kms_flip@flip-vs-suspend-interruptible:
- shard-bmg: [PASS][81] -> [INCOMPLETE][82] ([Intel XE#2049] / [Intel XE#2597]) +1 other test incomplete
[81]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-1/igt@kms_flip@flip-vs-suspend-interruptible.html
[82]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-6/igt@kms_flip@flip-vs-suspend-interruptible.html
* igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs-downscaling:
- shard-dg2-set2: NOTRUN -> [SKIP][83] ([Intel XE#455]) +8 other tests skip
[83]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs-downscaling.html
* igt@kms_flip_scaled_crc@flip-64bpp-4tile-to-32bpp-4tiledg2rcccs-downscaling:
- shard-bmg: NOTRUN -> [SKIP][84] ([Intel XE#2380]) +1 other test skip
[84]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_flip_scaled_crc@flip-64bpp-4tile-to-32bpp-4tiledg2rcccs-downscaling.html
* igt@kms_flip_scaled_crc@flip-64bpp-yftile-to-32bpp-yftile-upscaling:
- shard-bmg: NOTRUN -> [SKIP][85] ([Intel XE#2293] / [Intel XE#2380]) +1 other test skip
[85]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-8/igt@kms_flip_scaled_crc@flip-64bpp-yftile-to-32bpp-yftile-upscaling.html
* igt@kms_flip_scaled_crc@flip-64bpp-yftile-to-32bpp-yftile-upscaling@pipe-a-valid-mode:
- shard-bmg: NOTRUN -> [SKIP][86] ([Intel XE#2293]) +1 other test skip
[86]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-8/igt@kms_flip_scaled_crc@flip-64bpp-yftile-to-32bpp-yftile-upscaling@pipe-a-valid-mode.html
* igt@kms_frontbuffer_tracking@drrs-1p-offscren-pri-indfb-draw-blt:
- shard-bmg: NOTRUN -> [SKIP][87] ([Intel XE#2311]) +20 other tests skip
[87]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_frontbuffer_tracking@drrs-1p-offscren-pri-indfb-draw-blt.html
* igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-pri-shrfb-draw-mmap-wc:
- shard-adlp: NOTRUN -> [SKIP][88] ([Intel XE#656]) +5 other tests skip
[88]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-pri-shrfb-draw-mmap-wc.html
* igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-blt:
- shard-bmg: NOTRUN -> [SKIP][89] ([Intel XE#4141]) +11 other tests skip
[89]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-blt.html
* igt@kms_frontbuffer_tracking@fbc-1p-rte:
- shard-adlp: [PASS][90] -> [SKIP][91] ([Intel XE#4947]) +1 other test skip
[90]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_frontbuffer_tracking@fbc-1p-rte.html
[91]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_frontbuffer_tracking@fbc-1p-rte.html
* igt@kms_frontbuffer_tracking@fbcdrrs-1p-primscrn-indfb-msflip-blt:
- shard-adlp: NOTRUN -> [SKIP][92] ([Intel XE#651]) +1 other test skip
[92]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_frontbuffer_tracking@fbcdrrs-1p-primscrn-indfb-msflip-blt.html
* igt@kms_frontbuffer_tracking@fbcdrrs-2p-primscrn-pri-indfb-draw-mmap-wc:
- shard-dg2-set2: NOTRUN -> [SKIP][93] ([Intel XE#651]) +12 other tests skip
[93]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@kms_frontbuffer_tracking@fbcdrrs-2p-primscrn-pri-indfb-draw-mmap-wc.html
* igt@kms_frontbuffer_tracking@fbcdrrs-2p-scndscrn-cur-indfb-onoff:
- shard-adlp: NOTRUN -> [SKIP][94] ([Intel XE#2351] / [Intel XE#4947]) +1 other test skip
[94]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_frontbuffer_tracking@fbcdrrs-2p-scndscrn-cur-indfb-onoff.html
* igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-cur-indfb-draw-blt:
- shard-bmg: NOTRUN -> [SKIP][95] ([Intel XE#2313]) +21 other tests skip
[95]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-cur-indfb-draw-blt.html
* igt@kms_frontbuffer_tracking@fbcpsr-rgb565-draw-mmap-wc:
- shard-adlp: NOTRUN -> [SKIP][96] ([Intel XE#653]) +4 other tests skip
[96]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_frontbuffer_tracking@fbcpsr-rgb565-draw-mmap-wc.html
* igt@kms_frontbuffer_tracking@psr-2p-primscrn-shrfb-msflip-blt:
- shard-dg2-set2: NOTRUN -> [SKIP][97] ([Intel XE#2231] / [Intel XE#4208]) +21 other tests skip
[97]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_frontbuffer_tracking@psr-2p-primscrn-shrfb-msflip-blt.html
* igt@kms_frontbuffer_tracking@psr-slowdraw:
- shard-dg2-set2: NOTRUN -> [SKIP][98] ([Intel XE#653]) +13 other tests skip
[98]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_frontbuffer_tracking@psr-slowdraw.html
* igt@kms_hdr@brightness-with-hdr:
- shard-bmg: NOTRUN -> [SKIP][99] ([Intel XE#3374] / [Intel XE#3544])
[99]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_hdr@brightness-with-hdr.html
* igt@kms_hdr@invalid-hdr:
- shard-bmg: [PASS][100] -> [SKIP][101] ([Intel XE#1503])
[100]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-8/igt@kms_hdr@invalid-hdr.html
[101]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_hdr@invalid-hdr.html
* igt@kms_joiner@basic-force-ultra-joiner:
- shard-dg2-set2: NOTRUN -> [SKIP][102] ([Intel XE#2925]) +1 other test skip
[102]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_joiner@basic-force-ultra-joiner.html
* igt@kms_joiner@invalid-modeset-big-joiner:
- shard-adlp: NOTRUN -> [SKIP][103] ([Intel XE#346])
[103]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_joiner@invalid-modeset-big-joiner.html
* igt@kms_lease@lease-uevent:
- shard-adlp: [PASS][104] -> [DMESG-WARN][105] ([Intel XE#2953] / [Intel XE#4173])
[104]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-3/igt@kms_lease@lease-uevent.html
[105]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-6/igt@kms_lease@lease-uevent.html
* igt@kms_plane_cursor@overlay@pipe-a-hdmi-a-6-size-64:
- shard-dg2-set2: NOTRUN -> [FAIL][106] ([Intel XE#616]) +1 other test fail
[106]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_plane_cursor@overlay@pipe-a-hdmi-a-6-size-64.html
* igt@kms_plane_lowres@tiling-4:
- shard-dg2-set2: [PASS][107] -> [SKIP][108] ([Intel XE#4208] / [i915#2575]) +13 other tests skip
[107]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@kms_plane_lowres@tiling-4.html
[108]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_plane_lowres@tiling-4.html
* igt@kms_plane_multiple@2x-tiling-4:
- shard-bmg: [PASS][109] -> [SKIP][110] ([Intel XE#4596])
[109]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-7/igt@kms_plane_multiple@2x-tiling-4.html
[110]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-6/igt@kms_plane_multiple@2x-tiling-4.html
* igt@kms_plane_multiple@2x-tiling-none:
- shard-adlp: NOTRUN -> [SKIP][111] ([Intel XE#4596])
[111]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_plane_multiple@2x-tiling-none.html
* igt@kms_plane_multiple@2x-tiling-yf:
- shard-bmg: NOTRUN -> [SKIP][112] ([Intel XE#5021])
[112]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_plane_multiple@2x-tiling-yf.html
* igt@kms_plane_scaling@2x-scaler-multi-pipe:
- shard-adlp: NOTRUN -> [SKIP][113] ([Intel XE#309]) +2 other tests skip
[113]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_plane_scaling@2x-scaler-multi-pipe.html
* igt@kms_plane_scaling@plane-upscale-factor-0-25-with-modifiers:
- shard-dg2-set2: NOTRUN -> [SKIP][114] ([Intel XE#4208] / [i915#2575]) +10 other tests skip
[114]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_plane_scaling@plane-upscale-factor-0-25-with-modifiers.html
* igt@kms_pm_backlight@fade-with-suspend:
- shard-bmg: NOTRUN -> [SKIP][115] ([Intel XE#870])
[115]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_pm_backlight@fade-with-suspend.html
* igt@kms_pm_dc@dc5-psr:
- shard-adlp: NOTRUN -> [SKIP][116] ([Intel XE#1129])
[116]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_pm_dc@dc5-psr.html
* igt@kms_pm_dc@dc5-retention-flops:
- shard-dg2-set2: NOTRUN -> [SKIP][117] ([Intel XE#3309])
[117]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@kms_pm_dc@dc5-retention-flops.html
* igt@kms_pm_rpm@dpms-lpsp:
- shard-bmg: NOTRUN -> [SKIP][118] ([Intel XE#1439] / [Intel XE#3141] / [Intel XE#836])
[118]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_pm_rpm@dpms-lpsp.html
* igt@kms_psr2_sf@pr-overlay-plane-update-sf-dmg-area:
- shard-bmg: NOTRUN -> [SKIP][119] ([Intel XE#1489]) +5 other tests skip
[119]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_psr2_sf@pr-overlay-plane-update-sf-dmg-area.html
- shard-dg2-set2: NOTRUN -> [SKIP][120] ([Intel XE#1489]) +3 other tests skip
[120]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_psr2_sf@pr-overlay-plane-update-sf-dmg-area.html
* igt@kms_psr2_sf@psr2-cursor-plane-move-continuous-exceed-fully-sf:
- shard-adlp: NOTRUN -> [SKIP][121] ([Intel XE#1489]) +1 other test skip
[121]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_psr2_sf@psr2-cursor-plane-move-continuous-exceed-fully-sf.html
* igt@kms_psr2_su@page_flip-xrgb8888:
- shard-bmg: NOTRUN -> [SKIP][122] ([Intel XE#2387])
[122]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_psr2_su@page_flip-xrgb8888.html
* igt@kms_psr@fbc-pr-primary-blt:
- shard-adlp: NOTRUN -> [SKIP][123] ([Intel XE#2850] / [Intel XE#929]) +3 other tests skip
[123]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@kms_psr@fbc-pr-primary-blt.html
* igt@kms_psr@fbc-psr2-sprite-plane-move:
- shard-dg2-set2: NOTRUN -> [SKIP][124] ([Intel XE#2850] / [Intel XE#929]) +5 other tests skip
[124]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@kms_psr@fbc-psr2-sprite-plane-move.html
* igt@kms_psr@pr-sprite-plane-onoff:
- shard-bmg: NOTRUN -> [SKIP][125] ([Intel XE#2234] / [Intel XE#2850]) +7 other tests skip
[125]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_psr@pr-sprite-plane-onoff.html
* igt@kms_psr_stress_test@invalidate-primary-flip-overlay:
- shard-bmg: NOTRUN -> [SKIP][126] ([Intel XE#2414])
[126]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_psr_stress_test@invalidate-primary-flip-overlay.html
- shard-dg2-set2: NOTRUN -> [SKIP][127] ([Intel XE#2939])
[127]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_psr_stress_test@invalidate-primary-flip-overlay.html
* igt@kms_rotation_crc@primary-y-tiled-reflect-x-0:
- shard-bmg: NOTRUN -> [SKIP][128] ([Intel XE#2330])
[128]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_rotation_crc@primary-y-tiled-reflect-x-0.html
* igt@kms_rotation_crc@primary-y-tiled-reflect-x-270:
- shard-bmg: NOTRUN -> [SKIP][129] ([Intel XE#3414] / [Intel XE#3904]) +1 other test skip
[129]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_rotation_crc@primary-y-tiled-reflect-x-270.html
- shard-dg2-set2: NOTRUN -> [SKIP][130] ([Intel XE#3414]) +1 other test skip
[130]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_rotation_crc@primary-y-tiled-reflect-x-270.html
* igt@kms_setmode@clone-exclusive-crtc:
- shard-bmg: [PASS][131] -> [SKIP][132] ([Intel XE#1435]) +1 other test skip
[131]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-7/igt@kms_setmode@clone-exclusive-crtc.html
[132]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-5/igt@kms_setmode@clone-exclusive-crtc.html
* igt@kms_tiled_display@basic-test-pattern:
- shard-bmg: NOTRUN -> [SKIP][133] ([Intel XE#2426])
[133]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_tiled_display@basic-test-pattern.html
* igt@kms_tiled_display@basic-test-pattern-with-chamelium:
- shard-bmg: NOTRUN -> [SKIP][134] ([Intel XE#2509])
[134]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_tiled_display@basic-test-pattern-with-chamelium.html
- shard-dg2-set2: NOTRUN -> [SKIP][135] ([Intel XE#1500])
[135]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_tiled_display@basic-test-pattern-with-chamelium.html
* igt@kms_vrr@cmrr@pipe-a-edp-1:
- shard-lnl: [PASS][136] -> [FAIL][137] ([Intel XE#4459]) +1 other test fail
[136]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-lnl-7/igt@kms_vrr@cmrr@pipe-a-edp-1.html
[137]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-lnl-3/igt@kms_vrr@cmrr@pipe-a-edp-1.html
* igt@kms_vrr@flip-basic:
- shard-bmg: NOTRUN -> [SKIP][138] ([Intel XE#1499])
[138]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_vrr@flip-basic.html
* igt@xe_ccs@ctrl-surf-copy:
- shard-adlp: NOTRUN -> [SKIP][139] ([Intel XE#455] / [Intel XE#488])
[139]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_ccs@ctrl-surf-copy.html
* igt@xe_eudebug@attach-debug-metadata:
- shard-adlp: NOTRUN -> [SKIP][140] ([Intel XE#4837]) +3 other tests skip
[140]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_eudebug@attach-debug-metadata.html
* igt@xe_eudebug@discovery-race-vmbind:
- shard-bmg: NOTRUN -> [SKIP][141] ([Intel XE#4837]) +8 other tests skip
[141]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@xe_eudebug@discovery-race-vmbind.html
* igt@xe_eudebug_online@interrupt-all-set-breakpoint:
- shard-dg2-set2: NOTRUN -> [SKIP][142] ([Intel XE#4837]) +5 other tests skip
[142]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@xe_eudebug_online@interrupt-all-set-breakpoint.html
* igt@xe_eudebug_sriov@deny-eudebug:
- shard-adlp: NOTRUN -> [SKIP][143] ([Intel XE#4519])
[143]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_eudebug_sriov@deny-eudebug.html
* igt@xe_evict@evict-beng-large-external-cm:
- shard-adlp: NOTRUN -> [SKIP][144] ([Intel XE#261] / [Intel XE#688])
[144]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_evict@evict-beng-large-external-cm.html
* igt@xe_exec_balancer@twice-cm-virtual-userptr-rebind:
- shard-dg2-set2: [PASS][145] -> [SKIP][146] ([Intel XE#4208]) +10 other tests skip
[145]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@xe_exec_balancer@twice-cm-virtual-userptr-rebind.html
[146]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@xe_exec_balancer@twice-cm-virtual-userptr-rebind.html
* igt@xe_exec_basic@many-execqueues-basic-defer-mmap:
- shard-adlp: [PASS][147] -> [SKIP][148] ([Intel XE#4945]) +9 other tests skip
[147]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@xe_exec_basic@many-execqueues-basic-defer-mmap.html
[148]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_exec_basic@many-execqueues-basic-defer-mmap.html
* igt@xe_exec_basic@multigpu-many-execqueues-many-vm-bindexecqueue-userptr:
- shard-bmg: NOTRUN -> [SKIP][149] ([Intel XE#2322]) +6 other tests skip
[149]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@xe_exec_basic@multigpu-many-execqueues-many-vm-bindexecqueue-userptr.html
* igt@xe_exec_basic@multigpu-many-execqueues-many-vm-userptr-invalidate-race:
- shard-adlp: NOTRUN -> [SKIP][150] ([Intel XE#1392]) +2 other tests skip
[150]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_exec_basic@multigpu-many-execqueues-many-vm-userptr-invalidate-race.html
* igt@xe_exec_basic@multigpu-once-null-rebind:
- shard-dg2-set2: [PASS][151] -> [SKIP][152] ([Intel XE#1392]) +4 other tests skip
[151]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-433/igt@xe_exec_basic@multigpu-once-null-rebind.html
[152]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@xe_exec_basic@multigpu-once-null-rebind.html
* igt@xe_exec_basic@no-exec-bindexecqueue-rebind:
- shard-dg2-set2: NOTRUN -> [SKIP][153] ([Intel XE#4208]) +61 other tests skip
[153]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@xe_exec_basic@no-exec-bindexecqueue-rebind.html
* igt@xe_exec_fault_mode@many-execqueues-userptr:
- shard-adlp: NOTRUN -> [SKIP][154] ([Intel XE#288]) +5 other tests skip
[154]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_exec_fault_mode@many-execqueues-userptr.html
* igt@xe_exec_fault_mode@twice-invalid-fault:
- shard-dg2-set2: NOTRUN -> [SKIP][155] ([Intel XE#288]) +12 other tests skip
[155]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@xe_exec_fault_mode@twice-invalid-fault.html
* igt@xe_exec_mix_modes@exec-simple-batch-store-lr:
- shard-adlp: NOTRUN -> [SKIP][156] ([Intel XE#2360])
[156]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_exec_mix_modes@exec-simple-batch-store-lr.html
* igt@xe_exec_reset@parallel-gt-reset:
- shard-bmg: [PASS][157] -> [DMESG-WARN][158] ([Intel XE#3876])
[157]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-7/igt@xe_exec_reset@parallel-gt-reset.html
[158]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-5/igt@xe_exec_reset@parallel-gt-reset.html
* igt@xe_exec_system_allocator@once-mmap-huge-nomemset:
- shard-bmg: NOTRUN -> [SKIP][159] ([Intel XE#4943]) +17 other tests skip
[159]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@xe_exec_system_allocator@once-mmap-huge-nomemset.html
* igt@xe_exec_system_allocator@process-many-mmap-free-race-nomemset:
- shard-dg2-set2: NOTRUN -> [SKIP][160] ([Intel XE#4915]) +126 other tests skip
[160]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@xe_exec_system_allocator@process-many-mmap-free-race-nomemset.html
* igt@xe_exec_system_allocator@processes-evict-malloc:
- shard-adlp: NOTRUN -> [SKIP][161] ([Intel XE#4945]) +55 other tests skip
[161]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_exec_system_allocator@processes-evict-malloc.html
* igt@xe_exec_system_allocator@threads-many-large-execqueues-mmap-nomemset:
- shard-adlp: NOTRUN -> [SKIP][162] ([Intel XE#4915]) +52 other tests skip
[162]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_exec_system_allocator@threads-many-large-execqueues-mmap-nomemset.html
* igt@xe_exec_system_allocator@threads-shared-vm-many-large-execqueues-new-bo-map-nomemset:
- shard-lnl: [PASS][163] -> [FAIL][164] ([Intel XE#5018])
[163]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-lnl-8/igt@xe_exec_system_allocator@threads-shared-vm-many-large-execqueues-new-bo-map-nomemset.html
[164]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-lnl-7/igt@xe_exec_system_allocator@threads-shared-vm-many-large-execqueues-new-bo-map-nomemset.html
* igt@xe_module_load@force-load:
- shard-bmg: NOTRUN -> [SKIP][165] ([Intel XE#2457])
[165]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@xe_module_load@force-load.html
* igt@xe_module_load@load:
- shard-adlp: ([PASS][166], [PASS][167], [PASS][168], [PASS][169], [PASS][170], [PASS][171], [PASS][172], [PASS][173], [PASS][174], [PASS][175], [PASS][176], [PASS][177], [PASS][178], [PASS][179], [PASS][180], [PASS][181], [PASS][182], [PASS][183], [PASS][184], [PASS][185], [PASS][186], [PASS][187], [PASS][188], [PASS][189], [PASS][190]) -> ([PASS][191], [PASS][192], [PASS][193], [PASS][194], [PASS][195], [PASS][196], [PASS][197], [PASS][198], [PASS][199], [PASS][200], [PASS][201], [PASS][202], [PASS][203], [PASS][204], [PASS][205], [PASS][206], [PASS][207], [PASS][208], [SKIP][209], [PASS][210], [PASS][211], [PASS][212], [PASS][213], [PASS][214], [PASS][215], [PASS][216]) ([Intel XE#378])
[166]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-1/igt@xe_module_load@load.html
[167]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-8/igt@xe_module_load@load.html
[168]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-8/igt@xe_module_load@load.html
[169]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-1/igt@xe_module_load@load.html
[170]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-4/igt@xe_module_load@load.html
[171]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-8/igt@xe_module_load@load.html
[172]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-6/igt@xe_module_load@load.html
[173]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-1/igt@xe_module_load@load.html
[174]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@xe_module_load@load.html
[175]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@xe_module_load@load.html
[176]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-1/igt@xe_module_load@load.html
[177]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@xe_module_load@load.html
[178]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-6/igt@xe_module_load@load.html
[179]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-6/igt@xe_module_load@load.html
[180]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-2/igt@xe_module_load@load.html
[181]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-2/igt@xe_module_load@load.html
[182]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-2/igt@xe_module_load@load.html
[183]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-6/igt@xe_module_load@load.html
[184]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-4/igt@xe_module_load@load.html
[185]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-4/igt@xe_module_load@load.html
[186]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-6/igt@xe_module_load@load.html
[187]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-3/igt@xe_module_load@load.html
[188]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-3/igt@xe_module_load@load.html
[189]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-3/igt@xe_module_load@load.html
[190]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-2/igt@xe_module_load@load.html
[191]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_module_load@load.html
[192]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_module_load@load.html
[193]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-2/igt@xe_module_load@load.html
[194]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-4/igt@xe_module_load@load.html
[195]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-4/igt@xe_module_load@load.html
[196]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_module_load@load.html
[197]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-2/igt@xe_module_load@load.html
[198]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-6/igt@xe_module_load@load.html
[199]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_module_load@load.html
[200]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-9/igt@xe_module_load@load.html
[201]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-6/igt@xe_module_load@load.html
[202]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-9/igt@xe_module_load@load.html
[203]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-6/igt@xe_module_load@load.html
[204]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-2/igt@xe_module_load@load.html
[205]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-4/igt@xe_module_load@load.html
[206]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_module_load@load.html
[207]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-8/igt@xe_module_load@load.html
[208]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-4/igt@xe_module_load@load.html
[209]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_module_load@load.html
[210]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_module_load@load.html
[211]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-6/igt@xe_module_load@load.html
[212]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-8/igt@xe_module_load@load.html
[213]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_module_load@load.html
[214]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-4/igt@xe_module_load@load.html
[215]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-8/igt@xe_module_load@load.html
[216]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-8/igt@xe_module_load@load.html
- shard-dg2-set2: ([PASS][217], [PASS][218], [PASS][219], [PASS][220], [PASS][221], [PASS][222], [PASS][223], [PASS][224], [PASS][225], [PASS][226], [PASS][227], [PASS][228], [PASS][229], [PASS][230], [PASS][231], [PASS][232], [PASS][233], [PASS][234], [PASS][235], [PASS][236], [PASS][237], [PASS][238], [PASS][239], [PASS][240], [PASS][241]) -> ([PASS][242], [PASS][243], [PASS][244], [PASS][245], [PASS][246], [PASS][247], [PASS][248], [PASS][249], [PASS][250], [PASS][251], [PASS][252], [PASS][253], [PASS][254], [PASS][255], [PASS][256], [PASS][257], [PASS][258], [PASS][259], [PASS][260], [PASS][261], [PASS][262], [PASS][263], [PASS][264], [SKIP][265], [PASS][266], [PASS][267]) ([Intel XE#378])
[217]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-433/igt@xe_module_load@load.html
[218]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@xe_module_load@load.html
[219]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-463/igt@xe_module_load@load.html
[220]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-434/igt@xe_module_load@load.html
[221]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-434/igt@xe_module_load@load.html
[222]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-433/igt@xe_module_load@load.html
[223]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-436/igt@xe_module_load@load.html
[224]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-436/igt@xe_module_load@load.html
[225]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-436/igt@xe_module_load@load.html
[226]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-436/igt@xe_module_load@load.html
[227]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-435/igt@xe_module_load@load.html
[228]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-435/igt@xe_module_load@load.html
[229]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-435/igt@xe_module_load@load.html
[230]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-464/igt@xe_module_load@load.html
[231]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-464/igt@xe_module_load@load.html
[232]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@xe_module_load@load.html
[233]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-464/igt@xe_module_load@load.html
[234]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-434/igt@xe_module_load@load.html
[235]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-433/igt@xe_module_load@load.html
[236]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@xe_module_load@load.html
[237]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-466/igt@xe_module_load@load.html
[238]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-466/igt@xe_module_load@load.html
[239]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-463/igt@xe_module_load@load.html
[240]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-435/igt@xe_module_load@load.html
[241]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-466/igt@xe_module_load@load.html
[242]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-436/igt@xe_module_load@load.html
[243]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-463/igt@xe_module_load@load.html
[244]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-433/igt@xe_module_load@load.html
[245]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@xe_module_load@load.html
[246]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-463/igt@xe_module_load@load.html
[247]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-436/igt@xe_module_load@load.html
[248]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-463/igt@xe_module_load@load.html
[249]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@xe_module_load@load.html
[250]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@xe_module_load@load.html
[251]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@xe_module_load@load.html
[252]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@xe_module_load@load.html
[253]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@xe_module_load@load.html
[254]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-433/igt@xe_module_load@load.html
[255]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-433/igt@xe_module_load@load.html
[256]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@xe_module_load@load.html
[257]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-436/igt@xe_module_load@load.html
[258]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-433/igt@xe_module_load@load.html
[259]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@xe_module_load@load.html
[260]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-464/igt@xe_module_load@load.html
[261]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-464/igt@xe_module_load@load.html
[262]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-464/igt@xe_module_load@load.html
[263]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@xe_module_load@load.html
[264]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@xe_module_load@load.html
[265]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@xe_module_load@load.html
[266]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-466/igt@xe_module_load@load.html
[267]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-466/igt@xe_module_load@load.html
* igt@xe_oa@missing-sample-flags:
- shard-dg2-set2: NOTRUN -> [SKIP][268] ([Intel XE#2541] / [Intel XE#3573])
[268]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@xe_oa@missing-sample-flags.html
* igt@xe_oa@mmio-triggered-reports-read:
- shard-dg2-set2: NOTRUN -> [SKIP][269] ([Intel XE#5103])
[269]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@xe_oa@mmio-triggered-reports-read.html
* igt@xe_oa@non-sampling-read-error:
- shard-adlp: NOTRUN -> [SKIP][270] ([Intel XE#2541] / [Intel XE#3573])
[270]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_oa@non-sampling-read-error.html
* igt@xe_oa@syncs-syncobj-cfg:
- shard-dg2-set2: NOTRUN -> [SKIP][271] ([Intel XE#2541] / [Intel XE#3573] / [Intel XE#4501])
[271]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@xe_oa@syncs-syncobj-cfg.html
* igt@xe_pat@pat-index-xe2:
- shard-adlp: NOTRUN -> [SKIP][272] ([Intel XE#977])
[272]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_pat@pat-index-xe2.html
* igt@xe_pm@s2idle-d3cold-basic-exec:
- shard-bmg: NOTRUN -> [SKIP][273] ([Intel XE#2284]) +1 other test skip
[273]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@xe_pm@s2idle-d3cold-basic-exec.html
* igt@xe_pm@s4-vm-bind-userptr:
- shard-adlp: [PASS][274] -> [ABORT][275] ([Intel XE#1794])
[274]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-2/igt@xe_pm@s4-vm-bind-userptr.html
[275]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-9/igt@xe_pm@s4-vm-bind-userptr.html
* igt@xe_pmu@fn-engine-activity-load:
- shard-bmg: NOTRUN -> [SKIP][276] ([Intel XE#4650])
[276]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@xe_pmu@fn-engine-activity-load.html
* igt@xe_pmu@fn-engine-activity-sched-if-idle:
- shard-adlp: NOTRUN -> [ABORT][277] ([Intel XE#5214])
[277]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-4/igt@xe_pmu@fn-engine-activity-sched-if-idle.html
* igt@xe_pxp@display-black-pxp-fb:
- shard-bmg: NOTRUN -> [SKIP][278] ([Intel XE#4733]) +1 other test skip
[278]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@xe_pxp@display-black-pxp-fb.html
* igt@xe_pxp@pxp-termination-key-update-post-suspend:
- shard-dg2-set2: NOTRUN -> [SKIP][279] ([Intel XE#4733]) +2 other tests skip
[279]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@xe_pxp@pxp-termination-key-update-post-suspend.html
* igt@xe_query@multigpu-query-config:
- shard-dg2-set2: NOTRUN -> [SKIP][280] ([Intel XE#944])
[280]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@xe_query@multigpu-query-config.html
* igt@xe_query@multigpu-query-mem-usage:
- shard-bmg: NOTRUN -> [SKIP][281] ([Intel XE#944]) +2 other tests skip
[281]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@xe_query@multigpu-query-mem-usage.html
* igt@xe_query@multigpu-query-pxp-status:
- shard-adlp: NOTRUN -> [SKIP][282] ([Intel XE#944])
[282]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_query@multigpu-query-pxp-status.html
* igt@xe_sriov_flr@flr-vf1-clear:
- shard-dg2-set2: NOTRUN -> [SKIP][283] ([Intel XE#3342])
[283]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@xe_sriov_flr@flr-vf1-clear.html
- shard-bmg: NOTRUN -> [SKIP][284] ([Intel XE#3342])
[284]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@xe_sriov_flr@flr-vf1-clear.html
* igt@xe_sriov_scheduling@nonpreempt-engine-resets:
- shard-bmg: NOTRUN -> [SKIP][285] ([Intel XE#4351])
[285]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@xe_sriov_scheduling@nonpreempt-engine-resets.html
#### Possible fixes ####
* igt@kms_ccs@crc-primary-suspend-4-tiled-bmg-ccs:
- shard-bmg: [INCOMPLETE][286] ([Intel XE#3862]) -> [PASS][287] +1 other test pass
[286]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-2/igt@kms_ccs@crc-primary-suspend-4-tiled-bmg-ccs.html
[287]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_ccs@crc-primary-suspend-4-tiled-bmg-ccs.html
* igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs@pipe-a-dp-4:
- shard-dg2-set2: [INCOMPLETE][288] ([Intel XE#1727] / [Intel XE#2705] / [Intel XE#3113] / [Intel XE#4212] / [Intel XE#4522]) -> [PASS][289] +1 other test pass
[288]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-464/igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs@pipe-a-dp-4.html
[289]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-434/igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs@pipe-a-dp-4.html
* igt@kms_cursor_legacy@cursora-vs-flipb-atomic-transitions-varying-size:
- shard-bmg: [SKIP][290] ([Intel XE#2291]) -> [PASS][291] +2 other tests pass
[290]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-6/igt@kms_cursor_legacy@cursora-vs-flipb-atomic-transitions-varying-size.html
[291]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-8/igt@kms_cursor_legacy@cursora-vs-flipb-atomic-transitions-varying-size.html
* igt@kms_dp_aux_dev:
- shard-bmg: [SKIP][292] ([Intel XE#3009]) -> [PASS][293]
[292]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-6/igt@kms_dp_aux_dev.html
[293]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-8/igt@kms_dp_aux_dev.html
* igt@kms_feature_discovery@display-2x:
- shard-bmg: [SKIP][294] ([Intel XE#2373]) -> [PASS][295]
[294]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-6/igt@kms_feature_discovery@display-2x.html
[295]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_feature_discovery@display-2x.html
* igt@kms_flip@2x-wf_vblank-ts-check:
- shard-bmg: [SKIP][296] ([Intel XE#2316]) -> [PASS][297] +5 other tests pass
[296]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-6/igt@kms_flip@2x-wf_vblank-ts-check.html
[297]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_flip@2x-wf_vblank-ts-check.html
* igt@kms_flip@flip-vs-expired-vblank-interruptible@a-dp4:
- shard-dg2-set2: [FAIL][298] ([Intel XE#301]) -> [PASS][299] +3 other tests pass
[298]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-433/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-dp4.html
[299]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-463/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-dp4.html
* igt@kms_flip@plain-flip-fb-recreate-interruptible:
- shard-dg2-set2: [INCOMPLETE][300] ([Intel XE#2049]) -> [PASS][301] +1 other test pass
[300]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-466/igt@kms_flip@plain-flip-fb-recreate-interruptible.html
[301]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@kms_flip@plain-flip-fb-recreate-interruptible.html
* igt@kms_flip@plain-flip-fb-recreate-interruptible@a-dp2:
- shard-bmg: [FAIL][302] ([Intel XE#2882]) -> [PASS][303]
[302]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-4/igt@kms_flip@plain-flip-fb-recreate-interruptible@a-dp2.html
[303]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-1/igt@kms_flip@plain-flip-fb-recreate-interruptible@a-dp2.html
* igt@kms_flip@wf_vblank-ts-check@a-edp1:
- shard-lnl: [FAIL][304] ([Intel XE#886]) -> [PASS][305] +1 other test pass
[304]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-lnl-2/igt@kms_flip@wf_vblank-ts-check@a-edp1.html
[305]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-lnl-8/igt@kms_flip@wf_vblank-ts-check@a-edp1.html
* igt@kms_hdr@invalid-hdr:
- shard-dg2-set2: [SKIP][306] ([Intel XE#455]) -> [PASS][307]
[306]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-433/igt@kms_hdr@invalid-hdr.html
[307]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-463/igt@kms_hdr@invalid-hdr.html
* igt@kms_hdr@static-swap:
- shard-bmg: [SKIP][308] ([Intel XE#1503]) -> [PASS][309]
[308]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-6/igt@kms_hdr@static-swap.html
[309]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-5/igt@kms_hdr@static-swap.html
* igt@kms_plane_multiple@2x-tiling-x:
- shard-bmg: [SKIP][310] ([Intel XE#4596]) -> [PASS][311]
[310]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-6/igt@kms_plane_multiple@2x-tiling-x.html
[311]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_plane_multiple@2x-tiling-x.html
* igt@kms_vblank@query-idle@pipe-a-hdmi-a-1:
- shard-adlp: [DMESG-WARN][312] ([Intel XE#2953] / [Intel XE#4173]) -> [PASS][313] +1 other test pass
[312]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-6/igt@kms_vblank@query-idle@pipe-a-hdmi-a-1.html
[313]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-8/igt@kms_vblank@query-idle@pipe-a-hdmi-a-1.html
* igt@xe_exec_basic@multigpu-no-exec-basic-defer-mmap:
- shard-dg2-set2: [SKIP][314] ([Intel XE#1392]) -> [PASS][315] +2 other tests pass
[314]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@xe_exec_basic@multigpu-no-exec-basic-defer-mmap.html
[315]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-435/igt@xe_exec_basic@multigpu-no-exec-basic-defer-mmap.html
* igt@xe_exec_system_allocator@threads-shared-vm-many-execqueues-new-bo-map:
- shard-lnl: [FAIL][316] -> [PASS][317]
[316]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-lnl-1/igt@xe_exec_system_allocator@threads-shared-vm-many-execqueues-new-bo-map.html
[317]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-lnl-1/igt@xe_exec_system_allocator@threads-shared-vm-many-execqueues-new-bo-map.html
* igt@xe_exec_system_allocator@threads-shared-vm-many-large-new-bo-map-nomemset:
- shard-lnl: [FAIL][318] ([Intel XE#5018]) -> [PASS][319]
[318]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-lnl-7/igt@xe_exec_system_allocator@threads-shared-vm-many-large-new-bo-map-nomemset.html
[319]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-lnl-3/igt@xe_exec_system_allocator@threads-shared-vm-many-large-new-bo-map-nomemset.html
* igt@xe_pm@s4-mocs:
- shard-adlp: [ABORT][320] ([Intel XE#1794]) -> [PASS][321]
[320]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@xe_pm@s4-mocs.html
[321]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_pm@s4-mocs.html
* igt@xe_pm@s4-vm-bind-userptr:
- shard-bmg: [ABORT][322] ([Intel XE#5255]) -> [PASS][323]
[322]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-6/igt@xe_pm@s4-vm-bind-userptr.html
[323]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-8/igt@xe_pm@s4-vm-bind-userptr.html
#### Warnings ####
* igt@kms_big_fb@x-tiled-max-hw-stride-64bpp-rotate-180-async-flip:
- shard-adlp: [DMESG-FAIL][324] ([Intel XE#4543]) -> [SKIP][325] ([Intel XE#4947])
[324]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_big_fb@x-tiled-max-hw-stride-64bpp-rotate-180-async-flip.html
[325]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_big_fb@x-tiled-max-hw-stride-64bpp-rotate-180-async-flip.html
* igt@kms_big_fb@yf-tiled-addfb-size-overflow:
- shard-adlp: [SKIP][326] ([Intel XE#610]) -> [SKIP][327] ([Intel XE#4947])
[326]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_big_fb@yf-tiled-addfb-size-overflow.html
[327]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_big_fb@yf-tiled-addfb-size-overflow.html
* igt@kms_ccs@crc-primary-rotation-180-4-tiled-lnl-ccs:
- shard-adlp: [SKIP][328] ([Intel XE#2907]) -> [SKIP][329] ([Intel XE#4947])
[328]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_ccs@crc-primary-rotation-180-4-tiled-lnl-ccs.html
[329]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_ccs@crc-primary-rotation-180-4-tiled-lnl-ccs.html
- shard-dg2-set2: [SKIP][330] ([Intel XE#2907]) -> [SKIP][331] ([Intel XE#2231] / [Intel XE#4208])
[330]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@kms_ccs@crc-primary-rotation-180-4-tiled-lnl-ccs.html
[331]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_ccs@crc-primary-rotation-180-4-tiled-lnl-ccs.html
* igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs:
- shard-dg2-set2: [INCOMPLETE][332] ([Intel XE#1727] / [Intel XE#2705] / [Intel XE#3113] / [Intel XE#4212] / [Intel XE#4345] / [Intel XE#4522]) -> [INCOMPLETE][333] ([Intel XE#1727] / [Intel XE#3113] / [Intel XE#3124])
[332]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-463/igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs.html
[333]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-466/igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs.html
* igt@kms_cdclk@mode-transition:
- shard-adlp: [SKIP][334] ([Intel XE#4417] / [Intel XE#455]) -> [SKIP][335] ([Intel XE#4947])
[334]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_cdclk@mode-transition.html
[335]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_cdclk@mode-transition.html
* igt@kms_chamelium_hpd@dp-hpd-for-each-pipe:
- shard-adlp: [SKIP][336] ([Intel XE#373]) -> [SKIP][337] ([Intel XE#4950])
[336]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_chamelium_hpd@dp-hpd-for-each-pipe.html
[337]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_chamelium_hpd@dp-hpd-for-each-pipe.html
- shard-dg2-set2: [SKIP][338] ([Intel XE#373]) -> [SKIP][339] ([Intel XE#4208] / [i915#2575])
[338]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@kms_chamelium_hpd@dp-hpd-for-each-pipe.html
[339]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_chamelium_hpd@dp-hpd-for-each-pipe.html
* igt@kms_content_protection@atomic-dpms:
- shard-bmg: [SKIP][340] ([Intel XE#2341]) -> [FAIL][341] ([Intel XE#1178])
[340]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-5/igt@kms_content_protection@atomic-dpms.html
[341]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-4/igt@kms_content_protection@atomic-dpms.html
* igt@kms_content_protection@srm:
- shard-bmg: [FAIL][342] ([Intel XE#1178]) -> [SKIP][343] ([Intel XE#2341]) +1 other test skip
[342]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-2/igt@kms_content_protection@srm.html
[343]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-5/igt@kms_content_protection@srm.html
* igt@kms_flip@2x-flip-vs-absolute-wf_vblank:
- shard-bmg: [SKIP][344] ([Intel XE#2316]) -> [FAIL][345] ([Intel XE#2882])
[344]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-5/igt@kms_flip@2x-flip-vs-absolute-wf_vblank.html
[345]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-4/igt@kms_flip@2x-flip-vs-absolute-wf_vblank.html
* igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling:
- shard-adlp: [SKIP][346] ([Intel XE#455]) -> [SKIP][347] ([Intel XE#2351] / [Intel XE#4947])
[346]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling.html
[347]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling.html
- shard-dg2-set2: [SKIP][348] ([Intel XE#455]) -> [SKIP][349] ([Intel XE#2231] / [Intel XE#4208]) +1 other test skip
[348]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling.html
[349]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling.html
* igt@kms_flip_scaled_crc@flip-64bpp-yftile-to-32bpp-yftile-downscaling:
- shard-adlp: [SKIP][350] ([Intel XE#455]) -> [SKIP][351] ([Intel XE#4947]) +1 other test skip
[350]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_flip_scaled_crc@flip-64bpp-yftile-to-32bpp-yftile-downscaling.html
[351]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_flip_scaled_crc@flip-64bpp-yftile-to-32bpp-yftile-downscaling.html
* igt@kms_frontbuffer_tracking@drrs-2p-primscrn-spr-indfb-draw-render:
- shard-bmg: [SKIP][352] ([Intel XE#2311]) -> [SKIP][353] ([Intel XE#2312]) +17 other tests skip
[352]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-7/igt@kms_frontbuffer_tracking@drrs-2p-primscrn-spr-indfb-draw-render.html
[353]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-6/igt@kms_frontbuffer_tracking@drrs-2p-primscrn-spr-indfb-draw-render.html
* igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-indfb-pgflip-blt:
- shard-bmg: [SKIP][354] ([Intel XE#2312]) -> [SKIP][355] ([Intel XE#2311]) +13 other tests skip
[354]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-6/igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-indfb-pgflip-blt.html
[355]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-2/igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-indfb-pgflip-blt.html
* igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-pri-shrfb-draw-render:
- shard-dg2-set2: [SKIP][356] ([Intel XE#651]) -> [SKIP][357] ([Intel XE#2231] / [Intel XE#4208]) +1 other test skip
[356]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-pri-shrfb-draw-render.html
[357]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-pri-shrfb-draw-render.html
* igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-draw-render:
- shard-bmg: [SKIP][358] ([Intel XE#2312]) -> [SKIP][359] ([Intel XE#4141]) +7 other tests skip
[358]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-5/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-draw-render.html
[359]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-7/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-draw-render.html
* igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-onoff:
- shard-bmg: [SKIP][360] ([Intel XE#4141]) -> [SKIP][361] ([Intel XE#2312]) +10 other tests skip
[360]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-7/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-onoff.html
[361]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-5/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-onoff.html
* igt@kms_frontbuffer_tracking@fbcdrrs-1p-primscrn-pri-shrfb-draw-blt:
- shard-adlp: [SKIP][362] ([Intel XE#651]) -> [SKIP][363] ([Intel XE#2351] / [Intel XE#4947])
[362]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_frontbuffer_tracking@fbcdrrs-1p-primscrn-pri-shrfb-draw-blt.html
[363]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_frontbuffer_tracking@fbcdrrs-1p-primscrn-pri-shrfb-draw-blt.html
* igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-blt:
- shard-dg2-set2: [SKIP][364] ([Intel XE#653]) -> [SKIP][365] ([Intel XE#2231] / [Intel XE#4208]) +2 other tests skip
[364]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-blt.html
[365]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-blt.html
* igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-indfb-plflip-blt:
- shard-bmg: [SKIP][366] ([Intel XE#2312]) -> [SKIP][367] ([Intel XE#2313]) +13 other tests skip
[366]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-5/igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-indfb-plflip-blt.html
[367]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-7/igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-indfb-plflip-blt.html
* igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-cur-indfb-draw-mmap-wc:
- shard-adlp: [SKIP][368] ([Intel XE#656]) -> [SKIP][369] ([Intel XE#4947]) +2 other tests skip
[368]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-cur-indfb-draw-mmap-wc.html
[369]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-cur-indfb-draw-mmap-wc.html
* igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-shrfb-pgflip-blt:
- shard-bmg: [SKIP][370] ([Intel XE#2313]) -> [SKIP][371] ([Intel XE#2312]) +20 other tests skip
[370]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-bmg-7/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-shrfb-pgflip-blt.html
[371]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-bmg-6/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-shrfb-pgflip-blt.html
* igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-blt:
- shard-adlp: [SKIP][372] ([Intel XE#653]) -> [SKIP][373] ([Intel XE#4947]) +1 other test skip
[372]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-blt.html
[373]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-blt.html
* igt@kms_plane_scaling@intel-max-src-size:
- shard-adlp: [SKIP][374] ([Intel XE#455]) -> [SKIP][375] ([Intel XE#4950]) +1 other test skip
[374]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_plane_scaling@intel-max-src-size.html
[375]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_plane_scaling@intel-max-src-size.html
* igt@kms_psr@pr-cursor-render:
- shard-adlp: [SKIP][376] ([Intel XE#2850] / [Intel XE#929]) -> [SKIP][377] ([Intel XE#4947]) +1 other test skip
[376]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_psr@pr-cursor-render.html
[377]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_psr@pr-cursor-render.html
- shard-dg2-set2: [SKIP][378] ([Intel XE#2850] / [Intel XE#929]) -> [SKIP][379] ([Intel XE#2231] / [Intel XE#4208]) +1 other test skip
[378]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@kms_psr@pr-cursor-render.html
[379]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@kms_psr@pr-cursor-render.html
* igt@kms_rotation_crc@primary-4-tiled-reflect-x-180:
- shard-adlp: [SKIP][380] ([Intel XE#1127]) -> [SKIP][381] ([Intel XE#4950])
[380]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@kms_rotation_crc@primary-4-tiled-reflect-x-180.html
[381]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@kms_rotation_crc@primary-4-tiled-reflect-x-180.html
* igt@xe_ccs@suspend-resume:
- shard-adlp: [SKIP][382] ([Intel XE#455] / [Intel XE#488]) -> [SKIP][383] ([Intel XE#4945])
[382]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@xe_ccs@suspend-resume.html
[383]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_ccs@suspend-resume.html
* igt@xe_evict@evict-large-cm:
- shard-adlp: [SKIP][384] ([Intel XE#261] / [Intel XE#688]) -> [SKIP][385] ([Intel XE#4945])
[384]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@xe_evict@evict-large-cm.html
[385]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_evict@evict-large-cm.html
* igt@xe_exec_fault_mode@many-bindexecqueue-rebind:
- shard-adlp: [SKIP][386] ([Intel XE#288]) -> [SKIP][387] ([Intel XE#4945]) +1 other test skip
[386]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@xe_exec_fault_mode@many-bindexecqueue-rebind.html
[387]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_exec_fault_mode@many-bindexecqueue-rebind.html
- shard-dg2-set2: [SKIP][388] ([Intel XE#288]) -> [SKIP][389] ([Intel XE#4208]) +1 other test skip
[388]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@xe_exec_fault_mode@many-bindexecqueue-rebind.html
[389]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@xe_exec_fault_mode@many-bindexecqueue-rebind.html
* igt@xe_exec_system_allocator@many-large-execqueues-mmap-remap-dontunmap-eocheck:
- shard-adlp: [SKIP][390] ([Intel XE#4915]) -> [SKIP][391] ([Intel XE#4945]) +30 other tests skip
[390]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@xe_exec_system_allocator@many-large-execqueues-mmap-remap-dontunmap-eocheck.html
[391]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_exec_system_allocator@many-large-execqueues-mmap-remap-dontunmap-eocheck.html
* igt@xe_exec_system_allocator@threads-many-large-mmap-mlock:
- shard-dg2-set2: [SKIP][392] ([Intel XE#4915]) -> [SKIP][393] ([Intel XE#4208]) +27 other tests skip
[392]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@xe_exec_system_allocator@threads-many-large-mmap-mlock.html
[393]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@xe_exec_system_allocator@threads-many-large-mmap-mlock.html
* igt@xe_oa@syncs-ufence-wait-cfg:
- shard-dg2-set2: [SKIP][394] ([Intel XE#2541] / [Intel XE#3573] / [Intel XE#4501]) -> [SKIP][395] ([Intel XE#4208])
[394]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@xe_oa@syncs-ufence-wait-cfg.html
[395]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@xe_oa@syncs-ufence-wait-cfg.html
- shard-adlp: [SKIP][396] ([Intel XE#2541] / [Intel XE#3573] / [Intel XE#4501]) -> [SKIP][397] ([Intel XE#4945])
[396]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@xe_oa@syncs-ufence-wait-cfg.html
[397]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_oa@syncs-ufence-wait-cfg.html
* igt@xe_pm@d3cold-mmap-vram:
- shard-dg2-set2: [SKIP][398] ([Intel XE#2284] / [Intel XE#366]) -> [SKIP][399] ([Intel XE#4208])
[398]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@xe_pm@d3cold-mmap-vram.html
[399]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@xe_pm@d3cold-mmap-vram.html
- shard-adlp: [SKIP][400] ([Intel XE#2284] / [Intel XE#366]) -> [SKIP][401] ([Intel XE#4945])
[400]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@xe_pm@d3cold-mmap-vram.html
[401]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_pm@d3cold-mmap-vram.html
* igt@xe_pm@s4-vm-bind-unbind-all:
- shard-adlp: [ABORT][402] ([Intel XE#1794]) -> [SKIP][403] ([Intel XE#4945])
[402]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@xe_pm@s4-vm-bind-unbind-all.html
[403]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_pm@s4-vm-bind-unbind-all.html
* igt@xe_pmu@engine-activity-all-load-idle:
- shard-adlp: [ABORT][404] ([Intel XE#5214]) -> [DMESG-WARN][405] ([Intel XE#5214])
[404]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-1/igt@xe_pmu@engine-activity-all-load-idle.html
[405]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-8/igt@xe_pmu@engine-activity-all-load-idle.html
* igt@xe_pmu@engine-activity-load-idle:
- shard-adlp: [DMESG-WARN][406] ([Intel XE#5214]) -> [ABORT][407] ([Intel XE#5214]) +1 other test abort
[406]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-4/igt@xe_pmu@engine-activity-load-idle.html
[407]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-1/igt@xe_pmu@engine-activity-load-idle.html
* igt@xe_query@multigpu-query-gt-list:
- shard-adlp: [SKIP][408] ([Intel XE#944]) -> [SKIP][409] ([Intel XE#4945])
[408]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-9/igt@xe_query@multigpu-query-gt-list.html
[409]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_query@multigpu-query-gt-list.html
- shard-dg2-set2: [SKIP][410] ([Intel XE#944]) -> [SKIP][411] ([Intel XE#4208])
[410]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-dg2-432/igt@xe_query@multigpu-query-gt-list.html
[411]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-dg2-432/igt@xe_query@multigpu-query-gt-list.html
* igt@xe_sriov_scheduling@equal-throughput:
- shard-adlp: [ABORT][412] ([Intel XE#5214]) -> [DMESG-FAIL][413] ([Intel XE#5237]) +1 other test dmesg-fail
[412]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-3254-6e474d767e318b98cc45d4b90095290879085741/shard-adlp-8/igt@xe_sriov_scheduling@equal-throughput.html
[413]: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/shard-adlp-3/igt@xe_sriov_scheduling@equal-throughput.html
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
[Intel XE#1124]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1124
[Intel XE#1127]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1127
[Intel XE#1129]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1129
[Intel XE#1178]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1178
[Intel XE#1188]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1188
[Intel XE#1392]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1392
[Intel XE#1435]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1435
[Intel XE#1439]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1439
[Intel XE#1489]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1489
[Intel XE#1499]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1499
[Intel XE#1500]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1500
[Intel XE#1503]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1503
[Intel XE#1727]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1727
[Intel XE#1794]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1794
[Intel XE#2049]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2049
[Intel XE#2191]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2191
[Intel XE#2231]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2231
[Intel XE#2234]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2234
[Intel XE#2244]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2244
[Intel XE#2252]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2252
[Intel XE#2284]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2284
[Intel XE#2291]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2291
[Intel XE#2293]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2293
[Intel XE#2311]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2311
[Intel XE#2312]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2312
[Intel XE#2313]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2313
[Intel XE#2314]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2314
[Intel XE#2316]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2316
[Intel XE#2320]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2320
[Intel XE#2321]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2321
[Intel XE#2322]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2322
[Intel XE#2325]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2325
[Intel XE#2327]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2327
[Intel XE#2330]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2330
[Intel XE#2341]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2341
[Intel XE#2351]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2351
[Intel XE#2360]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2360
[Intel XE#2372]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2372
[Intel XE#2373]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2373
[Intel XE#2380]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2380
[Intel XE#2387]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2387
[Intel XE#2390]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2390
[Intel XE#2414]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2414
[Intel XE#2426]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2426
[Intel XE#2457]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2457
[Intel XE#2509]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2509
[Intel XE#2541]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2541
[Intel XE#2597]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2597
[Intel XE#261]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/261
[Intel XE#2652]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2652
[Intel XE#2705]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2705
[Intel XE#2724]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2724
[Intel XE#2850]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2850
[Intel XE#288]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/288
[Intel XE#2882]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2882
[Intel XE#2887]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2887
[Intel XE#2894]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2894
[Intel XE#2907]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2907
[Intel XE#2925]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2925
[Intel XE#2939]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2939
[Intel XE#2953]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2953
[Intel XE#3009]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3009
[Intel XE#301]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/301
[Intel XE#306]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/306
[Intel XE#307]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/307
[Intel XE#308]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/308
[Intel XE#309]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/309
[Intel XE#3098]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3098
[Intel XE#310]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/310
[Intel XE#3113]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3113
[Intel XE#3124]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3124
[Intel XE#3141]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3141
[Intel XE#316]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/316
[Intel XE#323]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/323
[Intel XE#3309]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3309
[Intel XE#3321]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3321
[Intel XE#3342]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3342
[Intel XE#3374]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3374
[Intel XE#3414]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3414
[Intel XE#3432]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3432
[Intel XE#346]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/346
[Intel XE#3544]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3544
[Intel XE#3573]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3573
[Intel XE#366]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/366
[Intel XE#367]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/367
[Intel XE#373]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/373
[Intel XE#3767]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3767
[Intel XE#378]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/378
[Intel XE#3862]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3862
[Intel XE#3876]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3876
[Intel XE#3904]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3904
[Intel XE#4141]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4141
[Intel XE#4173]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4173
[Intel XE#4208]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4208
[Intel XE#4210]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4210
[Intel XE#4212]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4212
[Intel XE#4302]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4302
[Intel XE#4345]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4345
[Intel XE#4351]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4351
[Intel XE#4416]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4416
[Intel XE#4417]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4417
[Intel XE#4418]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4418
[Intel XE#4422]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4422
[Intel XE#4427]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4427
[Intel XE#4459]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4459
[Intel XE#4501]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4501
[Intel XE#4519]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4519
[Intel XE#4522]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4522
[Intel XE#4543]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4543
[Intel XE#455]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/455
[Intel XE#4596]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4596
[Intel XE#4650]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4650
[Intel XE#4733]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4733
[Intel XE#4837]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4837
[Intel XE#488]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/488
[Intel XE#4915]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4915
[Intel XE#4943]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4943
[Intel XE#4945]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4945
[Intel XE#4947]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4947
[Intel XE#4950]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4950
[Intel XE#4963]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4963
[Intel XE#5018]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/5018
[Intel XE#5021]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/5021
[Intel XE#5103]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/5103
[Intel XE#5172]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/5172
[Intel XE#5214]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/5214
[Intel XE#5237]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/5237
[Intel XE#5255]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/5255
[Intel XE#607]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/607
[Intel XE#610]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/610
[Intel XE#616]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/616
[Intel XE#651]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/651
[Intel XE#653]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/653
[Intel XE#656]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/656
[Intel XE#688]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/688
[Intel XE#787]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/787
[Intel XE#836]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/836
[Intel XE#870]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/870
[Intel XE#886]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/886
[Intel XE#929]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/929
[Intel XE#944]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/944
[Intel XE#977]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/977
[i915#2575]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/2575
Build changes
-------------
* Linux: xe-3254-6e474d767e318b98cc45d4b90095290879085741 -> xe-pw-149605v2
IGT_8411: d5b5d2bb4f8795a98ea58376a128b74f654b7ec1 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
xe-3254-6e474d767e318b98cc45d4b90095290879085741: 6e474d767e318b98cc45d4b90095290879085741
xe-pw-149605v2: 149605v2
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-149605v2/index.html
[-- Attachment #2: Type: text/html, Size: 120061 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/xe: Thread prefetch of SVM ranges
2025-06-16 6:47 [PATCH] drm/xe: Thread prefetch of SVM ranges Matthew Brost
` (3 preceding siblings ...)
2025-06-16 17:53 ` ✗ Xe.CI.Full: failure " Patchwork
@ 2025-06-17 12:43 ` Thomas Hellström
2025-06-17 14:43 ` Matthew Brost
4 siblings, 1 reply; 14+ messages in thread
From: Thomas Hellström @ 2025-06-17 12:43 UTC (permalink / raw)
To: Matthew Brost, intel-xe; +Cc: himal.prasad.ghimiray, michal.mrozek
Hi, Matt
On Sun, 2025-06-15 at 23:47 -0700, Matthew Brost wrote:
> The migrate_vma_* functions are very CPU-intensive; as a result,
> prefetching SVM ranges is limited by CPU performance rather than
> paging
> copy engine bandwidth. To accelerate SVM range prefetching, the step
> that calls migrate_vma_* is now threaded. This uses a dedicated
> workqueue, as the page fault workqueue cannot be shared without
> risking
> deadlocks—due to the prefetch IOCTL holding the VM lock in write mode
> while work items in the page fault workqueue also require the VM
> lock.
>
> The prefetch workqueue is currently allocated in GT, similar to the
> page
> fault workqueue. While this is likely not the ideal location for
> either,
> refactoring will be deferred to a later patch.
>
> Running xe_exec_system_allocator --r prefetch-benchmark, which tests
> 64MB prefetches, shows an increase from ~4.35 GB/s to 12.25 GB/s with
> this patch on drm-tip. Enabling high SLPC further increases
> throughput
> to ~15.25 GB/s, and combining SLPC with ULLS raises it to ~16 GB/s.
> Both
> of these optimizations are upcoming.
I looked at this again. I still think there are some optimizations that
could be done in addition to Francois series to lessen the impact of
this, but nevertheless to quickly get the real workload running on the
GPU again when used on a single-client system.
I raised a question with the maintainers whether we should keep
optimizations like this that improve performance for one client at the
cost of others behind a kernel Kconfig option, and also whether to
expose parameters like the width of the queue both for this purpose and
for parallel faults as sysfs knobs.
Some comments inline:
>
> v2:
> - Use dedicated prefetch workqueue
> - Pick dedicated prefetch thread count based on profiling
> - Skip threaded prefetch for only 1 range or if prefetching to SRAM
> - Fully tested
>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt_pagefault.c | 31 ++++++-
> drivers/gpu/drm/xe/xe_gt_types.h | 2 +
> drivers/gpu/drm/xe/xe_vm.c | 128 +++++++++++++++++++++----
> --
> 3 files changed, 135 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index e2d975b2fddb..941cca3371f2 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -400,6 +400,8 @@ static void pagefault_fini(void *arg)
>
> destroy_workqueue(gt->usm.acc_wq);
> destroy_workqueue(gt->usm.pf_wq);
> + if (gt->usm.prefetch_wq)
> + destroy_workqueue(gt->usm.prefetch_wq);
> }
>
> static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue
> *pf_queue)
> @@ -438,10 +440,24 @@ static int xe_alloc_pf_queue(struct xe_gt *gt,
> struct pf_queue *pf_queue)
> return 0;
> }
>
> +static int prefetch_thread_count(struct xe_device *xe)
> +{
> + if (!IS_DGFX(xe))
> + return 0;
> +
> + /*
> + * Based on profiling large aligned 2M prefetches, this is
> the optimial
> + * number of threads on BMG (only platform currently
> supported). This
> + * should be tuned for each supported platform and can
> change on per
> + * platform basis as optimizations land (e.g., large device
> pages).
> + */
> + return 5;
> +}
> +
> int xe_gt_pagefault_init(struct xe_gt *gt)
> {
> struct xe_device *xe = gt_to_xe(gt);
> - int i, ret = 0;
> + int i, count, ret = 0;
>
> if (!xe->info.has_usm)
> return 0;
> @@ -462,10 +478,23 @@ int xe_gt_pagefault_init(struct xe_gt *gt)
> if (!gt->usm.pf_wq)
> return -ENOMEM;
>
> + count = prefetch_thread_count(xe);
> + if (count) {
> + gt->usm.prefetch_wq =
> alloc_workqueue("xe_gt_prefetch_work_queue",
> + WQ_UNBOUND |
> WQ_HIGHPRI,
> + count);
Can we avoid WQ_HIGHPRI here without losing performance?
Also if count gets near the number of available high-performance cores,
I suspect we might see less effect of parallelizing like this?
> + if (!gt->usm.prefetch_wq) {
> + destroy_workqueue(gt->usm.pf_wq);
> + return -ENOMEM;
> + }
> + }
> +
> gt->usm.acc_wq =
> alloc_workqueue("xe_gt_access_counter_work_queue",
> WQ_UNBOUND | WQ_HIGHPRI,
> NUM_ACC_QUEUE);
> if (!gt->usm.acc_wq) {
> + if (gt->usm.prefetch_wq)
> + destroy_workqueue(gt->usm.prefetch_wq);
> destroy_workqueue(gt->usm.pf_wq);
> return -ENOMEM;
> }
> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h
> b/drivers/gpu/drm/xe/xe_gt_types.h
> index 7def0959da35..d9ba4921b8ce 100644
> --- a/drivers/gpu/drm/xe/xe_gt_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> @@ -239,6 +239,8 @@ struct xe_gt {
> u16 reserved_bcs_instance;
> /** @usm.pf_wq: page fault work queue, unbound, high
> priority */
> struct workqueue_struct *pf_wq;
> + /** @usm.prefetch_wq: prefetch work queue, unbound,
> high priority */
> + struct workqueue_struct *prefetch_wq;
> /** @usm.acc_wq: access counter work queue, unbound,
> high priority */
> struct workqueue_struct *acc_wq;
> /**
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 6ef8c4dab647..1ae8e03aead6 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -2885,52 +2885,130 @@ static int check_ufence(struct xe_vma *vma)
> return 0;
> }
>
> -static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> +struct prefetch_thread {
> + struct work_struct work;
> + struct drm_gpusvm_ctx *ctx;
> + struct xe_vma *vma;
> + struct xe_svm_range *svm_range;
> + struct xe_tile *tile;
> + u32 region;
> + int err;
> +};
> +
> +static void prefetch_work_func(struct work_struct *w)
> {
> - bool devmem_possible = IS_DGFX(vm->xe) &&
> IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> - struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> + struct prefetch_thread *thread =
> + container_of(w, struct prefetch_thread, work);
> + struct xe_vma *vma = thread->vma;
> + struct xe_vm *vm = xe_vma_vm(vma);
> + struct xe_svm_range *svm_range = thread->svm_range;
> + u32 region = thread->region;
> + struct xe_tile *tile = thread->tile;
> int err = 0;
>
> - struct xe_svm_range *svm_range;
> + if (!region) {
> + xe_svm_range_migrate_to_smem(vm, svm_range);
> + } else if (xe_svm_range_needs_migrate_to_vram(svm_range,
> vma, region)) {
> + err = xe_svm_alloc_vram(vm, tile, svm_range, thread-
> >ctx);
> + if (err) {
> + drm_dbg(&vm->xe->drm,
> + "VRAM allocation failed, retry from
> userspace, asid=%u, gpusvm=%p, errno=%pe\n",
> + vm->usm.asid, &vm->svm.gpusvm,
> ERR_PTR(err));
> + thread->err = -ENODATA;
> + return;
> + }
> + xe_svm_range_debug(svm_range, "PREFETCH - RANGE
> MIGRATED TO VRAM");
> + }
> +
> + err = xe_svm_range_get_pages(vm, svm_range, thread->ctx);
> + if (err) {
> + drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u,
> gpusvm=%p, errno=%pe\n",
> + vm->usm.asid, &vm->svm.gpusvm,
> ERR_PTR(err));
> + if (err == -EOPNOTSUPP || err == -EFAULT || err == -
> EPERM)
> + err = -ENODATA;
> + thread->err = err;
> + return;
> + }
> +
> + xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES
> DONE");
> +}
> +
> +static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> +{
> + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> + u32 j, region = op->prefetch_range.region;
> struct drm_gpusvm_ctx ctx = {};
> - struct xe_tile *tile;
> + struct prefetch_thread stack_thread;
> + struct xe_svm_range *svm_range;
> + struct xarray prefetches;
> + bool sram = region_to_mem_type[region] == XE_PL_TT;
> + struct xe_tile *tile = sram ? xe_device_get_root_tile(vm-
> >xe) :
> + &vm->xe->tiles[region_to_mem_type[region] -
> XE_PL_VRAM0];
> unsigned long i;
> - u32 region;
> + bool devmem_possible = IS_DGFX(vm->xe) &&
> + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> + bool skip_threads = op->prefetch_range.ranges_count == 1 ||
> sram;
> + struct prefetch_thread *thread = skip_threads ?
> &stack_thread : NULL;
> + int err = 0;
>
> if (!xe_vma_is_cpu_addr_mirror(vma))
> return 0;
>
> - region = op->prefetch_range.region;
> + if (!skip_threads)
> + xa_init_flags(&prefetches, XA_FLAGS_ALLOC);
>
> ctx.read_only = xe_vma_read_only(vma);
> ctx.devmem_possible = devmem_possible;
> ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
>
> - /* TODO: Threading the migration */
> xa_for_each(&op->prefetch_range.range, i, svm_range) {
> - if (!region)
> - xe_svm_range_migrate_to_smem(vm, svm_range);
> + if (!skip_threads) {
> + thread = kmalloc(sizeof(*thread),
> GFP_KERNEL);
> + if (!thread)
> + goto wait_threads;
>
> - if (xe_svm_range_needs_migrate_to_vram(svm_range,
> vma, region)) {
> - tile = &vm->xe-
> >tiles[region_to_mem_type[region] - XE_PL_VRAM0];
> - err = xe_svm_alloc_vram(vm, tile, svm_range,
> &ctx);
> + err = xa_alloc(&prefetches, &j, thread,
> xa_limit_32b,
> + GFP_KERNEL);
No locking (like in xarray) required here since prefetches is a stack
variable, and no reason to expect cache thrashing, so use a linked list
or simple array instead of an xarray?
> if (err) {
> - drm_dbg(&vm->xe->drm, "VRAM
> allocation failed, retry from userspace, asid=%u, gpusvm=%p,
> errno=%pe\n",
> - vm->usm.asid, &vm-
> >svm.gpusvm, ERR_PTR(err));
> - return -ENODATA;
> + kfree(thread);
> + goto wait_threads;
> }
> - xe_svm_range_debug(svm_range, "PREFETCH -
> RANGE MIGRATED TO VRAM");
> }
>
> - err = xe_svm_range_get_pages(vm, svm_range, &ctx);
> - if (err) {
> - drm_dbg(&vm->xe->drm, "Get pages failed,
> asid=%u, gpusvm=%p, errno=%pe\n",
> - vm->usm.asid, &vm->svm.gpusvm,
> ERR_PTR(err));
> - if (err == -EOPNOTSUPP || err == -EFAULT ||
> err == -EPERM)
> - err = -ENODATA;
> - return err;
> + INIT_WORK(&thread->work, prefetch_work_func);
> + thread->ctx = &ctx;
> + thread->vma = vma;
> + thread->svm_range = svm_range;
> + thread->tile = tile;
> + thread->region = region;
> + thread->err = 0;
> +
> + if (skip_threads) {
> + prefetch_work_func(&thread->work);
> + if (thread->err)
> + return thread->err;
> + } else {
> + /*
> + * Prefetch uses a dedicated workqueue, as
> the page
> + * fault workqueue cannot be shared without
> risking
> + * deadlocks—due to holding the VM lock in
> write mode
> + * here while work items in the page fault
> workqueue
> + * also require the VM lock.
> + */
Hmm. This is weird. In principle, a parallel fault handler could be
processing the same range simultaneously, and blow things up but since
we hold the vm lock on behalf of the threads this doesn't happen. But
if we were to properly annotate, for example drm_gpusvm_get_pages()
with drm_gpusvm_driver_lock_held(), then that would assert. I don't
think "let's hold the vm lock on behalf of the threads" is acceptable,
really, unless we can find other examples in the kernel or preferrably
even in drm.
This means we need some form of finer-grained locking in gpusvm, like
for example a per-range lock, to be able to relax the vm lock to read
mode both in the fault handler and here?
> + queue_work(tile->primary_gt-
> >usm.prefetch_wq,
> + &thread->work);
> + }
> + }
> +
> +wait_threads:
> + if (!skip_threads) {
> + xa_for_each(&prefetches, i, thread) {
> + flush_work(&thread->work);
Similarly this adds an interruptible wait. Ideally if we hit a signal
here we'd like to just be able to forget about the threads and let them
finish while we return?
Thanks,
Thomas
> + if (thread->err && (!err || err == -
> ENODATA))
> + err = thread->err;
> + kfree(thread);
> }
> - xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET
> PAGES DONE");
> + xa_destroy(&prefetches);
> }
>
> return err;
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/xe: Thread prefetch of SVM ranges
2025-06-16 12:06 ` Mrozek, Michal
@ 2025-06-17 14:30 ` Matthew Brost
0 siblings, 0 replies; 14+ messages in thread
From: Matthew Brost @ 2025-06-17 14:30 UTC (permalink / raw)
To: Mrozek, Michal
Cc: Thomas Hellström, intel-xe@lists.freedesktop.org,
Ghimiray, Himal Prasad
On Mon, Jun 16, 2025 at 06:06:51AM -0600, Mrozek, Michal wrote:
> > >>> > 2) Do we actually *want* to use 5 CPU cores for this?
> > >>>
> > >>> Yes, I profiled this with a test issuing 64MB prefetches—5 threads was
> > >>> ideal. I have a comment in the code about this. Once [1] lands, we’ll
> > >>> likely only need 2 threads on BMG. That would probably get us to a bus
> > >>> 8× faster than BMG; for 16×, we might need more threads. But I think
> > >>> we’ll always want at least 2, as there will always be some CPU
> > >>> overhead that limits copy bandwidth due to serialization.
> > >>
> > >>What I meant was IIRC NEO has previously been picky about starting
> > >>threads. Perhaps Michal can enlighten us here?
>
> Multiple threads only gives benefits if we are able to overlap things that would otherwise keep the hardware idle.
> i.e. if single CPU thread is able to saturate system -> Vram bandwidth then there is no point to have multiple threads doing the same as
> all of those due to link sharing would end up later and we actually increase latencies instead of reducing those.
>
> Simple example, if single thread saturate a link and whole copy operation is 1ms, then if you:
> - run 5 copies concurrently, all of those finish at 5ms mark due to link sharing
> - run 5 copies sequentially, one at a time, then first finish at 1ms, second at 2ms, third at 3ms and so on and this allows to unblock consumers way faster
>
> Hence I would be very careful to use 5 threads to do CPU copies concurrently.
> Also you may explore vector intrinsics to do the transfers, sample -> https://github.com/pmodels/mpich/blob/27229e089554fee8ac0ac9da28e56fa7dc648a45/src/mpl/src/gpu/mpl_gpu_ze.c#L3345
>
The bottleneck lies in the migrate_vma_* functions, which take longer
than the copy job. A single 2MB copy can reach 16 GB/s, but it must be
placed between migrate_vma_setup and migrate_vma_finalize. These steps
currently take approximately 310 µs, compared to around 130 µs for the
copy itself, which severely impacts prefetch performance—effectively
reducing it to 4 GB/s.
This was tested with prefetch benchmark in the following IGT series [1].
[1] https://patchwork.freedesktop.org/patch/658835/?series=150306&rev=1
> In general I would advise to do at most 2 copies concurrently to overlap on ramp up / ramp down between copies where machine can potentially go idle.
> To much copy parallelism may give diminishing returns, especially for larger 2MB pages.
>
Once the migrate_vma_* functions are faster (e.g., with 2M device
pages), we should only need 2 threads to hit copy bandwidth. I think
this should scale to a bus 8x faster than BMG if 2M device pages give us
the speedup I am expecting.
I can change this series to use 2 threads only which I'd suspect the
prefetch bandwidth would be ~8 GB/s for now if using less threads is
preferred.
Matt
> For 4KB transfer I agree we may be bottlenecked more by copy engine inefficiency and running multiple (2) small copies may give some nice results.
>
> And also we should be pretty conservative in using CPU threads especially in higher numbers, as at scale if we take too much threads we may introduce imbalance in the system which would create baubles and compromise performance due to butterfly effect.
>
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/xe: Thread prefetch of SVM ranges
2025-06-17 12:43 ` [PATCH] drm/xe: Thread prefetch of SVM ranges Thomas Hellström
@ 2025-06-17 14:43 ` Matthew Brost
0 siblings, 0 replies; 14+ messages in thread
From: Matthew Brost @ 2025-06-17 14:43 UTC (permalink / raw)
To: Thomas Hellström; +Cc: intel-xe, himal.prasad.ghimiray, michal.mrozek
On Tue, Jun 17, 2025 at 02:43:27PM +0200, Thomas Hellström wrote:
> Hi, Matt
>
> On Sun, 2025-06-15 at 23:47 -0700, Matthew Brost wrote:
> > The migrate_vma_* functions are very CPU-intensive; as a result,
> > prefetching SVM ranges is limited by CPU performance rather than
> > paging
> > copy engine bandwidth. To accelerate SVM range prefetching, the step
> > that calls migrate_vma_* is now threaded. This uses a dedicated
> > workqueue, as the page fault workqueue cannot be shared without
> > risking
> > deadlocks—due to the prefetch IOCTL holding the VM lock in write mode
> > while work items in the page fault workqueue also require the VM
> > lock.
> >
> > The prefetch workqueue is currently allocated in GT, similar to the
> > page
> > fault workqueue. While this is likely not the ideal location for
> > either,
> > refactoring will be deferred to a later patch.
> >
> > Running xe_exec_system_allocator --r prefetch-benchmark, which tests
> > 64MB prefetches, shows an increase from ~4.35 GB/s to 12.25 GB/s with
> > this patch on drm-tip. Enabling high SLPC further increases
> > throughput
> > to ~15.25 GB/s, and combining SLPC with ULLS raises it to ~16 GB/s.
> > Both
> > of these optimizations are upcoming.
>
> I looked at this again. I still think there are some optimizations that
> could be done in addition to Francois series to lessen the impact of
> this, but nevertheless to quickly get the real workload running on the
> GPU again when used on a single-client system.
>
> I raised a question with the maintainers whether we should keep
> optimizations like this that improves performance for one client at the
> cost of others behind a kernel konfig, and also whether to expose
> parameters like the width of the queue both for this purpose and for
> parallel faults as sysfs knobs.
>
sysfs knobs sound reasonable to me, and perhaps just default to 2
threads and live with less than peak bandwidth for prefetch now until
Francois's series lands?
> Some comments inline:
>
> >
> > v2:
> > - Use dedicated prefetch workqueue
> > - Pick dedicated prefetch thread count based on profiling
> > - Skip threaded prefetch for only 1 range or if prefetching to SRAM
> > - Fully tested
> >
> > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_gt_pagefault.c | 31 ++++++-
> > drivers/gpu/drm/xe/xe_gt_types.h | 2 +
> > drivers/gpu/drm/xe/xe_vm.c | 128 +++++++++++++++++++++----
> > --
> > 3 files changed, 135 insertions(+), 26 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > index e2d975b2fddb..941cca3371f2 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > @@ -400,6 +400,8 @@ static void pagefault_fini(void *arg)
> >
> > destroy_workqueue(gt->usm.acc_wq);
> > destroy_workqueue(gt->usm.pf_wq);
> > + if (gt->usm.prefetch_wq)
> > + destroy_workqueue(gt->usm.prefetch_wq);
> > }
> >
> > static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue
> > *pf_queue)
> > @@ -438,10 +440,24 @@ static int xe_alloc_pf_queue(struct xe_gt *gt,
> > struct pf_queue *pf_queue)
> > return 0;
> > }
> >
> > +static int prefetch_thread_count(struct xe_device *xe)
> > +{
> > + if (!IS_DGFX(xe))
> > + return 0;
> > +
> > + /*
> > + * Based on profiling large aligned 2M prefetches, this is
> > the optimial
> > + * number of threads on BMG (only platform currently
> > supported). This
> > + * should be tuned for each supported platform and can
> > change on per
> > + * platform basis as optimizations land (e.g., large device
> > pages).
> > + */
> > + return 5;
> > +}
> > +
> > int xe_gt_pagefault_init(struct xe_gt *gt)
> > {
> > struct xe_device *xe = gt_to_xe(gt);
> > - int i, ret = 0;
> > + int i, count, ret = 0;
> >
> > if (!xe->info.has_usm)
> > return 0;
> > @@ -462,10 +478,23 @@ int xe_gt_pagefault_init(struct xe_gt *gt)
> > if (!gt->usm.pf_wq)
> > return -ENOMEM;
> >
> > + count = prefetch_thread_count(xe);
> > + if (count) {
> > + gt->usm.prefetch_wq =
> > alloc_workqueue("xe_gt_prefetch_work_queue",
> > + WQ_UNBOUND |
> > WQ_HIGHPRI,
> > + count);
>
> Can we avoid WQ_HIGHPRI here without losing performance?
> Also if count gets near the number of available high-performance cores,
> I suspect we might see less effect of parallelizing like this?
>
Let me test that out today and give some numbers breakdown of bandwidth
per thread count / effect of WQ_HIGHPRI.
>
> > + if (!gt->usm.prefetch_wq) {
> > + destroy_workqueue(gt->usm.pf_wq);
> > + return -ENOMEM;
> > + }
> > + }
> > +
> > gt->usm.acc_wq =
> > alloc_workqueue("xe_gt_access_counter_work_queue",
> > WQ_UNBOUND | WQ_HIGHPRI,
> > NUM_ACC_QUEUE);
> > if (!gt->usm.acc_wq) {
> > + if (gt->usm.prefetch_wq)
> > + destroy_workqueue(gt->usm.prefetch_wq);
> > destroy_workqueue(gt->usm.pf_wq);
> > return -ENOMEM;
> > }
> > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h
> > b/drivers/gpu/drm/xe/xe_gt_types.h
> > index 7def0959da35..d9ba4921b8ce 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > @@ -239,6 +239,8 @@ struct xe_gt {
> > u16 reserved_bcs_instance;
> > /** @usm.pf_wq: page fault work queue, unbound, high
> > priority */
> > struct workqueue_struct *pf_wq;
> > + /** @usm.prefetch_wq: prefetch work queue, unbound,
> > high priority */
> > + struct workqueue_struct *prefetch_wq;
> > /** @usm.acc_wq: access counter work queue, unbound,
> > high priority */
> > struct workqueue_struct *acc_wq;
> > /**
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 6ef8c4dab647..1ae8e03aead6 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -2885,52 +2885,130 @@ static int check_ufence(struct xe_vma *vma)
> > return 0;
> > }
> >
> > -static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> > +struct prefetch_thread {
> > + struct work_struct work;
> > + struct drm_gpusvm_ctx *ctx;
> > + struct xe_vma *vma;
> > + struct xe_svm_range *svm_range;
> > + struct xe_tile *tile;
> > + u32 region;
> > + int err;
> > +};
> > +
> > +static void prefetch_work_func(struct work_struct *w)
> > {
> > - bool devmem_possible = IS_DGFX(vm->xe) &&
> > IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> > - struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > + struct prefetch_thread *thread =
> > + container_of(w, struct prefetch_thread, work);
> > + struct xe_vma *vma = thread->vma;
> > + struct xe_vm *vm = xe_vma_vm(vma);
> > + struct xe_svm_range *svm_range = thread->svm_range;
> > + u32 region = thread->region;
> > + struct xe_tile *tile = thread->tile;
> > int err = 0;
> >
> > - struct xe_svm_range *svm_range;
> > + if (!region) {
> > + xe_svm_range_migrate_to_smem(vm, svm_range);
> > + } else if (xe_svm_range_needs_migrate_to_vram(svm_range,
> > vma, region)) {
> > + err = xe_svm_alloc_vram(vm, tile, svm_range, thread-
> > >ctx);
> > + if (err) {
> > + drm_dbg(&vm->xe->drm,
> > + "VRAM allocation failed, retry from
> > userspace, asid=%u, gpusvm=%p, errno=%pe\n",
> > + vm->usm.asid, &vm->svm.gpusvm,
> > ERR_PTR(err));
> > + thread->err = -ENODATA;
> > + return;
> > + }
> > + xe_svm_range_debug(svm_range, "PREFETCH - RANGE
> > MIGRATED TO VRAM");
> > + }
> > +
> > + err = xe_svm_range_get_pages(vm, svm_range, thread->ctx);
> > + if (err) {
> > + drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u,
> > gpusvm=%p, errno=%pe\n",
> > + vm->usm.asid, &vm->svm.gpusvm,
> > ERR_PTR(err));
> > + if (err == -EOPNOTSUPP || err == -EFAULT || err == -
> > EPERM)
> > + err = -ENODATA;
> > + thread->err = err;
> > + return;
> > + }
> > +
> > + xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES
> > DONE");
> > +}
> > +
> > +static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> > +{
> > + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > + u32 j, region = op->prefetch_range.region;
> > struct drm_gpusvm_ctx ctx = {};
> > - struct xe_tile *tile;
> > + struct prefetch_thread stack_thread;
> > + struct xe_svm_range *svm_range;
> > + struct xarray prefetches;
> > + bool sram = region_to_mem_type[region] == XE_PL_TT;
> > + struct xe_tile *tile = sram ? xe_device_get_root_tile(vm-
> > >xe) :
> > + &vm->xe->tiles[region_to_mem_type[region] -
> > XE_PL_VRAM0];
> > unsigned long i;
> > - u32 region;
> > + bool devmem_possible = IS_DGFX(vm->xe) &&
> > + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> > + bool skip_threads = op->prefetch_range.ranges_count == 1 ||
> > sram;
> > + struct prefetch_thread *thread = skip_threads ?
> > &stack_thread : NULL;
> > + int err = 0;
> >
> > if (!xe_vma_is_cpu_addr_mirror(vma))
> > return 0;
> >
> > - region = op->prefetch_range.region;
> > + if (!skip_threads)
> > + xa_init_flags(&prefetches, XA_FLAGS_ALLOC);
> >
> > ctx.read_only = xe_vma_read_only(vma);
> > ctx.devmem_possible = devmem_possible;
> > ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
> >
> > - /* TODO: Threading the migration */
> > xa_for_each(&op->prefetch_range.range, i, svm_range) {
> > - if (!region)
> > - xe_svm_range_migrate_to_smem(vm, svm_range);
> > + if (!skip_threads) {
> > + thread = kmalloc(sizeof(*thread),
> > GFP_KERNEL);
> > + if (!thread)
> > + goto wait_threads;
> >
> > - if (xe_svm_range_needs_migrate_to_vram(svm_range,
> > vma, region)) {
> > - tile = &vm->xe-
> > >tiles[region_to_mem_type[region] - XE_PL_VRAM0];
> > - err = xe_svm_alloc_vram(vm, tile, svm_range,
> > &ctx);
> > + err = xa_alloc(&prefetches, &j, thread,
> > xa_limit_32b,
> > + GFP_KERNEL);
>
> No locking (like in xarray) required here since prefetches is a stack
> variable, and no reason to expect cache trashing so use a linked list
> or simple array instead of an xarray?
>
I think a simple array would be a good choice. Let me refactor this.
>
> > if (err) {
> > - drm_dbg(&vm->xe->drm, "VRAM
> > allocation failed, retry from userspace, asid=%u, gpusvm=%p,
> > errno=%pe\n",
> > - vm->usm.asid, &vm-
> > >svm.gpusvm, ERR_PTR(err));
> > - return -ENODATA;
> > + kfree(thread);
> > + goto wait_threads;
> > }
> > - xe_svm_range_debug(svm_range, "PREFETCH -
> > RANGE MIGRATED TO VRAM");
> > }
> >
> > - err = xe_svm_range_get_pages(vm, svm_range, &ctx);
> > - if (err) {
> > - drm_dbg(&vm->xe->drm, "Get pages failed,
> > asid=%u, gpusvm=%p, errno=%pe\n",
> > - vm->usm.asid, &vm->svm.gpusvm,
> > ERR_PTR(err));
> > - if (err == -EOPNOTSUPP || err == -EFAULT ||
> > err == -EPERM)
> > - err = -ENODATA;
> > - return err;
> > + INIT_WORK(&thread->work, prefetch_work_func);
> > + thread->ctx = &ctx;
> > + thread->vma = vma;
> > + thread->svm_range = svm_range;
> > + thread->tile = tile;
> > + thread->region = region;
> > + thread->err = 0;
> > +
> > + if (skip_threads) {
> > + prefetch_work_func(&thread->work);
> > + if (thread->err)
> > + return thread->err;
> > + } else {
> > + /*
> > + * Prefetch uses a dedicated workqueue, as
> > the page
> > + * fault workqueue cannot be shared without
> > risking
> > + * deadlocks—due to holding the VM lock in
> > write mode
> > + * here while work items in the page fault
> > workqueue
> > + * also require the VM lock.
> > + */
>
> Hmm. This is weird. In principle, a parallel fault handler could be
> processing the same range simultaneously, and blow things up but since
> we hold the vm lock on behalf of the threads this doesn't happen. But
> if we were to properly annotate, for example drm_gpusvm_get_pages()
> with drm_gpusvm_driver_lock_held(), then that would assert. I don't
> think "let's hold the vm lock on behalf of the threads" is acceptable,
> really, unless we can find other examples in the kernel or preferrably
> even in drm.
>
> This means we need some form of finer-grained locking in gpusvm, like
> for example a per-range lock, to be able to relax the vm lock to read
> mode both in the fault handler and here?
>
This is the ultimate goal—to allow per-VM parallel faults. I hacked
together finer-grained locking a while back, but held off on posting it
until madvise and multi-GPU support landed, to avoid making it harder
for those features to merge.
I can post that refactor now if you think this is a prerequisite to this
series.
>
> > + queue_work(tile->primary_gt-
> > >usm.prefetch_wq,
> > + &thread->work);
> > + }
> > + }
> > +
> > +wait_threads:
> > + if (!skip_threads) {
> > + xa_for_each(&prefetches, i, thread) {
> > + flush_work(&thread->work);
>
> Similarly this adds an interruptible wait. Ideally if we hit a signal
> here we'd like to just be able to forget about the threads and let them
> finish while we return?
>
Is flush_work interruptible? This is undocumented, but from a look at
the code I don't believe it is. I agree that ideally we'd want this to
be interruptible, but I'm unsure if that is possible with the current
workqueue code.
Matt
> Thanks,
> Thomas
>
>
>
> > + if (thread->err && (!err || err == -
> > ENODATA))
> > + err = thread->err;
> > + kfree(thread);
> > }
> > - xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET
> > PAGES DONE");
> > + xa_destroy(&prefetches);
> > }
> >
> > return err;
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/xe: Thread prefetch of SVM ranges
2025-06-16 11:20 ` Thomas Hellström
@ 2025-06-17 17:10 ` Matthew Brost
0 siblings, 0 replies; 14+ messages in thread
From: Matthew Brost @ 2025-06-17 17:10 UTC (permalink / raw)
To: Thomas Hellström; +Cc: intel-xe, himal.prasad.ghimiray, michal.mrozek
On Mon, Jun 16, 2025 at 01:20:16PM +0200, Thomas Hellström wrote:
> Hi,
>
> Wait, let me take a closer look. I got the impression from the commit
> message that parallelization was done on a lower level than it actually
> was.
>
Yea ok, I replied to the other responses, let's continue the conversation
there. FWIW, I think this is the correct level to do this.
> Thomas.
>
>
> On Mon, 2025-06-16 at 01:58 -0700, Matthew Brost wrote:
> > On Mon, Jun 16, 2025 at 10:28:16AM +0200, Thomas Hellström wrote:
> > > On Sun, 2025-06-15 at 23:47 -0700, Matthew Brost wrote:
> > > > The migrate_vma_* functions are very CPU-intensive; as a result,
> > > > prefetching SVM ranges is limited by CPU performance rather than
> > > > paging
> > > > copy engine bandwidth. To accelerate SVM range prefetching, the
> > > > step
> > > > that calls migrate_vma_* is now threaded. This uses a dedicated
> > > > workqueue, as the page fault workqueue cannot be shared without
> > > > risking
> > > > deadlocks—due to the prefetch IOCTL holding the VM lock in write
> > > > mode
> > > > while work items in the page fault workqueue also require the VM
> > > > lock.
> > > >
> > > > The prefetch workqueue is currently allocated in GT, similar to
> > > > the
> > > > page
> > > > fault workqueue. While this is likely not the ideal location for
> > > > either,
> > > > refactoring will be deferred to a later patch.
> > > >
> > > > Running xe_exec_system_allocator --r prefetch-benchmark, which
> > > > tests
> > > > 64MB prefetches, shows an increase from ~4.35 GB/s to 12.25 GB/s
> > > > with
> > > > this patch on drm-tip. Enabling high SLPC further increases
> > > > throughput
> > > > to ~15.25 GB/s, and combining SLPC with ULLS raises it to ~16
> > > > GB/s.
> > > > Both
> > > > of these optimizations are upcoming.
> > > >
> > > > v2:
> > > > - Use dedicated prefetch workqueue
> > > > - Pick dedicated prefetch thread count based on profiling
> > > > - Skip threaded prefetch for only 1 range or if prefetching to
> > > > SRAM
> > > > - Fully tested
> > > >
> > > > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > > Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
> > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > >
> > > Hi,
> > > Is this really the right place to do optimizations like this?
> > >
> >
> > Yes, for now.
> >
> > > The migration takes place in xe_svm_alloc_vram() and is being moved
> > > to
> > > drm_pagemap_populate_mm(). If those functions are considered to be
> > > slow
> > > then they should be optimized, rather than calling them multiple
> > > times
> > > in parallel from an outer layer?
> > >
> >
> > Shared code with an already-parallel fault handler... Prefetch is
> > just
> > adding parallelism too.
> >
> > > Before doing something like this I think we need to consider
> > >
> > > 1) Why are the migrate functions so cpu consuming? Do we have a
> > > performance profile for it?
> >
> > Yes, I have profiled this. On BMG, a 2MB migrate takes approximately
> > 300µs of CPU overhead in the migrate_vma_* functions, while a copy
> > job
> > takes around 130µs. The copy must complete between setup and
> > finalize,
> > which serializes this flow.
> >
> > Thus, as of now, the only way to saturate the copy engine is to use
> > threads so that CPU cycles can overlap.
> >
> > Have you caught up on Nvidia's series [1] and what Francois is
> > working
> > on? I'd guess we'll go from ~300µs to ~7µs once that lands.
> >
> > I don't know why the migrate_vma_* functions take so long—the core MM
> > code is tough to read. I suppose I could hack it to find out.
> >
> > [1]
> > https://lore.kernel.org/linux-mm/20250306044239.3874247-1-balbirs@nvidia.com/
> >
> >
> > > 2) Do we actually *want* to use 5 CPU cores for this?
> >
> > Yes, I profiled this with a test issuing 64MB prefetches—5 threads
> > was
> > ideal. I have a comment in the code about this. Once [1] lands, we’ll
> > likely only need 2 threads on BMG. That would probably get us to a
> > bus
> > 8× faster than BMG; for 16×, we might need more threads. But I think
> > we’ll always want at least 2, as there will always be some CPU
> > overhead
> > that limits copy bandwidth due to serialization.
> >
> > > 3) Isn't a single CPU write-combined non-temporal CPU memcopy
> > > enough to
> > > saturate the system->VRAM bandwith?
> > >
> >
> > I'm not entirely following (see above), but almost certainly not.
> >
> > Matt
> >
> > > Thanks,
> > > Thomas
> > >
> > >
> > >
> > > > ---
> > > > drivers/gpu/drm/xe/xe_gt_pagefault.c | 31 ++++++-
> > > > drivers/gpu/drm/xe/xe_gt_types.h | 2 +
> > > > drivers/gpu/drm/xe/xe_vm.c | 128
> > > > +++++++++++++++++++++----
> > > > --
> > > > 3 files changed, 135 insertions(+), 26 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > > b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > > index e2d975b2fddb..941cca3371f2 100644
> > > > --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > > +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > > @@ -400,6 +400,8 @@ static void pagefault_fini(void *arg)
> > > >
> > > > destroy_workqueue(gt->usm.acc_wq);
> > > > destroy_workqueue(gt->usm.pf_wq);
> > > > + if (gt->usm.prefetch_wq)
> > > > + destroy_workqueue(gt->usm.prefetch_wq);
> > > > }
> > > >
> > > > static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue
> > > > *pf_queue)
> > > > @@ -438,10 +440,24 @@ static int xe_alloc_pf_queue(struct xe_gt
> > > > *gt,
> > > > struct pf_queue *pf_queue)
> > > > return 0;
> > > > }
> > > >
> > > > +static int prefetch_thread_count(struct xe_device *xe)
> > > > +{
> > > > + if (!IS_DGFX(xe))
> > > > + return 0;
> > > > +
> > > > + /*
> > > > + * Based on profiling large aligned 2M prefetches, this
> > > > is
> > > > the optimial
> > > > + * number of threads on BMG (only platform currently
> > > > supported). This
> > > > + * should be tuned for each supported platform and can
> > > > change on per
> > > > + * platform basis as optimizations land (e.g., large
> > > > device
> > > > pages).
> > > > + */
> > > > + return 5;
> > > > +}
> > > > +
> > > > int xe_gt_pagefault_init(struct xe_gt *gt)
> > > > {
> > > > struct xe_device *xe = gt_to_xe(gt);
> > > > - int i, ret = 0;
> > > > + int i, count, ret = 0;
> > > >
> > > > if (!xe->info.has_usm)
> > > > return 0;
> > > > @@ -462,10 +478,23 @@ int xe_gt_pagefault_init(struct xe_gt *gt)
> > > > if (!gt->usm.pf_wq)
> > > > return -ENOMEM;
> > > >
> > > > + count = prefetch_thread_count(xe);
> > > > + if (count) {
> > > > + gt->usm.prefetch_wq =
> > > > alloc_workqueue("xe_gt_prefetch_work_queue",
> > > > + WQ_UNBOUND
> > > > |
> > > > WQ_HIGHPRI,
> > > > + count);
> > > > + if (!gt->usm.prefetch_wq) {
> > > > + destroy_workqueue(gt->usm.pf_wq);
> > > > + return -ENOMEM;
> > > > + }
> > > > + }
> > > > +
> > > > gt->usm.acc_wq =
> > > > alloc_workqueue("xe_gt_access_counter_work_queue",
> > > > WQ_UNBOUND |
> > > > WQ_HIGHPRI,
> > > > NUM_ACC_QUEUE);
> > > > if (!gt->usm.acc_wq) {
> > > > + if (gt->usm.prefetch_wq)
> > > > + destroy_workqueue(gt->usm.prefetch_wq);
> > > > destroy_workqueue(gt->usm.pf_wq);
> > > > return -ENOMEM;
> > > > }
> > > > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h
> > > > b/drivers/gpu/drm/xe/xe_gt_types.h
> > > > index 7def0959da35..d9ba4921b8ce 100644
> > > > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > > > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > > > @@ -239,6 +239,8 @@ struct xe_gt {
> > > > u16 reserved_bcs_instance;
> > > > /** @usm.pf_wq: page fault work queue, unbound,
> > > > high
> > > > priority */
> > > > struct workqueue_struct *pf_wq;
> > > > + /** @usm.prefetch_wq: prefetch work queue,
> > > > unbound,
> > > > high priority */
> > > > + struct workqueue_struct *prefetch_wq;
> > > > /** @usm.acc_wq: access counter work queue,
> > > > unbound,
> > > > high priority */
> > > > struct workqueue_struct *acc_wq;
> > > > /**
> > > > diff --git a/drivers/gpu/drm/xe/xe_vm.c
> > > > b/drivers/gpu/drm/xe/xe_vm.c
> > > > index 6ef8c4dab647..1ae8e03aead6 100644
> > > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > > @@ -2885,52 +2885,130 @@ static int check_ufence(struct xe_vma
> > > > *vma)
> > > > return 0;
> > > > }
> > > >
> > > > -static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op
> > > > *op)
> > > > +struct prefetch_thread {
> > > > + struct work_struct work;
> > > > + struct drm_gpusvm_ctx *ctx;
> > > > + struct xe_vma *vma;
> > > > + struct xe_svm_range *svm_range;
> > > > + struct xe_tile *tile;
> > > > + u32 region;
> > > > + int err;
> > > > +};
> > > > +
> > > > +static void prefetch_work_func(struct work_struct *w)
> > > > {
> > > > - bool devmem_possible = IS_DGFX(vm->xe) &&
> > > > IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> > > > - struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > > > + struct prefetch_thread *thread =
> > > > + container_of(w, struct prefetch_thread, work);
> > > > + struct xe_vma *vma = thread->vma;
> > > > + struct xe_vm *vm = xe_vma_vm(vma);
> > > > + struct xe_svm_range *svm_range = thread->svm_range;
> > > > + u32 region = thread->region;
> > > > + struct xe_tile *tile = thread->tile;
> > > > int err = 0;
> > > >
> > > > - struct xe_svm_range *svm_range;
> > > > + if (!region) {
> > > > + xe_svm_range_migrate_to_smem(vm, svm_range);
> > > > + } else if (xe_svm_range_needs_migrate_to_vram(svm_range,
> > > > vma, region)) {
> > > > + err = xe_svm_alloc_vram(vm, tile, svm_range,
> > > > thread-
> > > > > ctx);
> > > > + if (err) {
> > > > + drm_dbg(&vm->xe->drm,
> > > > + "VRAM allocation failed, retry
> > > > from
> > > > userspace, asid=%u, gpusvm=%p, errno=%pe\n",
> > > > + vm->usm.asid, &vm->svm.gpusvm,
> > > > ERR_PTR(err));
> > > > + thread->err = -ENODATA;
> > > > + return;
> > > > + }
> > > > + xe_svm_range_debug(svm_range, "PREFETCH - RANGE
> > > > MIGRATED TO VRAM");
> > > > + }
> > > > +
> > > > + err = xe_svm_range_get_pages(vm, svm_range, thread-
> > > > >ctx);
> > > > + if (err) {
> > > > + drm_dbg(&vm->xe->drm, "Get pages failed,
> > > > asid=%u,
> > > > gpusvm=%p, errno=%pe\n",
> > > > + vm->usm.asid, &vm->svm.gpusvm,
> > > > ERR_PTR(err));
> > > > + if (err == -EOPNOTSUPP || err == -EFAULT || err
> > > > == -
> > > > EPERM)
> > > > + err = -ENODATA;
> > > > + thread->err = err;
> > > > + return;
> > > > + }
> > > > +
> > > > + xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET
> > > > PAGES
> > > > DONE");
> > > > +}
> > > > +
> > > > +static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op
> > > > *op)
> > > > +{
> > > > + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > > > + u32 j, region = op->prefetch_range.region;
> > > > struct drm_gpusvm_ctx ctx = {};
> > > > - struct xe_tile *tile;
> > > > + struct prefetch_thread stack_thread;
> > > > + struct xe_svm_range *svm_range;
> > > > + struct xarray prefetches;
> > > > + bool sram = region_to_mem_type[region] == XE_PL_TT;
> > > > + struct xe_tile *tile = sram ?
> > > > xe_device_get_root_tile(vm-
> > > > > xe) :
> > > > + &vm->xe->tiles[region_to_mem_type[region] -
> > > > XE_PL_VRAM0];
> > > > unsigned long i;
> > > > - u32 region;
> > > > + bool devmem_possible = IS_DGFX(vm->xe) &&
> > > > + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
> > > > + bool skip_threads = op->prefetch_range.ranges_count == 1
> > > > ||
> > > > sram;
> > > > + struct prefetch_thread *thread = skip_threads ?
> > > > &stack_thread : NULL;
> > > > + int err = 0;
> > > >
> > > > if (!xe_vma_is_cpu_addr_mirror(vma))
> > > > return 0;
> > > >
> > > > - region = op->prefetch_range.region;
> > > > + if (!skip_threads)
> > > > + xa_init_flags(&prefetches, XA_FLAGS_ALLOC);
> > > >
> > > > ctx.read_only = xe_vma_read_only(vma);
> > > > ctx.devmem_possible = devmem_possible;
> > > > ctx.check_pages_threshold = devmem_possible ? SZ_64K :
> > > > 0;
> > > >
> > > > - /* TODO: Threading the migration */
> > > > xa_for_each(&op->prefetch_range.range, i, svm_range) {
> > > > - if (!region)
> > > > - xe_svm_range_migrate_to_smem(vm,
> > > > svm_range);
> > > > + if (!skip_threads) {
> > > > + thread = kmalloc(sizeof(*thread),
> > > > GFP_KERNEL);
> > > > + if (!thread)
> > > > + goto wait_threads;
> > > >
> > > > - if
> > > > (xe_svm_range_needs_migrate_to_vram(svm_range,
> > > > vma, region)) {
> > > > - tile = &vm->xe-
> > > > > tiles[region_to_mem_type[region] - XE_PL_VRAM0];
> > > > - err = xe_svm_alloc_vram(vm, tile,
> > > > svm_range,
> > > > &ctx);
> > > > + err = xa_alloc(&prefetches, &j, thread,
> > > > xa_limit_32b,
> > > > + GFP_KERNEL);
> > > > if (err) {
> > > > - drm_dbg(&vm->xe->drm, "VRAM
> > > > allocation failed, retry from userspace, asid=%u, gpusvm=%p,
> > > > errno=%pe\n",
> > > > - vm->usm.asid, &vm-
> > > > > svm.gpusvm, ERR_PTR(err));
> > > > - return -ENODATA;
> > > > + kfree(thread);
> > > > + goto wait_threads;
> > > > }
> > > > - xe_svm_range_debug(svm_range, "PREFETCH
> > > > -
> > > > RANGE MIGRATED TO VRAM");
> > > > }
> > > >
> > > > - err = xe_svm_range_get_pages(vm, svm_range,
> > > > &ctx);
> > > > - if (err) {
> > > > - drm_dbg(&vm->xe->drm, "Get pages failed,
> > > > asid=%u, gpusvm=%p, errno=%pe\n",
> > > > - vm->usm.asid, &vm->svm.gpusvm,
> > > > ERR_PTR(err));
> > > > - if (err == -EOPNOTSUPP || err == -EFAULT
> > > > ||
> > > > err == -EPERM)
> > > > - err = -ENODATA;
> > > > - return err;
> > > > + INIT_WORK(&thread->work, prefetch_work_func);
> > > > + thread->ctx = &ctx;
> > > > + thread->vma = vma;
> > > > + thread->svm_range = svm_range;
> > > > + thread->tile = tile;
> > > > + thread->region = region;
> > > > + thread->err = 0;
> > > > +
> > > > + if (skip_threads) {
> > > > + prefetch_work_func(&thread->work);
> > > > + if (thread->err)
> > > > + return thread->err;
> > > > + } else {
> > > > + /*
> > > > + * Prefetch uses a dedicated workqueue,
> > > > as
> > > > the page
> > > > + * fault workqueue cannot be shared
> > > > without
> > > > risking
> > > > + * deadlocks—due to holding the VM lock
> > > > in
> > > > write mode
> > > > + * here while work items in the page
> > > > fault
> > > > workqueue
> > > > + * also require the VM lock.
> > > > + */
> > > > + queue_work(tile->primary_gt-
> > > > > usm.prefetch_wq,
> > > > + &thread->work);
> > > > + }
> > > > + }
> > > > +
> > > > +wait_threads:
> > > > + if (!skip_threads) {
> > > > + xa_for_each(&prefetches, i, thread) {
> > > > + flush_work(&thread->work);
> > > > + if (thread->err && (!err || err == -
> > > > ENODATA))
> > > > + err = thread->err;
> > > > + kfree(thread);
> > > > }
> > > > - xe_svm_range_debug(svm_range, "PREFETCH - RANGE
> > > > GET
> > > > PAGES DONE");
> > > > + xa_destroy(&prefetches);
> > > > }
> > > >
> > > > return err;
> > >
>
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2025-06-17 17:09 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-16 6:47 [PATCH] drm/xe: Thread prefetch of SVM ranges Matthew Brost
2025-06-16 8:28 ` Thomas Hellström
2025-06-16 8:58 ` Matthew Brost
2025-06-16 9:24 ` Thomas Hellström
2025-06-16 12:06 ` Mrozek, Michal
2025-06-17 14:30 ` Matthew Brost
2025-06-16 11:20 ` Thomas Hellström
2025-06-17 17:10 ` Matthew Brost
2025-06-16 11:51 ` ✓ CI.KUnit: success for drm/xe: Thread prefetch of SVM ranges (rev2) Patchwork
2025-06-16 12:32 ` ✓ Xe.CI.BAT: " Patchwork
2025-06-16 17:53 ` ✗ Xe.CI.Full: failure " Patchwork
2025-06-17 12:43 ` [PATCH] drm/xe: Thread prefetch of SVM ranges Thomas Hellström
2025-06-17 14:43 ` Matthew Brost
-- strict thread matches above, loose matches on Subject: below --
2025-05-28 17:27 Matthew Brost
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox