public inbox for igt-dev@lists.freedesktop.org
 help / color / mirror / Atom feed
From: nishit.sharma@intel.com
To: igt-dev@lists.freedesktop.org, nishit.sharma@intel.com,
	sai.gowtham.ch@intel.com
Subject: [PATCH i-g-t v14 08/11] tests/intel/xe_multigpu_svm: Add SVM multi-GPU fault handling test
Date: Mon,  5 Jan 2026 08:47:47 +0000	[thread overview]
Message-ID: <20260105084750.190346-9-nishit.sharma@intel.com> (raw)
In-Reply-To: <20260105084750.190346-1-nishit.sharma@intel.com>

From: Nishit Sharma <nishit.sharma@intel.com>

This test intentionally triggers page faults by accessing regions without
prefetch for both GPUs in a multi-GPU environment.

Signed-off-by: Nishit Sharma <nishit.sharma@intel.com>
Reviewed-by: Pravalika Gurram <pravalika.gurram@intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 tests/intel/xe_multigpu_svm.c | 145 ++++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)

diff --git a/tests/intel/xe_multigpu_svm.c b/tests/intel/xe_multigpu_svm.c
index 903ac636b..a38b29aab 100644
--- a/tests/intel/xe_multigpu_svm.c
+++ b/tests/intel/xe_multigpu_svm.c
@@ -15,6 +15,7 @@
 
 #include "time.h"
 
+#include "xe/xe_gt.h"
 #include "xe/xe_ioctl.h"
 #include "xe/xe_query.h"
 #include "xe/xe_util.h"
@@ -89,6 +90,17 @@
  *	Measure latency of cross-GPU memory copy operations with prefetch
  *	to evaluate copy performance with memory migration to local VRAM
  *
+ * SUBTEST: mgpu-pagefault-basic
+ * Description:
+ *	Test cross-GPU page fault handling where one GPU writes to memory
+ *	and another GPU reads, triggering page faults without prefetch to
+ *	validate on-demand page migration across GPUs
+ *
+ * SUBTEST: mgpu-pagefault-prefetch
+ * Description:
+ *	Test cross-GPU memory access with prefetch to verify page fault
+ *	suppression when memory is pre-migrated to target GPU's VRAM
+ *
  */
 
 #define MAX_XE_REGIONS	8
@@ -108,6 +120,7 @@
 #define MULTIGPU_COH_FAIL		BIT(5)
 #define MULTIGPU_PERF_OP		BIT(6)
 #define MULTIGPU_PERF_REM_COPY		BIT(7)
+#define MULTIGPU_PFAULT_OP		BIT(8)
 
 #define INIT	2
 #define STORE	3
@@ -163,6 +176,11 @@ static void gpu_latency_test_wrapper(struct xe_svm_gpu_info *src,
 				     struct drm_xe_engine_class_instance *eci,
 				     unsigned int flags);
 
+static void gpu_fault_test_wrapper(struct xe_svm_gpu_info *src,
+				   struct xe_svm_gpu_info *dst,
+				   struct drm_xe_engine_class_instance *eci,
+				   unsigned int flags);
+
 static void
 create_vm_and_queue(struct xe_svm_gpu_info *gpu, struct drm_xe_engine_class_instance *eci,
 		    uint32_t *vm, uint32_t *exec_queue)
@@ -930,6 +948,117 @@ latency_test_multigpu(struct xe_svm_gpu_info *gpu1,
 	cleanup_vm_and_queue(gpu2, vm[1], exec_queue[1]);
 }
 
+static void
+pagefault_test_multigpu(struct xe_svm_gpu_info *gpu1,
+			struct xe_svm_gpu_info *gpu2,
+			struct drm_xe_engine_class_instance *eci,
+			unsigned int flags)
+{
+	uint64_t addr;
+	uint64_t addr1;
+	uint32_t vm[2];
+	uint32_t exec_queue[2];
+	uint32_t batch_bo[2];
+	uint64_t batch_addr[2];
+	struct drm_xe_sync sync = {};
+	uint64_t *sync_addr;
+	void *data, *verify_result;
+	const char *pf_count_stat = "svm_pagefault_count";
+	int pf_count_gpu1_before, pf_count_gpu1_after;
+	int pf_count_gpu2_before, pf_count_gpu2_after;
+	bool prefetch_req = flags & MULTIGPU_PREFETCH;
+
+	/* Skip if either GPU doesn't support faults */
+	if (mgpu_check_fault_support(gpu1, gpu2))
+		return;
+
+	create_vm_and_queue(gpu1, eci, &vm[0], &exec_queue[0]);
+	create_vm_and_queue(gpu2, eci, &vm[1], &exec_queue[1]);
+
+	data = aligned_alloc(SZ_2M, SZ_4K); /* NOTE(review): C11 wants size % alignment == 0; glibc tolerates this, confirm portability */
+	igt_assert(data);
+	memset(data, 0, SZ_4K);
+	addr = to_user_pointer(data);
+
+	/* Allocate verification buffer for GPU2 to copy into */
+	verify_result = aligned_alloc(SZ_2M, SZ_4K);
+	igt_assert(verify_result);
+	addr1 = to_user_pointer(verify_result);
+
+	/* === Phase 1: GPU1 writes to addr === */
+	pf_count_gpu1_before = xe_gt_stats_get_count(gpu1->fd, eci->gt_id, pf_count_stat);
+
+	/* GPU1 --> Creating batch with value and executing STORE op */
+	gpu_batch_create(gpu1, vm[0], exec_queue[0], addr, 0,
+			 &batch_bo[0], &batch_addr[0], flags, DWORD);
+
+	/* GPU1: Madvise and Prefetch Ops */
+	gpu_madvise_exec_sync(gpu1, vm[0], exec_queue[0], addr, &batch_addr[0], flags, NULL);
+
+	pf_count_gpu1_after = xe_gt_stats_get_count(gpu1->fd, eci->gt_id, pf_count_stat);
+
+	if (prefetch_req) {
+		/* With prefetch: expect NO page faults */
+		igt_assert_eq(pf_count_gpu1_after, pf_count_gpu1_before);
+		igt_info("GPU1 write with prefetch: No page faults (as expected)\n");
+	} else {
+		/* Without prefetch: expect page faults */
+		igt_debug("Pagefault count %s\n",
+			  pf_count_gpu1_after > pf_count_gpu1_before
+			  ? "increased"
+			  : "not increased");
+		igt_info("GPU1 write without prefetch: %d page faults\n",
+			 pf_count_gpu1_after - pf_count_gpu1_before);
+	}
+
+	/* === Phase 2: GPU2 reads from addr (cross-GPU access) === */
+	pf_count_gpu2_before = xe_gt_stats_get_count(gpu2->fd, eci->gt_id, pf_count_stat);
+
+	/* GPU2 --> Create batch for GPU2 to copy from addr (GPU1's memory) to verify_result */
+	gpu_batch_create(gpu2, vm[1], exec_queue[1], addr, addr1,
+			 &batch_bo[1], &batch_addr[1], flags, INIT);
+
+	/* Prefetch src buffer (addr) to avoid page faults */
+	xe_multigpu_madvise(gpu2->fd, vm[1], addr, SZ_4K, 0,
+			    DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC,
+			    gpu2->fd, 0, gpu2->vram_regions[0], exec_queue[1]);
+
+	setup_sync(&sync, &sync_addr, BIND_SYNC_VAL);
+	xe_multigpu_prefetch(gpu2->fd, vm[1], addr, SZ_4K, &sync,
+			     sync_addr, exec_queue[1], flags);
+
+	free(sync_addr);
+
+	/* GPU2: Madvise and Prefetch Ops */
+	gpu_madvise_exec_sync(gpu2, vm[1], exec_queue[1], addr1, &batch_addr[1], flags, NULL);
+
+	pf_count_gpu2_after = xe_gt_stats_get_count(gpu2->fd, eci->gt_id, pf_count_stat);
+
+	if (prefetch_req) {
+		/* With prefetch: expect NO page faults on GPU2 */
+		igt_assert_eq(pf_count_gpu2_after, pf_count_gpu2_before);
+		igt_info("GPU2 cross-GPU read with prefetch: No page faults (as expected)\n");
+	} else {
+		/* Without prefetch: expect cross-GPU page faults */
+		igt_debug("Pagefault count %s\n",
+			  pf_count_gpu2_after > pf_count_gpu2_before
+			  ? "increased"
+			  : "not increased");
+		igt_info("GPU2 cross-GPU read without prefetch: %d page faults\n",
+			 pf_count_gpu2_after - pf_count_gpu2_before);
+	}
+
+	munmap((void *)batch_addr[0], BATCH_SIZE(gpu1->fd));
+	munmap((void *)batch_addr[1], BATCH_SIZE(gpu2->fd));
+	batch_fini(gpu1->fd, vm[0], batch_bo[0], batch_addr[0]);
+	batch_fini(gpu2->fd, vm[1], batch_bo[1], batch_addr[1]); /* was batch_addr[0]: GPU2's batch must be finalized with its own address */
+	free(data);
+	free(verify_result);
+
+	cleanup_vm_and_queue(gpu1, vm[0], exec_queue[0]);
+	cleanup_vm_and_queue(gpu2, vm[1], exec_queue[1]);
+}
+
 static void
 gpu_mem_access_wrapper(struct xe_svm_gpu_info *src,
 		       struct xe_svm_gpu_info *dst,
@@ -978,6 +1107,18 @@ gpu_latency_test_wrapper(struct xe_svm_gpu_info *src,
 	latency_test_multigpu(src, dst, eci, flags);
 }
 
+static void
+gpu_fault_test_wrapper(struct xe_svm_gpu_info *src,
+		       struct xe_svm_gpu_info *dst,
+		       struct drm_xe_engine_class_instance *eci,
+		       unsigned int flags)
+{ /* Adapter matching the for_each_gpu_pair() callback signature */
+	igt_assert(src); /* both GPU descriptors must be valid before running the pair */
+	igt_assert(dst);
+
+	pagefault_test_multigpu(src, dst, eci, flags); /* delegate to the actual fault test */
+}
+
 static void
 test_mgpu_exec(int gpu_cnt, struct xe_svm_gpu_info *gpus,
 	       struct drm_xe_engine_class_instance *eci,
@@ -991,6 +1132,8 @@ test_mgpu_exec(int gpu_cnt, struct xe_svm_gpu_info *gpus,
 		for_each_gpu_pair(gpu_cnt, gpus, eci, gpu_coherecy_test_wrapper, flags);
 	if (flags & MULTIGPU_PERF_OP)
 		for_each_gpu_pair(gpu_cnt, gpus, eci, gpu_latency_test_wrapper, flags);
+	if (flags & MULTIGPU_PFAULT_OP)
+		for_each_gpu_pair(gpu_cnt, gpus, eci, gpu_fault_test_wrapper, flags);
 }
 
 struct section {
@@ -1026,6 +1169,8 @@ int igt_main()
 		{ "latency-prefetch", MULTIGPU_PREFETCH | MULTIGPU_PERF_OP },
 		{ "latency-copy-prefetch",
 		  MULTIGPU_PREFETCH | MULTIGPU_PERF_OP | MULTIGPU_PERF_REM_COPY },
+		{ "pagefault-basic", MULTIGPU_PFAULT_OP },
+		{ "pagefault-prefetch", MULTIGPU_PREFETCH | MULTIGPU_PFAULT_OP },
 		{ NULL },
 	};
 
-- 
2.48.1


  parent reply	other threads:[~2026-01-05  8:47 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-05  8:47 [PATCH i-g-t v14 00/11] Madvise feature in SVM for Multi-GPU configs nishit.sharma
2026-01-05  8:47 ` [PATCH i-g-t v14 01/11] drm-uapi/xe: Sync with madvise interface nishit.sharma
2026-04-02 19:23   ` Dixit, Ashutosh
2026-01-05  8:47 ` [PATCH i-g-t v14 02/11] lib/xe: Add instance parameter to xe_vm_madvise nishit.sharma
2026-01-05  8:47 ` [PATCH i-g-t v14 03/11] lib/xe: Add synchronous helpers for VM bind/unbind operations nishit.sharma
2026-01-05  8:47 ` [PATCH i-g-t v14 04/11] tests/intel/xe_multigpu_svm: Add SVM multi-GPU xGPU memory access test nishit.sharma
2026-01-05  8:47 ` [PATCH i-g-t v14 05/11] tests/intel/xe_multigpu_svm: Add SVM multi-GPU atomic operations nishit.sharma
2026-01-05  8:47 ` [PATCH i-g-t v14 06/11] tests/intel/xe_multigpu_svm: Add SVM multi-GPU coherency test nishit.sharma
2026-01-05  8:47 ` [PATCH i-g-t v14 07/11] tests/intel/xe_multigpu_svm: Add SVM multi-GPU performance test nishit.sharma
2026-01-05  8:47 ` nishit.sharma [this message]
2026-01-05  8:53   ` [PATCH i-g-t v14 08/11] tests/intel/xe_multigpu_svm: Add SVM multi-GPU fault handling test Ch, Sai Gowtham
2026-01-05  8:47 ` [PATCH i-g-t v14 09/11] tests/intel/xe_multigpu_svm: Add SVM multi-GPU simultaneous access test nishit.sharma
2026-01-05  8:47 ` [PATCH i-g-t v14 10/11] tests/intel/xe_multigpu_svm: Add SVM multi-GPU conflicting madvise test nishit.sharma
2026-01-05  8:47 ` [PATCH i-g-t v14 11/11] tests/intel/xe_multigpu_svm: Add SVM multi-GPU migration test nishit.sharma
2026-01-05 14:44 ` ✗ Fi.CI.BUILD: failure for Madvise feature in SVM for Multi-GPU configs Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260105084750.190346-9-nishit.sharma@intel.com \
    --to=nishit.sharma@intel.com \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=sai.gowtham.ch@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox