Igt-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH] tests/xe/xe_exec_basic: Wait for the correct vm_bind before exec
@ 2023-03-15 14:18 Thomas Hellström
  2023-03-15 15:41 ` Matthew Brost
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Thomas Hellström @ 2023-03-15 14:18 UTC (permalink / raw)
  To: igt-dev; +Cc: Thomas Hellström

The test was submitting the same syncobj for signalling to all
vm_bind operations, and then awaited it in all execs. However, the
binds don't necessarily complete in order, leading to GPU hangs if
an exec is launched before its bind is complete:

sudo ./xe_exec_basic --r many-engines-many-vm-basic-defer-mmap
IGT-Version: 1.26-g9b86de12 (x86_64) (Linux: 6.1.0+ x86_64)
Starting subtest: many-engines-many-vm-basic-defer-mmap
(xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: Test assertion failure function __xe_exec_assert, file ../lib/xe/xe_ioctl.c:373:
(xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: Failed assertion: igt_ioctl(fd, (((1U) << (((0+8)+8)+14)) | ((('d')) << (0+8)) | (((0x40 + 0x08)) << 0) | ((((sizeof(struct drm_xe_exec)))) << ((0+8)+8))), exec) == 0
(xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: Last errno: 125, Operation canceled
(xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: error: -1 != 0
Stack trace:
Subtest many-engines-many-vm-basic-defer-mmap failed.
**** DEBUG ****
(xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: Test assertion failure function __xe_exec_assert, file ../lib/xe/xe_ioctl.c:373:
(xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: Failed assertion: igt_ioctl(fd, (((1U) << (((0+8)+8)+14)) | ((('d')) << (0+8)) | (((0x40 + 0x08)) << 0) | ((((sizeof(struct drm_xe_exec)))) << ((0+8)+8))), exec) == 0
(xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: Last errno: 125, Operation canceled
(xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: error: -1 != 0
(xe_exec_basic:4204) igt_core-INFO: Stack trace:
****  END  ****
Subtest many-engines-many-vm-basic-defer-mmap: FAIL (0.231s)

Fix this by using a unique syncobj per VM, which is awaited by the
execs touching that VM.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 tests/xe/xe_exec_basic.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/tests/xe/xe_exec_basic.c b/tests/xe/xe_exec_basic.c
index cb0b8d2c..c1069829 100644
--- a/tests/xe/xe_exec_basic.c
+++ b/tests/xe/xe_exec_basic.c
@@ -93,6 +93,7 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
 	uint32_t engines[MAX_N_ENGINES];
 	uint32_t bind_engines[MAX_N_ENGINES];
 	uint32_t syncobjs[MAX_N_ENGINES];
+	uint32_t bind_syncobjs[MAX_N_ENGINES];
 	size_t bo_size;
 	uint32_t bo = 0;
 	struct {
@@ -150,10 +151,11 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
 		else
 			bind_engines[i] = 0;
 		syncobjs[i] = syncobj_create(fd, 0);
+		bind_syncobjs[i] = syncobj_create(fd, 0);
 	};
 
-	sync[0].handle = syncobj_create(fd, 0);
 	for (i = 0; i < n_vm; ++i) {
+		sync[0].handle = bind_syncobjs[i];
 		if (bo)
 			xe_vm_bind_async(fd, vm[i], bind_engines[i], bo, 0,
 					 addr[i], bo_size, sync, 1);
@@ -167,7 +169,8 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
 		data = xe_bo_map(fd, bo, bo_size);
 
 	for (i = 0; i < n_execs; i++) {
-		uint64_t __addr = addr[i % n_vm];
+		int cur_vm = i % n_vm;
+		uint64_t __addr = addr[cur_vm];
 		uint64_t batch_offset = (char *)&data[i].batch - (char *)data;
 		uint64_t batch_addr = __addr + batch_offset;
 		uint64_t sdi_offset = (char *)&data[i].data - (char *)data;
@@ -183,6 +186,7 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
 		igt_assert(b <= ARRAY_SIZE(data[i].batch));
 
 		sync[0].flags &= ~DRM_XE_SYNC_SIGNAL;
+		sync[0].handle = bind_syncobjs[cur_vm];
 		sync[1].flags |= DRM_XE_SYNC_SIGNAL;
 		sync[1].handle = syncobjs[e];
 
@@ -193,7 +197,7 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
 		__xe_exec_assert(fd, &exec);
 
 		if (flags & REBIND && i + 1 != n_execs) {
-			uint32_t __vm = vm[i % n_vm];
+			uint32_t __vm = vm[cur_vm];
 
 			sync[1].flags &= ~DRM_XE_SYNC_SIGNAL;
 			xe_vm_unbind_async(fd, __vm, bind_engines[e], 0,
@@ -243,7 +247,10 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
 	for (i = 0; i < n_engines && n_execs; i++)
 		igt_assert(syncobj_wait(fd, &syncobjs[i], 1, INT64_MAX, 0,
 					NULL));
-	igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
+
+	for (i = 0; i < n_vm; i++)
+		igt_assert(syncobj_wait(fd, &bind_syncobjs[i], 1, INT64_MAX, 0,
+					NULL));
 
 	sync[0].flags |= DRM_XE_SYNC_SIGNAL;
 	for (i = 0; i < n_vm; ++i) {
@@ -258,7 +265,6 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
 	     i < n_execs; i++)
 		igt_assert_eq(data[i].data, 0xc0ffee);
 
-	syncobj_destroy(fd, sync[0].handle);
 	for (i = 0; i < n_engines; i++) {
 		syncobj_destroy(fd, syncobjs[i]);
 		xe_engine_destroy(fd, engines[i]);
@@ -272,8 +278,10 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
 	} else if (!(flags & INVALIDATE)) {
 		free(data);
 	}
-	for (i = 0; i < n_vm; ++i)
+	for (i = 0; i < n_vm; ++i) {
+		syncobj_destroy(fd, bind_syncobjs[i]);
 		xe_vm_destroy(fd, vm[i]);
+	}
 }
 
 igt_main
-- 
2.39.2

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-03-16  3:42 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-15 14:18 [igt-dev] [PATCH] tests/xe/xe_exec_basic: Wait for the correct vm_bind before exec Thomas Hellström
2023-03-15 15:41 ` Matthew Brost
2023-03-15 15:47 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
2023-03-16  3:42 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox