From: fei.yang@intel.com
To: igt-dev@lists.freedesktop.org
Cc: Fei Yang <fei.yang@intel.com>
Subject: [i-g-t 4/4] tests/intel/xe_exec_threads: separate sync data and batch buffer
Date: Wed, 30 Oct 2024 16:03:50 -0700 [thread overview]
Message-ID: <20241030230350.1681757-5-fei.yang@intel.com> (raw)
In-Reply-To: <20241030230350.1681757-1-fei.yang@intel.com>
From: Fei Yang <fei.yang@intel.com>
In INVALIDATE cases the test purposely remaps the data buffer to a
different physical location in the middle of execution to exercise the
page fault handling flow. After the remapping we lose access to the old
physical location, and that would cause a problem for verifying stored
data and comparing the ufence value at the end of the execution. To fix this,
the data used for synchronization purposes needs to be separated from the
batch buffer for instructions, and during the execution we remap the
batch buffer only.
Signed-off-by: Fei Yang <fei.yang@intel.com>
---
tests/intel/xe_exec_threads.c | 70 ++++++++++++++++++++---------------
1 file changed, 41 insertions(+), 29 deletions(-)
diff --git a/tests/intel/xe_exec_threads.c b/tests/intel/xe_exec_threads.c
index 03043c53e..c14ffb1c9 100644
--- a/tests/intel/xe_exec_threads.c
+++ b/tests/intel/xe_exec_threads.c
@@ -241,6 +241,7 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
struct drm_xe_engine_class_instance *eci,
int n_exec_queues, int n_execs, unsigned int flags)
{
+ uint64_t syncaddr = addr + 0x10000000;
#define USER_FENCE_VALUE 0xdeadbeefdeadbeefull
struct drm_xe_sync sync[1] = {
{ .type = DRM_XE_SYNC_TYPE_USER_FENCE, .flags = DRM_XE_SYNC_FLAG_SIGNAL,
@@ -253,15 +254,17 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
};
int64_t fence_timeout;
uint32_t exec_queues[MAX_N_EXEC_QUEUES];
- size_t bo_size;
+ size_t bo_size, sync_size;
uint32_t bo = 0;
struct {
uint32_t batch[16];
uint64_t pad;
+ } *data;
+ struct {
uint64_t vm_sync;
uint64_t exec_sync;
uint32_t data;
- } *data;
+ } *syncdata;
int i, j, b;
int map_fd = -1;
bool owns_vm = false, owns_fd = false;
@@ -280,6 +283,8 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
bo_size = sizeof(*data) * n_execs;
bo_size = xe_bb_size(fd, bo_size);
+ sync_size = sizeof(*syncdata) * n_execs;
+ sync_size = xe_bb_size(fd, sync_size);
if (flags & USERPTR) {
if (flags & INVALIDATE) {
@@ -301,28 +306,37 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
}
memset(data, 0, bo_size);
+ syncdata = mmap(from_user_pointer(userptr + 0x10000000),
+ sync_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
+ igt_assert(syncdata != MAP_FAILED);
+ memset(syncdata, 0, sync_size);
+
for (i = 0; i < n_exec_queues; i++)
exec_queues[i] = xe_exec_queue_create(fd, vm, eci, 0);
pthread_barrier_wait(&barrier);
- sync[0].addr = to_user_pointer(&data[0].vm_sync);
+ sync[0].addr = to_user_pointer(&syncdata[0].vm_sync);
+ xe_vm_bind_userptr_async(fd, vm, 0, to_user_pointer(syncdata),
+ syncaddr, sync_size, sync, 1);
+ fence_timeout = (igt_run_in_simulation() ? 30 : 3) * NSEC_PER_SEC;
+ xe_wait_ufence(fd, &syncdata[0].vm_sync, USER_FENCE_VALUE, 0, fence_timeout);
+ syncdata[0].vm_sync = 0;
+
if (bo)
xe_vm_bind_async(fd, vm, 0, bo, 0, addr, bo_size, sync, 1);
else
xe_vm_bind_userptr_async(fd, vm, 0, to_user_pointer(data), addr,
bo_size, sync, 1);
-
- fence_timeout = (igt_run_in_simulation() ? 30 : 3) * NSEC_PER_SEC;
-
- xe_wait_ufence(fd, &data[0].vm_sync, USER_FENCE_VALUE, 0, fence_timeout);
- data[0].vm_sync = 0;
+ xe_wait_ufence(fd, &syncdata[0].vm_sync, USER_FENCE_VALUE, 0, fence_timeout);
+ syncdata[0].vm_sync = 0;
for (i = 0; i < n_execs; i++) {
uint64_t batch_offset = (char *)&data[i].batch - (char *)data;
uint64_t batch_addr = addr + batch_offset;
- uint64_t sdi_offset = (char *)&data[i].data - (char *)data;
- uint64_t sdi_addr = addr + sdi_offset;
+ uint64_t sdi_offset = (char *)&syncdata[i].data - (char *)syncdata;
+ uint64_t sdi_addr = syncaddr + sdi_offset;
int e = i % n_exec_queues;
b = 0;
@@ -333,7 +347,7 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
data[i].batch[b++] = MI_BATCH_BUFFER_END;
igt_assert(b <= ARRAY_SIZE(data[i].batch));
- sync[0].addr = addr + (char *)&data[i].exec_sync - (char *)data;
+ sync[0].addr = syncaddr + (char *)&syncdata[i].exec_sync - (char *)syncdata;
exec.exec_queue_id = exec_queues[e];
exec.address = batch_addr;
@@ -341,13 +355,13 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
if (flags & REBIND && i && !(i & 0x1f)) {
for (j = i == 0x20 ? 0 : i - 0x1f; j <= i; ++j)
- xe_wait_ufence(fd, &data[j].exec_sync,
+ xe_wait_ufence(fd, &syncdata[j].exec_sync,
USER_FENCE_VALUE,
exec_queues[e], fence_timeout);
xe_vm_unbind_async(fd, vm, 0, 0, addr, bo_size,
NULL, 0);
- sync[0].addr = to_user_pointer(&data[0].vm_sync);
+ sync[0].addr = to_user_pointer(&syncdata[0].vm_sync);
addr += bo_size;
if (bo)
xe_vm_bind_async(fd, vm, 0, bo, 0, addr,
@@ -357,9 +371,9 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
to_user_pointer(data),
addr, bo_size, sync,
1);
- xe_wait_ufence(fd, &data[0].vm_sync, USER_FENCE_VALUE,
+ xe_wait_ufence(fd, &syncdata[0].vm_sync, USER_FENCE_VALUE,
0, fence_timeout);
- data[0].vm_sync = 0;
+ syncdata[0].vm_sync = 0;
}
if (flags & INVALIDATE && i && !(i & 0x1f)) {
@@ -371,11 +385,11 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
* an invalidate.
*/
for (j = i == 0x20 ? 0 : i - 0x1f; j <= i; ++j)
- xe_wait_ufence(fd, &data[j].exec_sync,
+ xe_wait_ufence(fd, &syncdata[j].exec_sync,
USER_FENCE_VALUE,
exec_queues[e],
fence_timeout);
- igt_assert_eq(data[i].data, 0xc0ffee);
+ igt_assert_eq(syncdata[i].data, 0xc0ffee);
} else if (i * 2 != n_execs) {
/*
* We issue 1 mmap which races against running
@@ -409,15 +423,9 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
/*
* For !RACE cases xe_wait_ufence has been called in above for-loop
* except the last batch of submissions. For RACE cases we will need
- * to wait for the second half of the submissions to complete. There
- * is a potential race here because the first half submissions might
- * have updated the fence in the old physical location while the test
- * is remapping the buffer from a different physical location, but the
- * wait_ufence only checks the fence from the new location which would
- * never be updated. We have to assume the first half of the submissions
- * complete before the second half.
+ * to wait for all submissions to complete.
*/
- j = (flags & RACE) ? (n_execs / 2 + 1) : (((n_execs - 1) & ~0x1f) + 1);
+ j = (flags & RACE) ? 0 : (((n_execs - 1) & ~0x1f) + 1);
else if (flags & REBIND)
/*
* For REBIND cases xe_wait_ufence has been called in above for-loop
@@ -426,19 +434,23 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
j = ((n_execs - 1) & ~0x1f) + 1;
for (i = j; i < n_execs; i++)
- xe_wait_ufence(fd, &data[i].exec_sync, USER_FENCE_VALUE,
+ xe_wait_ufence(fd, &syncdata[i].exec_sync, USER_FENCE_VALUE,
exec_queues[i % n_exec_queues], fence_timeout);
- sync[0].addr = to_user_pointer(&data[0].vm_sync);
+ sync[0].addr = to_user_pointer(&syncdata[0].vm_sync);
xe_vm_unbind_async(fd, vm, 0, 0, addr, bo_size, sync, 1);
- xe_wait_ufence(fd, &data[0].vm_sync, USER_FENCE_VALUE, 0, fence_timeout);
+ xe_wait_ufence(fd, &syncdata[0].vm_sync, USER_FENCE_VALUE, 0, fence_timeout);
+ syncdata[0].vm_sync = 0;
+ xe_vm_unbind_async(fd, vm, 0, 0, syncaddr, sync_size, sync, 1);
+ xe_wait_ufence(fd, &syncdata[0].vm_sync, USER_FENCE_VALUE, 0, fence_timeout);
for (i = j; i < n_execs; i++)
- igt_assert_eq(data[i].data, 0xc0ffee);
+ igt_assert_eq(syncdata[i].data, 0xc0ffee);
for (i = 0; i < n_exec_queues; i++)
xe_exec_queue_destroy(fd, exec_queues[i]);
+ munmap(syncdata, sync_size);
if (bo) {
munmap(data, bo_size);
gem_close(fd, bo);
--
2.25.1
next prev parent reply other threads:[~2024-10-30 23:00 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-30 23:03 [i-g-t 0/4] separate sync data and batch buffer fei.yang
2024-10-30 23:03 ` [i-g-t 1/4] tests/intel/xe_exec_fault_mode: " fei.yang
2024-11-05 23:55 ` Matt Roper
2024-10-30 23:03 ` [i-g-t 2/4] tests/intel/xe_exec_threads: remove redundant wait fei.yang
2024-11-05 23:59 ` Matt Roper
2024-10-30 23:03 ` [i-g-t 3/4] tests/intel/xe_exec_threads: wait for all submissions to complete fei.yang
2024-10-30 23:03 ` fei.yang [this message]
2024-10-31 0:57 ` ✓ CI.xeBAT: success for separate sync data and batch buffer Patchwork
2024-10-31 0:59 ` ✓ Fi.CI.BAT: " Patchwork
2024-10-31 2:05 ` ✗ CI.xeFULL: failure " Patchwork
2024-10-31 11:10 ` ✗ Fi.CI.IGT: " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241030230350.1681757-5-fei.yang@intel.com \
--to=fei.yang@intel.com \
--cc=igt-dev@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox