Igt-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH] tests/amdgpu: add sync object tests
@ 2023-08-30 14:55 vitaly.prosyak
  2023-08-30 15:06 ` Luben Tuikov
                   ` (4 more replies)
  0 siblings, 5 replies; 21+ messages in thread
From: vitaly.prosyak @ 2023-08-30 14:55 UTC (permalink / raw)
  To: igt-dev; +Cc: Alex Deucher, Luben Tuikov, Christian Koenig

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

Using worker thread to wait on point and then signal point on other thread.
Another test uses a worker thread to signal point and wait on the main
thread using amdgpu_cs_syncobj_timeline_wait.

The command consists of two chunks :
1. AMDGPU_CHUNK_ID_IB uses GFX_COMPUTE_NOP  or SDMA_NOP.
2. The second chunk is AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT
   or AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL which has the
   point number .

v2 : Kamil helped with podman script to fix the build
     failure when drmlib version < 2.4.97
v3 : Kamil suggested sort alphabetically includes.
     Luben suggested code and meson script improvements.

Cc: Luben Tuikov <luben.tuikov@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Kamil Konieczny <kamil.konieczny@linux.intel.com>
Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Reviewed-by: Kamil Konieczny <kamil.konieczny@linux.intel.com>
---
 tests/amdgpu/amd_syncobj.c | 262 +++++++++++++++++++++++++++++++++++++
 tests/amdgpu/meson.build   |   5 +
 2 files changed, 267 insertions(+)
 create mode 100644 tests/amdgpu/amd_syncobj.c

diff --git a/tests/amdgpu/amd_syncobj.c b/tests/amdgpu/amd_syncobj.c
new file mode 100644
index 000000000..e1d80758e
--- /dev/null
+++ b/tests/amdgpu/amd_syncobj.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ */
+
+#include <amdgpu.h>
+#include <amdgpu_drm.h>
+#include <pthread.h>
+
+#include "igt.h"
+#include "lib/amdgpu/amd_PM4.h"
+#include "lib/amdgpu/amd_sdma.h"
+#include "lib/amdgpu/amd_memory.h"
+
+struct syncobj_point {
+	amdgpu_device_handle device;
+	uint32_t syncobj_handle;
+	uint64_t point;
+};
+
+
+static bool
+syncobj_timeline_enable(int fd)
+{
+	int r;
+	uint64_t cap = 0;
+
+	r = drmGetCap(fd, DRM_CAP_SYNCOBJ_TIMELINE, &cap);
+
+	return !(r || cap == 0);
+}
+
+static void
+syncobj_command_submission_helper(amdgpu_device_handle device_handle,
+		uint32_t syncobj_handle, bool wait_or_signal, uint64_t point)
+{
+	amdgpu_context_handle context_handle;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	struct drm_amdgpu_cs_chunk chunks[2];
+	struct drm_amdgpu_cs_chunk_data chunk_data;
+	struct drm_amdgpu_cs_chunk_syncobj syncobj_data;
+	struct amdgpu_cs_fence fence_status;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	uint32_t expired;
+	int i, r;
+	uint64_t seq_no;
+	uint32_t *ptr;
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, &bo_list);
+	igt_assert_eq(r, 0);
+
+	ptr = ib_result_cpu;
+
+	for (i = 0; i < 16; ++i)
+		ptr[i] = wait_or_signal ? GFX_COMPUTE_NOP : SDMA_NOP;
+
+	chunks[0].chunk_id = AMDGPU_CHUNK_ID_IB;
+	chunks[0].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+	chunks[0].chunk_data = (uint64_t)(uintptr_t)&chunk_data;
+	chunk_data.ib_data._pad = 0;
+	chunk_data.ib_data.va_start = ib_result_mc_address;
+	chunk_data.ib_data.ib_bytes = 16 * 4;
+	chunk_data.ib_data.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	chunk_data.ib_data.ip_instance = 0;
+	chunk_data.ib_data.ring = 0;
+	chunk_data.ib_data.flags = 0;
+
+	chunks[1].chunk_id = wait_or_signal ?
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT :
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL;
+	chunks[1].length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4;
+	chunks[1].chunk_data = (uint64_t)(uintptr_t)&syncobj_data;
+	syncobj_data.handle = syncobj_handle;
+	syncobj_data.point = point;
+	syncobj_data.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
+
+	r = amdgpu_cs_submit_raw(device_handle,
+				 context_handle,
+				 bo_list,
+				 2,
+				 chunks,
+				 &seq_no);
+	igt_assert_eq(r, 0);
+
+	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+	fence_status.context = context_handle;
+	fence_status.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	fence_status.ip_instance = 0;
+	fence_status.ring = 0;
+	fence_status.fence = seq_no;
+
+	r = amdgpu_cs_query_fence_status(&fence_status,
+			AMDGPU_TIMEOUT_INFINITE, 0, &expired);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_list_destroy(bo_list);
+	igt_assert_eq(r, 0);
+
+	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				     ib_result_mc_address, 4096);
+
+	r = amdgpu_cs_ctx_free(context_handle);
+	igt_assert_eq(r, 0);
+}
+
+static void *
+syncobj_wait(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+
+	syncobj_command_submission_helper(sp->device, sp->syncobj_handle, true,
+					      sp->point);
+
+	return (void *)0;
+}
+
+static void *
+syncobj_signal(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+
+	syncobj_command_submission_helper(sp->device, sp->syncobj_handle, false,
+						sp->point);
+
+	return (void *)0;
+}
+
+static void
+amdgpu_syncobj_timeline(amdgpu_device_handle device_handle)
+{
+	static pthread_t wait_thread;
+	static pthread_t signal_thread;
+	static pthread_t c_thread;
+	struct syncobj_point sp1, sp2, sp3;
+	uint32_t syncobj_handle;
+	uint64_t payload;
+	uint64_t wait_point, signal_point;
+	uint64_t timeout;
+	struct timespec tp;
+	int r, sync_fd;
+	void *tmp, *tmp2;
+
+	r =  amdgpu_cs_create_syncobj2(device_handle, 0, &syncobj_handle);
+	igt_assert_eq(r, 0);
+
+	// wait on point 5
+	sp1.syncobj_handle = syncobj_handle;
+	sp1.device = device_handle;
+	sp1.point = 5;
+	r = pthread_create(&wait_thread, NULL, syncobj_wait, &sp1);
+	igt_assert_eq(r, 0);
+
+	// signal on point 10
+	sp2.syncobj_handle = syncobj_handle;
+	sp2.device = device_handle;
+	sp2.point = 10;
+	r = pthread_create(&signal_thread, NULL, syncobj_signal, &sp2);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(signal_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(wait_thread, &tmp2);
+	igt_assert_eq(r, 0);
+
+	//query timeline payload
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 10);
+
+	//signal on point 16
+	sp3.syncobj_handle = syncobj_handle;
+	sp3.device = device_handle;
+	sp3.point = 16;
+	r = pthread_create(&c_thread, NULL, syncobj_signal, &sp3);
+	igt_assert_eq(r, 0);
+
+	//CPU wait on point 16
+	wait_point = 16;
+	timeout = 0;
+	clock_gettime(CLOCK_MONOTONIC, &tp);
+	timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
+	timeout += 10000000000; //10s
+	r = amdgpu_cs_syncobj_timeline_wait(device_handle, &syncobj_handle,
+					    &wait_point, 1, timeout,
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+					    NULL);
+
+	igt_assert_eq(r, 0);
+	r = pthread_join(c_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	// export point 16 and import to point 18
+	r = amdgpu_cs_syncobj_export_sync_file2(device_handle, syncobj_handle,
+						16,
+						DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+						&sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_import_sync_file2(device_handle, syncobj_handle,
+						18, sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 18);
+
+	// CPU signal on point 20
+	signal_point = 20;
+	r = amdgpu_cs_syncobj_timeline_signal(device_handle, &syncobj_handle,
+					      &signal_point, 1);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 20);
+
+	r = amdgpu_cs_destroy_syncobj(device_handle, syncobj_handle);
+	igt_assert_eq(r, 0);
+
+}
+
+igt_main
+{
+	amdgpu_device_handle device;
+	int fd = -1;
+
+	igt_fixture {
+		uint32_t major, minor;
+		int err;
+
+		fd = drm_open_driver(DRIVER_AMDGPU);
+		err = amdgpu_device_initialize(fd, &major, &minor, &device);
+		igt_require(err == 0);
+		igt_require(syncobj_timeline_enable(fd));
+		igt_info("Initialized amdgpu, driver version %d.%d\n",
+			 major, minor);
+
+	}
+
+	igt_subtest("amdgpu_syncobj_timeline")
+	amdgpu_syncobj_timeline(device);
+
+	igt_fixture {
+		amdgpu_device_deinitialize(device);
+		close(fd);
+	}
+}
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
index 6032a38e8..ebf52bf38 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -37,6 +37,11 @@ if libdrm_amdgpu.found()
 			  'amd_mall',
 			  'amd_odm',
 			]
+	if libdrm_amdgpu.version().version_compare('> 2.4.97')
+		amdgpu_progs +=[ 'amd_syncobj', ]
+	else
+		warning('libdrm <= 2.4.97 found, amd_syncobj test not applicable')
+	endif
 	amdgpu_deps += libdrm_amdgpu
 endif
 
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread
* [igt-dev] [PATCH] tests/amdgpu: add sync object tests
@ 2023-08-31  3:31 vitaly.prosyak
  2023-08-31 13:07 ` Luben Tuikov
  0 siblings, 1 reply; 21+ messages in thread
From: vitaly.prosyak @ 2023-08-31  3:31 UTC (permalink / raw)
  To: igt-dev; +Cc: Alex Deucher, Luben Tuikov, Christian Koenig

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

Using worker thread to wait on point and then signal point on other thread.
Another test uses a worker thread to signal point and wait on the main
thread using amdgpu_cs_syncobj_timeline_wait.

The command consists of two chunks :
1. AMDGPU_CHUNK_ID_IB uses GFX_COMPUTE_NOP  or SDMA_NOP.
2. The second chunk is AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT
   or AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL which has the
   point number .

v2 : Kamil helped with podman script to fix the build
     failure when drmlib version < 2.4.97
v3 : Kamil suggested sort alphabetically includes.
     Luben suggested code and meson script improvements.

Cc: Luben Tuikov <luben.tuikov@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Kamil Konieczny <kamil.konieczny@linux.intel.com>
Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Reviewed-by: Kamil Konieczny <kamil.konieczny@linux.intel.com>
---
 tests/amdgpu/amd_syncobj.c | 262 +++++++++++++++++++++++++++++++++++++
 tests/amdgpu/meson.build   |   5 +
 2 files changed, 267 insertions(+)
 create mode 100644 tests/amdgpu/amd_syncobj.c

diff --git a/tests/amdgpu/amd_syncobj.c b/tests/amdgpu/amd_syncobj.c
new file mode 100644
index 000000000..e1d80758e
--- /dev/null
+++ b/tests/amdgpu/amd_syncobj.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ */
+
+#include <amdgpu.h>
+#include <amdgpu_drm.h>
+#include <pthread.h>
+
+#include "igt.h"
+#include "lib/amdgpu/amd_PM4.h"
+#include "lib/amdgpu/amd_sdma.h"
+#include "lib/amdgpu/amd_memory.h"
+
+struct syncobj_point {
+	amdgpu_device_handle device;
+	uint32_t syncobj_handle;
+	uint64_t point;
+};
+
+
+static bool
+syncobj_timeline_enable(int fd)
+{
+	int r;
+	uint64_t cap = 0;
+
+	r = drmGetCap(fd, DRM_CAP_SYNCOBJ_TIMELINE, &cap);
+
+	return !(r || cap == 0);
+}
+
+static void
+syncobj_command_submission_helper(amdgpu_device_handle device_handle,
+		uint32_t syncobj_handle, bool wait_or_signal, uint64_t point)
+{
+	amdgpu_context_handle context_handle;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	struct drm_amdgpu_cs_chunk chunks[2];
+	struct drm_amdgpu_cs_chunk_data chunk_data;
+	struct drm_amdgpu_cs_chunk_syncobj syncobj_data;
+	struct amdgpu_cs_fence fence_status;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	uint32_t expired;
+	int i, r;
+	uint64_t seq_no;
+	uint32_t *ptr;
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, &bo_list);
+	igt_assert_eq(r, 0);
+
+	ptr = ib_result_cpu;
+
+	for (i = 0; i < 16; ++i)
+		ptr[i] = wait_or_signal ? GFX_COMPUTE_NOP : SDMA_NOP;
+
+	chunks[0].chunk_id = AMDGPU_CHUNK_ID_IB;
+	chunks[0].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+	chunks[0].chunk_data = (uint64_t)(uintptr_t)&chunk_data;
+	chunk_data.ib_data._pad = 0;
+	chunk_data.ib_data.va_start = ib_result_mc_address;
+	chunk_data.ib_data.ib_bytes = 16 * 4;
+	chunk_data.ib_data.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	chunk_data.ib_data.ip_instance = 0;
+	chunk_data.ib_data.ring = 0;
+	chunk_data.ib_data.flags = 0;
+
+	chunks[1].chunk_id = wait_or_signal ?
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT :
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL;
+	chunks[1].length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4;
+	chunks[1].chunk_data = (uint64_t)(uintptr_t)&syncobj_data;
+	syncobj_data.handle = syncobj_handle;
+	syncobj_data.point = point;
+	syncobj_data.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
+
+	r = amdgpu_cs_submit_raw(device_handle,
+				 context_handle,
+				 bo_list,
+				 2,
+				 chunks,
+				 &seq_no);
+	igt_assert_eq(r, 0);
+
+	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+	fence_status.context = context_handle;
+	fence_status.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	fence_status.ip_instance = 0;
+	fence_status.ring = 0;
+	fence_status.fence = seq_no;
+
+	r = amdgpu_cs_query_fence_status(&fence_status,
+			AMDGPU_TIMEOUT_INFINITE, 0, &expired);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_list_destroy(bo_list);
+	igt_assert_eq(r, 0);
+
+	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				     ib_result_mc_address, 4096);
+
+	r = amdgpu_cs_ctx_free(context_handle);
+	igt_assert_eq(r, 0);
+}
+
+static void *
+syncobj_wait(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+
+	syncobj_command_submission_helper(sp->device, sp->syncobj_handle, true,
+					      sp->point);
+
+	return (void *)0;
+}
+
+static void *
+syncobj_signal(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+
+	syncobj_command_submission_helper(sp->device, sp->syncobj_handle, false,
+						sp->point);
+
+	return (void *)0;
+}
+
+static void
+amdgpu_syncobj_timeline(amdgpu_device_handle device_handle)
+{
+	static pthread_t wait_thread;
+	static pthread_t signal_thread;
+	static pthread_t c_thread;
+	struct syncobj_point sp1, sp2, sp3;
+	uint32_t syncobj_handle;
+	uint64_t payload;
+	uint64_t wait_point, signal_point;
+	uint64_t timeout;
+	struct timespec tp;
+	int r, sync_fd;
+	void *tmp, *tmp2;
+
+	r =  amdgpu_cs_create_syncobj2(device_handle, 0, &syncobj_handle);
+	igt_assert_eq(r, 0);
+
+	// wait on point 5
+	sp1.syncobj_handle = syncobj_handle;
+	sp1.device = device_handle;
+	sp1.point = 5;
+	r = pthread_create(&wait_thread, NULL, syncobj_wait, &sp1);
+	igt_assert_eq(r, 0);
+
+	// signal on point 10
+	sp2.syncobj_handle = syncobj_handle;
+	sp2.device = device_handle;
+	sp2.point = 10;
+	r = pthread_create(&signal_thread, NULL, syncobj_signal, &sp2);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(signal_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(wait_thread, &tmp2);
+	igt_assert_eq(r, 0);
+
+	//query timeline payload
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 10);
+
+	//signal on point 16
+	sp3.syncobj_handle = syncobj_handle;
+	sp3.device = device_handle;
+	sp3.point = 16;
+	r = pthread_create(&c_thread, NULL, syncobj_signal, &sp3);
+	igt_assert_eq(r, 0);
+
+	//CPU wait on point 16
+	wait_point = 16;
+	timeout = 0;
+	clock_gettime(CLOCK_MONOTONIC, &tp);
+	timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
+	timeout += 10000000000; //10s
+	r = amdgpu_cs_syncobj_timeline_wait(device_handle, &syncobj_handle,
+					    &wait_point, 1, timeout,
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+					    NULL);
+
+	igt_assert_eq(r, 0);
+	r = pthread_join(c_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	// export point 16 and import to point 18
+	r = amdgpu_cs_syncobj_export_sync_file2(device_handle, syncobj_handle,
+						16,
+						DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+						&sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_import_sync_file2(device_handle, syncobj_handle,
+						18, sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 18);
+
+	// CPU signal on point 20
+	signal_point = 20;
+	r = amdgpu_cs_syncobj_timeline_signal(device_handle, &syncobj_handle,
+					      &signal_point, 1);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 20);
+
+	r = amdgpu_cs_destroy_syncobj(device_handle, syncobj_handle);
+	igt_assert_eq(r, 0);
+
+}
+
+igt_main
+{
+	amdgpu_device_handle device;
+	int fd = -1;
+
+	igt_fixture {
+		uint32_t major, minor;
+		int err;
+
+		fd = drm_open_driver(DRIVER_AMDGPU);
+		err = amdgpu_device_initialize(fd, &major, &minor, &device);
+		igt_require(err == 0);
+		igt_require(syncobj_timeline_enable(fd));
+		igt_info("Initialized amdgpu, driver version %d.%d\n",
+			 major, minor);
+
+	}
+
+	igt_subtest("amdgpu_syncobj_timeline")
+	amdgpu_syncobj_timeline(device);
+
+	igt_fixture {
+		amdgpu_device_deinitialize(device);
+		close(fd);
+	}
+}
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
index 6032a38e8..ebf52bf38 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -37,6 +37,11 @@ if libdrm_amdgpu.found()
 			  'amd_mall',
 			  'amd_odm',
 			]
+	if libdrm_amdgpu.version().version_compare('> 2.4.97')
+		amdgpu_progs +=[ 'amd_syncobj', ]
+	else
+		warning('libdrm <= 2.4.97 found, amd_syncobj test not applicable')
+	endif
 	amdgpu_deps += libdrm_amdgpu
 endif
 
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread
* [igt-dev] [PATCH] tests/amdgpu: add sync object tests
@ 2023-08-31  0:24 vitaly.prosyak
  0 siblings, 0 replies; 21+ messages in thread
From: vitaly.prosyak @ 2023-08-31  0:24 UTC (permalink / raw)
  To: igt-dev; +Cc: Alex Deucher, Luben Tuikov, Christian Koenig

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

Using worker thread to wait on point and then signal point on other thread.
Another test uses a worker thread to signal point and wait on the main
thread using amdgpu_cs_syncobj_timeline_wait.

The command consists of two chunks :
1. AMDGPU_CHUNK_ID_IB uses GFX_COMPUTE_NOP  or SDMA_NOP.
2. The second chunk is AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT
   or AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL which has the
   point number .

v2 : Kamil helped with podman script to fix the build
     failure when drmlib version < 2.4.97
v3 : Kamil suggested sort alphabetically includes.
     Luben suggested code and meson script improvements.

Cc: Luben Tuikov <luben.tuikov@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Kamil Konieczny <kamil.konieczny@linux.intel.com>
Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Reviewed-by: Kamil Konieczny <kamil.konieczny@linux.intel.com>
---
 tests/amdgpu/amd_syncobj.c | 262 +++++++++++++++++++++++++++++++++++++
 tests/amdgpu/meson.build   |   5 +
 2 files changed, 267 insertions(+)
 create mode 100644 tests/amdgpu/amd_syncobj.c

diff --git a/tests/amdgpu/amd_syncobj.c b/tests/amdgpu/amd_syncobj.c
new file mode 100644
index 000000000..e1d80758e
--- /dev/null
+++ b/tests/amdgpu/amd_syncobj.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ */
+
+#include <amdgpu.h>
+#include <amdgpu_drm.h>
+#include <pthread.h>
+
+#include "igt.h"
+#include "lib/amdgpu/amd_PM4.h"
+#include "lib/amdgpu/amd_sdma.h"
+#include "lib/amdgpu/amd_memory.h"
+
+struct syncobj_point {
+	amdgpu_device_handle device;
+	uint32_t syncobj_handle;
+	uint64_t point;
+};
+
+
+static bool
+syncobj_timeline_enable(int fd)
+{
+	int r;
+	uint64_t cap = 0;
+
+	r = drmGetCap(fd, DRM_CAP_SYNCOBJ_TIMELINE, &cap);
+
+	return !(r || cap == 0);
+}
+
+static void
+syncobj_command_submission_helper(amdgpu_device_handle device_handle,
+		uint32_t syncobj_handle, bool wait_or_signal, uint64_t point)
+{
+	amdgpu_context_handle context_handle;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	struct drm_amdgpu_cs_chunk chunks[2];
+	struct drm_amdgpu_cs_chunk_data chunk_data;
+	struct drm_amdgpu_cs_chunk_syncobj syncobj_data;
+	struct amdgpu_cs_fence fence_status;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	uint32_t expired;
+	int i, r;
+	uint64_t seq_no;
+	uint32_t *ptr;
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, &bo_list);
+	igt_assert_eq(r, 0);
+
+	ptr = ib_result_cpu;
+
+	for (i = 0; i < 16; ++i)
+		ptr[i] = wait_or_signal ? GFX_COMPUTE_NOP : SDMA_NOP;
+
+	chunks[0].chunk_id = AMDGPU_CHUNK_ID_IB;
+	chunks[0].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+	chunks[0].chunk_data = (uint64_t)(uintptr_t)&chunk_data;
+	chunk_data.ib_data._pad = 0;
+	chunk_data.ib_data.va_start = ib_result_mc_address;
+	chunk_data.ib_data.ib_bytes = 16 * 4;
+	chunk_data.ib_data.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	chunk_data.ib_data.ip_instance = 0;
+	chunk_data.ib_data.ring = 0;
+	chunk_data.ib_data.flags = 0;
+
+	chunks[1].chunk_id = wait_or_signal ?
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT :
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL;
+	chunks[1].length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4;
+	chunks[1].chunk_data = (uint64_t)(uintptr_t)&syncobj_data;
+	syncobj_data.handle = syncobj_handle;
+	syncobj_data.point = point;
+	syncobj_data.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
+
+	r = amdgpu_cs_submit_raw(device_handle,
+				 context_handle,
+				 bo_list,
+				 2,
+				 chunks,
+				 &seq_no);
+	igt_assert_eq(r, 0);
+
+	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+	fence_status.context = context_handle;
+	fence_status.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	fence_status.ip_instance = 0;
+	fence_status.ring = 0;
+	fence_status.fence = seq_no;
+
+	r = amdgpu_cs_query_fence_status(&fence_status,
+			AMDGPU_TIMEOUT_INFINITE, 0, &expired);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_list_destroy(bo_list);
+	igt_assert_eq(r, 0);
+
+	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				     ib_result_mc_address, 4096);
+
+	r = amdgpu_cs_ctx_free(context_handle);
+	igt_assert_eq(r, 0);
+}
+
+static void *
+syncobj_wait(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+
+	syncobj_command_submission_helper(sp->device, sp->syncobj_handle, true,
+					      sp->point);
+
+	return (void *)0;
+}
+
+static void *
+syncobj_signal(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+
+	syncobj_command_submission_helper(sp->device, sp->syncobj_handle, false,
+						sp->point);
+
+	return (void *)0;
+}
+
+static void
+amdgpu_syncobj_timeline(amdgpu_device_handle device_handle)
+{
+	static pthread_t wait_thread;
+	static pthread_t signal_thread;
+	static pthread_t c_thread;
+	struct syncobj_point sp1, sp2, sp3;
+	uint32_t syncobj_handle;
+	uint64_t payload;
+	uint64_t wait_point, signal_point;
+	uint64_t timeout;
+	struct timespec tp;
+	int r, sync_fd;
+	void *tmp, *tmp2;
+
+	r =  amdgpu_cs_create_syncobj2(device_handle, 0, &syncobj_handle);
+	igt_assert_eq(r, 0);
+
+	// wait on point 5
+	sp1.syncobj_handle = syncobj_handle;
+	sp1.device = device_handle;
+	sp1.point = 5;
+	r = pthread_create(&wait_thread, NULL, syncobj_wait, &sp1);
+	igt_assert_eq(r, 0);
+
+	// signal on point 10
+	sp2.syncobj_handle = syncobj_handle;
+	sp2.device = device_handle;
+	sp2.point = 10;
+	r = pthread_create(&signal_thread, NULL, syncobj_signal, &sp2);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(signal_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(wait_thread, &tmp2);
+	igt_assert_eq(r, 0);
+
+	//query timeline payload
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 10);
+
+	//signal on point 16
+	sp3.syncobj_handle = syncobj_handle;
+	sp3.device = device_handle;
+	sp3.point = 16;
+	r = pthread_create(&c_thread, NULL, syncobj_signal, &sp3);
+	igt_assert_eq(r, 0);
+
+	//CPU wait on point 16
+	wait_point = 16;
+	timeout = 0;
+	clock_gettime(CLOCK_MONOTONIC, &tp);
+	timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
+	timeout += 10000000000; //10s
+	r = amdgpu_cs_syncobj_timeline_wait(device_handle, &syncobj_handle,
+					    &wait_point, 1, timeout,
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+					    NULL);
+
+	igt_assert_eq(r, 0);
+	r = pthread_join(c_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	// export point 16 and import to point 18
+	r = amdgpu_cs_syncobj_export_sync_file2(device_handle, syncobj_handle,
+						16,
+						DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+						&sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_import_sync_file2(device_handle, syncobj_handle,
+						18, sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 18);
+
+	// CPU signal on point 20
+	signal_point = 20;
+	r = amdgpu_cs_syncobj_timeline_signal(device_handle, &syncobj_handle,
+					      &signal_point, 1);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 20);
+
+	r = amdgpu_cs_destroy_syncobj(device_handle, syncobj_handle);
+	igt_assert_eq(r, 0);
+
+}
+
+igt_main
+{
+	amdgpu_device_handle device;
+	int fd = -1;
+
+	igt_fixture {
+		uint32_t major, minor;
+		int err;
+
+		fd = drm_open_driver(DRIVER_AMDGPU);
+		err = amdgpu_device_initialize(fd, &major, &minor, &device);
+		igt_require(err == 0);
+		igt_require(syncobj_timeline_enable(fd));
+		igt_info("Initialized amdgpu, driver version %d.%d\n",
+			 major, minor);
+
+	}
+
+	igt_subtest("amdgpu_syncobj_timeline")
+	amdgpu_syncobj_timeline(device);
+
+	igt_fixture {
+		amdgpu_device_deinitialize(device);
+		close(fd);
+	}
+}
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
index 6032a38e8..ebf52bf38 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -37,6 +37,11 @@ if libdrm_amdgpu.found()
 			  'amd_mall',
 			  'amd_odm',
 			]
+	if libdrm_amdgpu.version().version_compare('> 2.4.97')
+		amdgpu_progs +=[ 'amd_syncobj', ]
+	else
+		warning('libdrm <= 2.4.97 found, amd_syncobj test not applicable')
+	endif
 	amdgpu_deps += libdrm_amdgpu
 endif
 
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread
* [igt-dev] [PATCH] tests/amdgpu: add sync object tests
@ 2023-08-29  4:40 vitaly.prosyak
  2023-08-29 12:24 ` Kamil Konieczny
  2023-08-29 17:18 ` Luben Tuikov
  0 siblings, 2 replies; 21+ messages in thread
From: vitaly.prosyak @ 2023-08-29  4:40 UTC (permalink / raw)
  To: igt-dev; +Cc: Alex Deucher, Luben Tuikov, Christian Koenig

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

Using worker thread to wait on point and then signal point on other thread.
Another test uses a worker thread to signal point and wait on the main
thread using amdgpu_cs_syncobj_timeline_wait.

The command consists of two chunks :
1. AMDGPU_CHUNK_ID_IB uses GFX_COMPUTE_NOP  or SDMA_NOP.
2. The second chunk is AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT
   or AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL which has the
   point number .

v2 : Kamil helped with podman script to fix the build
     failure when drmlib version < 2.4.97

Cc: Luben Tuikov <luben.tuikov@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Kamil Konieczny <kamil.konieczny@linux.intel.com>
Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
---
 tests/amdgpu/amd_syncobj.c | 262 +++++++++++++++++++++++++++++++++++++
 tests/amdgpu/meson.build   |   5 +
 2 files changed, 267 insertions(+)
 create mode 100644 tests/amdgpu/amd_syncobj.c

diff --git a/tests/amdgpu/amd_syncobj.c b/tests/amdgpu/amd_syncobj.c
new file mode 100644
index 000000000..e83b5c5ad
--- /dev/null
+++ b/tests/amdgpu/amd_syncobj.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: MIT
+
+#include <pthread.h>
+#include <amdgpu.h>
+#include <amdgpu_drm.h>
+
+#include "igt.h"
+#include "lib/amdgpu/amd_PM4.h"
+#include "lib/amdgpu/amd_sdma.h"
+#include "lib/amdgpu/amd_memory.h"
+
+struct syncobj_point {
+	amdgpu_device_handle device;
+	uint32_t syncobj_handle;
+	uint64_t point;
+};
+
+
+static bool
+syncobj_timeline_enable(int fd)
+{
+	int r;
+	bool ret = false;
+	uint64_t cap = 0;
+
+	r = drmGetCap(fd, DRM_CAP_SYNCOBJ_TIMELINE, &cap);
+	if (r || cap == 0)
+		return ret;
+	ret = true;
+
+	return ret;
+}
+
+static void
+syncobj_command_submission_helper(amdgpu_device_handle device_handle,
+		uint32_t syncobj_handle, bool wait_or_signal, uint64_t point)
+{
+	amdgpu_context_handle context_handle;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	struct drm_amdgpu_cs_chunk chunks[2];
+	struct drm_amdgpu_cs_chunk_data chunk_data;
+	struct drm_amdgpu_cs_chunk_syncobj syncobj_data;
+	struct amdgpu_cs_fence fence_status;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	uint32_t expired;
+	int i, r;
+	uint64_t seq_no;
+	uint32_t *ptr;
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, &bo_list);
+	igt_assert_eq(r, 0);
+
+	ptr = ib_result_cpu;
+
+	for (i = 0; i < 16; ++i)
+		ptr[i] = wait_or_signal ? GFX_COMPUTE_NOP : SDMA_NOP;
+
+	chunks[0].chunk_id = AMDGPU_CHUNK_ID_IB;
+	chunks[0].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+	chunks[0].chunk_data = (uint64_t)(uintptr_t)&chunk_data;
+	chunk_data.ib_data._pad = 0;
+	chunk_data.ib_data.va_start = ib_result_mc_address;
+	chunk_data.ib_data.ib_bytes = 16 * 4;
+	chunk_data.ib_data.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	chunk_data.ib_data.ip_instance = 0;
+	chunk_data.ib_data.ring = 0;
+	chunk_data.ib_data.flags = 0;
+
+	chunks[1].chunk_id = wait_or_signal ?
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT :
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL;
+	chunks[1].length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4;
+	chunks[1].chunk_data = (uint64_t)(uintptr_t)&syncobj_data;
+	syncobj_data.handle = syncobj_handle;
+	syncobj_data.point = point;
+	syncobj_data.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
+
+	r = amdgpu_cs_submit_raw(device_handle,
+				 context_handle,
+				 bo_list,
+				 2,
+				 chunks,
+				 &seq_no);
+	igt_assert_eq(r, 0);
+
+	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+	fence_status.context = context_handle;
+	fence_status.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	fence_status.ip_instance = 0;
+	fence_status.ring = 0;
+	fence_status.fence = seq_no;
+
+	r = amdgpu_cs_query_fence_status(&fence_status,
+			AMDGPU_TIMEOUT_INFINITE, 0, &expired);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_list_destroy(bo_list);
+	igt_assert_eq(r, 0);
+
+	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				     ib_result_mc_address, 4096);
+
+	r = amdgpu_cs_ctx_free(context_handle);
+	igt_assert_eq(r, 0);
+}
+
+static void *
+syncobj_wait(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+
+	syncobj_command_submission_helper(sp->device, sp->syncobj_handle, true,
+					      sp->point);
+
+	return (void *)0;
+}
+
+static void *
+syncobj_signal(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+
+	syncobj_command_submission_helper(sp->device, sp->syncobj_handle, false,
+						sp->point);
+
+	return (void *)0;
+}
+
+static void
+amdgpu_syncobj_timeline(amdgpu_device_handle device_handle)
+{
+	static pthread_t wait_thread;
+	static pthread_t signal_thread;
+	static pthread_t c_thread;
+	struct syncobj_point sp1, sp2, sp3;
+	uint32_t syncobj_handle;
+	uint64_t payload;
+	uint64_t wait_point, signal_point;
+	uint64_t timeout;
+	struct timespec tp;
+	int r, sync_fd;
+	void *tmp, *tmp2;
+
+	r =  amdgpu_cs_create_syncobj2(device_handle, 0, &syncobj_handle);
+	igt_assert_eq(r, 0);
+
+	// wait on point 5
+	sp1.syncobj_handle = syncobj_handle;
+	sp1.device = device_handle;
+	sp1.point = 5;
+	r = pthread_create(&wait_thread, NULL, syncobj_wait, &sp1);
+	igt_assert_eq(r, 0);
+
+	// signal on point 10
+	sp2.syncobj_handle = syncobj_handle;
+	sp2.device = device_handle;
+	sp2.point = 10;
+	r = pthread_create(&signal_thread, NULL, syncobj_signal, &sp2);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(signal_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(wait_thread, &tmp2);
+	igt_assert_eq(r, 0);
+
+	//query timeline payload
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 10);
+
+	//signal on point 16
+	sp3.syncobj_handle = syncobj_handle;
+	sp3.device = device_handle;
+	sp3.point = 16;
+	r = pthread_create(&c_thread, NULL, syncobj_signal, &sp3);
+	igt_assert_eq(r, 0);
+
+	//CPU wait on point 16
+	wait_point = 16;
+	timeout = 0;
+	clock_gettime(CLOCK_MONOTONIC, &tp);
+	timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
+	timeout += 10000000000; //10s
+	r = amdgpu_cs_syncobj_timeline_wait(device_handle, &syncobj_handle,
+					    &wait_point, 1, timeout,
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+					    NULL);
+
+	igt_assert_eq(r, 0);
+	r = pthread_join(c_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	// export point 16 and import to point 18
+	r = amdgpu_cs_syncobj_export_sync_file2(device_handle, syncobj_handle,
+						16,
+						DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+						&sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_import_sync_file2(device_handle, syncobj_handle,
+						18, sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 18);
+
+	// CPU signal on point 20
+	signal_point = 20;
+	r = amdgpu_cs_syncobj_timeline_signal(device_handle, &syncobj_handle,
+					      &signal_point, 1);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 20);
+
+	r = amdgpu_cs_destroy_syncobj(device_handle, syncobj_handle);
+	igt_assert_eq(r, 0);
+
+}
+
+igt_main
+{
+	amdgpu_device_handle device;
+	int fd = -1;
+
+	igt_fixture {
+		uint32_t major, minor;
+		int err;
+
+		fd = drm_open_driver(DRIVER_AMDGPU);
+		err = amdgpu_device_initialize(fd, &major, &minor, &device);
+		igt_require(err == 0);
+		igt_require(syncobj_timeline_enable(fd));
+		igt_info("Initialized amdgpu, driver version %d.%d\n",
+			 major, minor);
+
+	}
+
+	igt_subtest("amdgpu_syncobj_timeline")
+	amdgpu_syncobj_timeline(device);
+
+	igt_fixture {
+		amdgpu_device_deinitialize(device);
+		close(fd);
+	}
+}
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
index 6032a38e8..1520c34bd 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -37,6 +37,11 @@ if libdrm_amdgpu.found()
 			  'amd_mall',
 			  'amd_odm',
 			]
+	if libdrm_amdgpu.version().version_compare('> 2.4.97')
+		amdgpu_progs +=[ 'amd_syncobj', ]
+	else
+		warning('libdrm <= 2.4.97 found, no amd_syncobj test')
+	endif
 	amdgpu_deps += libdrm_amdgpu
 endif
 
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread
* [igt-dev] [PATCH] tests/amdgpu: add sync object tests
@ 2023-08-22 21:17 vitaly.prosyak
  2023-08-23 16:34 ` Kamil Konieczny
  0 siblings, 1 reply; 21+ messages in thread
From: vitaly.prosyak @ 2023-08-22 21:17 UTC (permalink / raw)
  To: igt-dev

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

Using worker thread to wait on point and then signal point on other thread.
Another test uses a worker thread to signal point and wait on the main
thread using amdgpu_cs_syncobj_timeline_wait.

The command consists of two chunks :
1. AMDGPU_CHUNK_ID_IB uses GFX_COMPUTE_NOP  or SDMA_NOP.
2. The second chunk is AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT
   or AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL which has the
   point number .

Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
---
 tests/amdgpu/amd_syncobj.c | 262 +++++++++++++++++++++++++++++++++++++
 tests/amdgpu/meson.build   |   1 +
 2 files changed, 263 insertions(+)
 create mode 100644 tests/amdgpu/amd_syncobj.c

diff --git a/tests/amdgpu/amd_syncobj.c b/tests/amdgpu/amd_syncobj.c
new file mode 100644
index 000000000..e83b5c5ad
--- /dev/null
+++ b/tests/amdgpu/amd_syncobj.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: MIT
+
+#include <pthread.h>
+#include <amdgpu.h>
+#include <amdgpu_drm.h>
+
+#include "igt.h"
+#include "lib/amdgpu/amd_PM4.h"
+#include "lib/amdgpu/amd_sdma.h"
+#include "lib/amdgpu/amd_memory.h"
+
+struct syncobj_point {
+	amdgpu_device_handle device;
+	uint32_t syncobj_handle;
+	uint64_t point;
+};
+
+
+static bool
+syncobj_timeline_enable(int fd)
+{
+	int r;
+	bool ret = false;
+	uint64_t cap = 0;
+
+	r = drmGetCap(fd, DRM_CAP_SYNCOBJ_TIMELINE, &cap);
+	if (r || cap == 0)
+		return ret;
+	ret = true;
+
+	return ret;
+}
+
+static void
+syncobj_command_submission_helper(amdgpu_device_handle device_handle,
+		uint32_t syncobj_handle, bool wait_or_signal, uint64_t point)
+{
+	amdgpu_context_handle context_handle;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	struct drm_amdgpu_cs_chunk chunks[2];
+	struct drm_amdgpu_cs_chunk_data chunk_data;
+	struct drm_amdgpu_cs_chunk_syncobj syncobj_data;
+	struct amdgpu_cs_fence fence_status;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	uint32_t expired;
+	int i, r;
+	uint64_t seq_no;
+	uint32_t *ptr;
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, &bo_list);
+	igt_assert_eq(r, 0);
+
+	ptr = ib_result_cpu;
+
+	for (i = 0; i < 16; ++i)
+		ptr[i] = wait_or_signal ? GFX_COMPUTE_NOP : SDMA_NOP;
+
+	chunks[0].chunk_id = AMDGPU_CHUNK_ID_IB;
+	chunks[0].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+	chunks[0].chunk_data = (uint64_t)(uintptr_t)&chunk_data;
+	chunk_data.ib_data._pad = 0;
+	chunk_data.ib_data.va_start = ib_result_mc_address;
+	chunk_data.ib_data.ib_bytes = 16 * 4;
+	chunk_data.ib_data.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	chunk_data.ib_data.ip_instance = 0;
+	chunk_data.ib_data.ring = 0;
+	chunk_data.ib_data.flags = 0;
+
+	chunks[1].chunk_id = wait_or_signal ?
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT :
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL;
+	chunks[1].length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4;
+	chunks[1].chunk_data = (uint64_t)(uintptr_t)&syncobj_data;
+	syncobj_data.handle = syncobj_handle;
+	syncobj_data.point = point;
+	syncobj_data.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
+
+	r = amdgpu_cs_submit_raw(device_handle,
+				 context_handle,
+				 bo_list,
+				 2,
+				 chunks,
+				 &seq_no);
+	igt_assert_eq(r, 0);
+
+	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+	fence_status.context = context_handle;
+	fence_status.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	fence_status.ip_instance = 0;
+	fence_status.ring = 0;
+	fence_status.fence = seq_no;
+
+	r = amdgpu_cs_query_fence_status(&fence_status,
+			AMDGPU_TIMEOUT_INFINITE, 0, &expired);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_list_destroy(bo_list);
+	igt_assert_eq(r, 0);
+
+	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				     ib_result_mc_address, 4096);
+
+	r = amdgpu_cs_ctx_free(context_handle);
+	igt_assert_eq(r, 0);
+}
+
+static void *
+syncobj_wait(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+
+	syncobj_command_submission_helper(sp->device, sp->syncobj_handle, true,
+					      sp->point);
+
+	return (void *)0;
+}
+
+static void *
+syncobj_signal(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+
+	syncobj_command_submission_helper(sp->device, sp->syncobj_handle, false,
+						sp->point);
+
+	return (void *)0;
+}
+
+static void
+amdgpu_syncobj_timeline(amdgpu_device_handle device_handle)
+{
+	static pthread_t wait_thread;
+	static pthread_t signal_thread;
+	static pthread_t c_thread;
+	struct syncobj_point sp1, sp2, sp3;
+	uint32_t syncobj_handle;
+	uint64_t payload;
+	uint64_t wait_point, signal_point;
+	uint64_t timeout;
+	struct timespec tp;
+	int r, sync_fd;
+	void *tmp, *tmp2;
+
+	r =  amdgpu_cs_create_syncobj2(device_handle, 0, &syncobj_handle);
+	igt_assert_eq(r, 0);
+
+	// wait on point 5
+	sp1.syncobj_handle = syncobj_handle;
+	sp1.device = device_handle;
+	sp1.point = 5;
+	r = pthread_create(&wait_thread, NULL, syncobj_wait, &sp1);
+	igt_assert_eq(r, 0);
+
+	// signal on point 10
+	sp2.syncobj_handle = syncobj_handle;
+	sp2.device = device_handle;
+	sp2.point = 10;
+	r = pthread_create(&signal_thread, NULL, syncobj_signal, &sp2);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(signal_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(wait_thread, &tmp2);
+	igt_assert_eq(r, 0);
+
+	//query timeline payload
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 10);
+
+	//signal on point 16
+	sp3.syncobj_handle = syncobj_handle;
+	sp3.device = device_handle;
+	sp3.point = 16;
+	r = pthread_create(&c_thread, NULL, syncobj_signal, &sp3);
+	igt_assert_eq(r, 0);
+
+	//CPU wait on point 16
+	wait_point = 16;
+	timeout = 0;
+	clock_gettime(CLOCK_MONOTONIC, &tp);
+	timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
+	timeout += 10000000000; //10s
+	r = amdgpu_cs_syncobj_timeline_wait(device_handle, &syncobj_handle,
+					    &wait_point, 1, timeout,
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+					    NULL);
+
+	igt_assert_eq(r, 0);
+	r = pthread_join(c_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	// export point 16 and import to point 18
+	r = amdgpu_cs_syncobj_export_sync_file2(device_handle, syncobj_handle,
+						16,
+						DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+						&sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_import_sync_file2(device_handle, syncobj_handle,
+						18, sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 18);
+
+	// CPU signal on point 20
+	signal_point = 20;
+	r = amdgpu_cs_syncobj_timeline_signal(device_handle, &syncobj_handle,
+					      &signal_point, 1);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 20);
+
+	r = amdgpu_cs_destroy_syncobj(device_handle, syncobj_handle);
+	igt_assert_eq(r, 0);
+
+}
+
+igt_main
+{
+	amdgpu_device_handle device;
+	int fd = -1;
+
+	igt_fixture {
+		uint32_t major, minor;
+		int err;
+
+		fd = drm_open_driver(DRIVER_AMDGPU);
+		err = amdgpu_device_initialize(fd, &major, &minor, &device);
+		igt_require(err == 0);
+		igt_require(syncobj_timeline_enable(fd));
+		igt_info("Initialized amdgpu, driver version %d.%d\n",
+			 major, minor);
+
+	}
+
+	igt_subtest("amdgpu_syncobj_timeline")
+	amdgpu_syncobj_timeline(device);
+
+	igt_fixture {
+		amdgpu_device_deinitialize(device);
+		close(fd);
+	}
+}
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
index 6032a38e8..1c4f5030b 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -6,6 +6,7 @@ if libdrm_amdgpu.found()
 			  'amd_assr',
 			  'amd_basic',
 			  'amd_bo',
+			  'amd_syncobj',
 			  'amd_bypass',
 			  'amd_color',
 			  'amd_cp_dma_misc',
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread
* [igt-dev] [PATCH] tests/amdgpu: add sync object tests
@ 2023-07-02  1:22 vitaly.prosyak
  2023-07-03  7:39 ` Kamil Konieczny
  0 siblings, 1 reply; 21+ messages in thread
From: vitaly.prosyak @ 2023-07-02  1:22 UTC (permalink / raw)
  To: igt-dev; +Cc: Christian Koenig

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

Using worker thread to wait on point and then signal point on other thread.
Another test uses a worker thread to signal point and wait on the main
thread using amdgpu_cs_syncobj_timeline_wait.

The command consists of two chunks :
1. AMDGPU_CHUNK_ID_IB uses GFX_COMPUTE_NOP  or SDMA_NOP.
2. The second chunk is AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT
   or AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL which has the
   point number .

v1->v2. Fixed style issues - Christian.
        Fixed formatting issues - Kamil.

Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Acked-by: Christian Koenig <christian.koenig@amd.com>
Acked-by: Kamil Konieczny <kamil.konieczny@linux.intel.com>
---
 tests/amdgpu/amd_syncobj.c | 266 +++++++++++++++++++++++++++++++++++++
 tests/amdgpu/meson.build   |   1 +
 2 files changed, 267 insertions(+)
 create mode 100644 tests/amdgpu/amd_syncobj.c

diff --git a/tests/amdgpu/amd_syncobj.c b/tests/amdgpu/amd_syncobj.c
new file mode 100644
index 000000000..7cab093c9
--- /dev/null
+++ b/tests/amdgpu/amd_syncobj.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ */
+
+#include <pthread.h>
+
+#include <amdgpu.h>
+#include <amdgpu_drm.h>
+
+#include "igt.h"
+#include "lib/amdgpu/amd_PM4.h"
+#include "lib/amdgpu/amd_sdma.h"
+#include "lib/amdgpu/amd_memory.h"
+
+struct syncobj_point {
+	amdgpu_device_handle device;
+	uint32_t syncobj_handle;
+	uint64_t point;
+};
+
+
+static bool
+syncobj_timeline_enable(int fd)
+{
+	int r;
+	bool ret = false;
+	uint64_t cap = 0;
+
+	r = drmGetCap(fd, DRM_CAP_SYNCOBJ_TIMELINE, &cap);
+	if (r || cap == 0)
+		return ret;
+	ret = true;
+
+	return ret;
+}
+
+static void
+syncobj_command_submission_helper(amdgpu_device_handle device_handle,
+		uint32_t syncobj_handle, bool wait_or_signal, uint64_t point)
+{
+	amdgpu_context_handle context_handle;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	struct drm_amdgpu_cs_chunk chunks[2];
+	struct drm_amdgpu_cs_chunk_data chunk_data;
+	struct drm_amdgpu_cs_chunk_syncobj syncobj_data;
+	struct amdgpu_cs_fence fence_status;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	uint32_t expired;
+	int i, r;
+	uint64_t seq_no;
+	uint32_t *ptr;
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, &bo_list);
+	igt_assert_eq(r, 0);
+
+	ptr = ib_result_cpu;
+
+	for (i = 0; i < 16; ++i)
+		ptr[i] = wait_or_signal ? GFX_COMPUTE_NOP : SDMA_NOP;
+
+	chunks[0].chunk_id = AMDGPU_CHUNK_ID_IB;
+	chunks[0].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+	chunks[0].chunk_data = (uint64_t)(uintptr_t)&chunk_data;
+	chunk_data.ib_data._pad = 0;
+	chunk_data.ib_data.va_start = ib_result_mc_address;
+	chunk_data.ib_data.ib_bytes = 16 * 4;
+	chunk_data.ib_data.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	chunk_data.ib_data.ip_instance = 0;
+	chunk_data.ib_data.ring = 0;
+	chunk_data.ib_data.flags = 0;
+
+	chunks[1].chunk_id = wait_or_signal ?
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT :
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL;
+	chunks[1].length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4;
+	chunks[1].chunk_data = (uint64_t)(uintptr_t)&syncobj_data;
+	syncobj_data.handle = syncobj_handle;
+	syncobj_data.point = point;
+	syncobj_data.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
+
+	r = amdgpu_cs_submit_raw(device_handle,
+				 context_handle,
+				 bo_list,
+				 2,
+				 chunks,
+				 &seq_no);
+	igt_assert_eq(r, 0);
+
+	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+	fence_status.context = context_handle;
+	fence_status.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	fence_status.ip_instance = 0;
+	fence_status.ring = 0;
+	fence_status.fence = seq_no;
+
+	r = amdgpu_cs_query_fence_status(&fence_status,
+			AMDGPU_TIMEOUT_INFINITE, 0, &expired);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_list_destroy(bo_list);
+	igt_assert_eq(r, 0);
+
+	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				     ib_result_mc_address, 4096);
+
+	r = amdgpu_cs_ctx_free(context_handle);
+	igt_assert_eq(r, 0);
+}
+
+static void *
+syncobj_wait(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+
+	syncobj_command_submission_helper(sp->device, sp->syncobj_handle, true,
+			sp->point);
+
+	return (void *)0;
+}
+
+static void *
+syncobj_signal(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+
+	syncobj_command_submission_helper(sp->device, sp->syncobj_handle, false,
+			sp->point);
+
+	return (void *)0;
+}
+
+static void
+amdgpu_syncobj_timeline(amdgpu_device_handle device_handle)
+{
+	static pthread_t wait_thread;
+	static pthread_t signal_thread;
+	static pthread_t c_thread;
+	struct syncobj_point sp1, sp2, sp3;
+	uint32_t syncobj_handle;
+	uint64_t payload;
+	uint64_t wait_point, signal_point;
+	uint64_t timeout;
+	struct timespec tp;
+	int r, sync_fd;
+	void *tmp, *tmp2;
+
+	r =  amdgpu_cs_create_syncobj2(device_handle, 0, &syncobj_handle);
+	igt_assert_eq(r, 0);
+
+	// wait on point 5
+	sp1.syncobj_handle = syncobj_handle;
+	sp1.device = device_handle;
+	sp1.point = 5;
+	r = pthread_create(&wait_thread, NULL, syncobj_wait, &sp1);
+	igt_assert_eq(r, 0);
+
+	// signal on point 10
+	sp2.syncobj_handle = syncobj_handle;
+	sp2.device = device_handle;
+	sp2.point = 10;
+	r = pthread_create(&signal_thread, NULL, syncobj_signal, &sp2);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(signal_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(wait_thread, &tmp2);
+	igt_assert_eq(r, 0);
+
+	//query timeline payload
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 10);
+
+	//signal on point 16
+	sp3.syncobj_handle = syncobj_handle;
+	sp3.device = device_handle;
+	sp3.point = 16;
+	r = pthread_create(&c_thread, NULL, syncobj_signal, &sp3);
+	igt_assert_eq(r, 0);
+
+	//CPU wait on point 16
+	wait_point = 16;
+	timeout = 0;
+	clock_gettime(CLOCK_MONOTONIC, &tp);
+	timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
+	timeout += 10000000000; //10s
+	r = amdgpu_cs_syncobj_timeline_wait(device_handle, &syncobj_handle,
+					    &wait_point, 1, timeout,
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+					    NULL);
+
+	igt_assert_eq(r, 0);
+	r = pthread_join(c_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	// export point 16 and import to point 18
+	r = amdgpu_cs_syncobj_export_sync_file2(device_handle, syncobj_handle,
+						16,
+						DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+						&sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_import_sync_file2(device_handle, syncobj_handle,
+						18, sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 18);
+
+	// CPU signal on point 20
+	signal_point = 20;
+	r = amdgpu_cs_syncobj_timeline_signal(device_handle, &syncobj_handle,
+					      &signal_point, 1);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 20);
+
+	r = amdgpu_cs_destroy_syncobj(device_handle, syncobj_handle);
+	igt_assert_eq(r, 0);
+
+}
+
+igt_main
+{
+	amdgpu_device_handle device;
+	int fd = -1;
+
+	igt_fixture {
+		uint32_t major, minor;
+		int err;
+
+		fd = drm_open_driver(DRIVER_AMDGPU);
+		err = amdgpu_device_initialize(fd, &major, &minor, &device);
+		igt_require(err == 0);
+		igt_require(syncobj_timeline_enable(fd));
+		igt_info("Initialized amdgpu, driver version %d.%d\n",
+			 major, minor);
+
+	}
+
+	igt_subtest("amdgpu_syncobj_timeline")
+	amdgpu_syncobj_timeline(device);
+
+	igt_fixture {
+		amdgpu_device_deinitialize(device);
+		close(fd);
+	}
+}
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
index 24843de73..02096934d 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -6,6 +6,7 @@ if libdrm_amdgpu.found()
 			  'amd_assr',
 			  'amd_basic',
 			  'amd_bo',
+			  'amd_syncobj',
 			  'amd_bypass',
 			  'amd_color',
 			  'amd_cp_dma_misc',
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread
* [igt-dev] [PATCH] tests/amdgpu: add sync object tests
@ 2023-06-13 22:06 vitaly.prosyak
  2023-06-14  9:57 ` Christian König
  2023-06-14 11:09 ` Kamil Konieczny
  0 siblings, 2 replies; 21+ messages in thread
From: vitaly.prosyak @ 2023-06-13 22:06 UTC (permalink / raw)
  To: igt-dev; +Cc: alexander.deucher, michael.strawbridge, christian.koenig

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

Using worker thread to wait on point and then signal point on other thread.
Another test uses a worker thread to signal point and wait on the main
thread using amdgpu_cs_syncobj_timeline_wait.

The command consists of two chunks :
1. AMDGPU_CHUNK_ID_IB uses GFX_COMPUTE_NOP  or SDMA_NOP.
2. The second chunk is AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT
   or AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL which has the
   point number .

Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
---
 tests/amdgpu/amd_syncobj.c | 293 +++++++++++++++++++++++++++++++++++++
 tests/amdgpu/meson.build   |   1 +
 2 files changed, 294 insertions(+)
 create mode 100644 tests/amdgpu/amd_syncobj.c

diff --git a/tests/amdgpu/amd_syncobj.c b/tests/amdgpu/amd_syncobj.c
new file mode 100644
index 000000000..d178c2600
--- /dev/null
+++ b/tests/amdgpu/amd_syncobj.c
@@ -0,0 +1,293 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+*/
+
+
+#include "igt.h"
+#include "drmtest.h"
+#include "lib/amdgpu/amd_PM4.h" // GFX_COMPUTE_NOP
+#include "lib/amdgpu/amd_sdma.h"	// SDMA_NOP
+
+#include <amdgpu.h>
+#include <amdgpu_drm.h>
+#include "lib/amdgpu/amd_memory.h"
+#include <pthread.h>
+
+struct syncobj_point {
+	amdgpu_device_handle device;
+	uint32_t syncobj_handle;
+	uint64_t point;
+};
+
+
+static bool
+syncobj_timeline_enable(int fd)
+{
+	int r;
+	bool ret = false;
+	uint64_t cap = 0;
+
+	r = drmGetCap(fd, DRM_CAP_SYNCOBJ_TIMELINE, &cap);
+	if (r || cap == 0)
+		return ret;
+	ret = true;
+
+	return ret;
+}
+
+static int
+syncobj_command_submission_helper(amdgpu_device_handle device_handle,
+								  uint32_t syncobj_handle, bool wait_or_signal,
+								  uint64_t point)
+{
+	amdgpu_context_handle context_handle;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	struct drm_amdgpu_cs_chunk chunks[2];
+	struct drm_amdgpu_cs_chunk_data chunk_data;
+	struct drm_amdgpu_cs_chunk_syncobj syncobj_data;
+	struct amdgpu_cs_fence fence_status;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	uint32_t expired;
+	int i, r;
+	uint64_t seq_no;
+	uint32_t *ptr;
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, &bo_list);
+	igt_assert_eq(r, 0);
+
+	ptr = ib_result_cpu;
+
+	for (i = 0; i < 16; ++i)
+		ptr[i] = wait_or_signal ? GFX_COMPUTE_NOP: SDMA_NOP;
+
+	chunks[0].chunk_id = AMDGPU_CHUNK_ID_IB;
+	chunks[0].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+	chunks[0].chunk_data = (uint64_t)(uintptr_t)&chunk_data;
+	chunk_data.ib_data._pad = 0;
+	chunk_data.ib_data.va_start = ib_result_mc_address;
+	chunk_data.ib_data.ib_bytes = 16 * 4;
+	chunk_data.ib_data.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	chunk_data.ib_data.ip_instance = 0;
+	chunk_data.ib_data.ring = 0;
+	chunk_data.ib_data.flags = 0;
+
+	chunks[1].chunk_id = wait_or_signal ?
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT :
+		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL;
+	chunks[1].length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4;
+	chunks[1].chunk_data = (uint64_t)(uintptr_t)&syncobj_data;
+	syncobj_data.handle = syncobj_handle;
+	syncobj_data.point = point;
+	syncobj_data.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
+
+	r = amdgpu_cs_submit_raw(device_handle,
+				 context_handle,
+				 bo_list,
+				 2,
+				 chunks,
+				 &seq_no);
+	igt_assert_eq(r, 0);
+
+	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+	fence_status.context = context_handle;
+	fence_status.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX : AMDGPU_HW_IP_DMA;
+	fence_status.ip_instance = 0;
+	fence_status.ring = 0;
+	fence_status.fence = seq_no;
+
+	r = amdgpu_cs_query_fence_status(&fence_status,
+			AMDGPU_TIMEOUT_INFINITE,0, &expired);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_list_destroy(bo_list);
+	igt_assert_eq(r, 0);
+
+	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				     ib_result_mc_address, 4096);
+
+	r = amdgpu_cs_ctx_free(context_handle);
+	igt_assert_eq(r, 0);
+
+	return r;
+}
+
+static void *
+syncobj_wait(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+	int r;
+
+	r = syncobj_command_submission_helper(sp->device, sp->syncobj_handle, true,
+					      sp->point);
+	igt_assert_eq(r, 0);
+
+	return (void *)(long)r;
+}
+
+static void *
+syncobj_signal(void *data)
+{
+	struct syncobj_point *sp = (struct syncobj_point *)data;
+	int r;
+
+	r = syncobj_command_submission_helper(sp->device,sp->syncobj_handle, false,
+					      sp->point);
+	igt_assert_eq(r, 0);
+
+	return (void *)(long)r;
+}
+
+static void
+amdgpu_syncobj_timeline(amdgpu_device_handle device_handle)
+{
+	static pthread_t wait_thread;
+	static pthread_t signal_thread;
+	static pthread_t c_thread;
+	struct syncobj_point sp1, sp2, sp3;
+	uint32_t syncobj_handle;
+	uint64_t payload;
+	uint64_t wait_point, signal_point;
+	uint64_t timeout;
+	struct timespec tp;
+	int r, sync_fd;
+	void *tmp, *tmp2;
+
+	r =  amdgpu_cs_create_syncobj2(device_handle, 0, &syncobj_handle);
+	igt_assert_eq(r, 0);
+
+	// wait on point 5
+	sp1.syncobj_handle = syncobj_handle;
+	sp1.device = device_handle;
+	sp1.point = 5;
+	r = pthread_create(&wait_thread, NULL, syncobj_wait, &sp1);
+	igt_assert_eq(r, 0);
+
+	// signal on point 10
+	sp2.syncobj_handle = syncobj_handle;
+	sp2.device = device_handle;
+	sp2.point = 10;
+	r = pthread_create(&signal_thread, NULL, syncobj_signal, &sp2);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(signal_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	r = pthread_join(wait_thread, &tmp2);
+	igt_assert_eq(r, 0);
+
+	//query timeline payload
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 10);
+
+	//signal on point 16
+	sp3.syncobj_handle = syncobj_handle;
+	sp3.device = device_handle;
+	sp3.point = 16;
+	r = pthread_create(&c_thread, NULL, syncobj_signal, &sp3);
+	igt_assert_eq(r, 0);
+
+	//CPU wait on point 16
+	wait_point = 16;
+	timeout = 0;
+	clock_gettime(CLOCK_MONOTONIC, &tp);
+	timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
+	timeout += 0x10000000000; //10s
+	r = amdgpu_cs_syncobj_timeline_wait(device_handle, &syncobj_handle,
+					    &wait_point, 1, timeout,
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+					    NULL);
+
+	igt_assert_eq(r, 0);
+	r = pthread_join(c_thread, &tmp);
+	igt_assert_eq(r, 0);
+
+	// export point 16 and import to point 18
+	r = amdgpu_cs_syncobj_export_sync_file2(device_handle, syncobj_handle,
+						16,
+						DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+						&sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_import_sync_file2(device_handle, syncobj_handle,
+						18, sync_fd);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 18);
+
+	// CPU signal on point 20
+	signal_point = 20;
+	r = amdgpu_cs_syncobj_timeline_signal(device_handle, &syncobj_handle,
+					      &signal_point, 1);
+	igt_assert_eq(r, 0);
+	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
+				    &payload, 1);
+	igt_assert_eq(r, 0);
+	igt_assert_eq(payload, 20);
+
+	r = amdgpu_cs_destroy_syncobj(device_handle, syncobj_handle);
+	igt_assert_eq(r, 0);
+
+}
+
+igt_main
+{
+	amdgpu_device_handle device;
+	int fd = -1;
+
+	igt_fixture {
+		uint32_t major, minor;
+		int err;
+
+		fd = drm_open_driver(DRIVER_AMDGPU);
+		err = amdgpu_device_initialize(fd, &major, &minor, &device);
+		igt_require(err == 0);
+		igt_require(syncobj_timeline_enable(fd));
+		igt_info("Initialized amdgpu, driver version %d.%d\n",
+			 major, minor);
+
+	}
+
+	igt_subtest("amdgpu_syncobj_timeline")
+	amdgpu_syncobj_timeline(device);
+
+	igt_fixture {
+		amdgpu_device_deinitialize(device);
+		close(fd);
+	}
+}
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
index 7fff7602f..7342bb714 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -5,6 +5,7 @@ if libdrm_amdgpu.found()
 	amdgpu_progs += [ 'amd_abm',
 			  'amd_assr',
 			  'amd_basic',
+			  'amd_syncobj',
 			  'amd_bypass',
 			  'amd_deadlock',
 			  'amd_pci_unplug',
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2023-08-31 13:07 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-08-30 14:55 [igt-dev] [PATCH] tests/amdgpu: add sync object tests vitaly.prosyak
2023-08-30 15:06 ` Luben Tuikov
2023-08-30 16:15 ` [igt-dev] ✗ GitLab.Pipeline: warning for tests/amdgpu: add sync object tests (rev5) Patchwork
2023-08-30 16:47 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
2023-08-30 17:02 ` [igt-dev] ✓ CI.xeBAT: " Patchwork
2023-08-31  1:05 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2023-08-31  3:31 [igt-dev] [PATCH] tests/amdgpu: add sync object tests vitaly.prosyak
2023-08-31 13:07 ` Luben Tuikov
2023-08-31  0:24 vitaly.prosyak
2023-08-29  4:40 vitaly.prosyak
2023-08-29 12:24 ` Kamil Konieczny
2023-08-29 17:18 ` Luben Tuikov
2023-08-22 21:17 vitaly.prosyak
2023-08-23 16:34 ` Kamil Konieczny
2023-08-24  0:42   ` vitaly prosyak
2023-07-02  1:22 vitaly.prosyak
2023-07-03  7:39 ` Kamil Konieczny
2023-07-03 11:15   ` Prosyak, Vitaly
2023-06-13 22:06 vitaly.prosyak
2023-06-14  9:57 ` Christian König
2023-06-14 11:09 ` Kamil Konieczny

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox