* [PATCH i-g-t] test/amdgpu: add user queue test
@ 2025-03-27 7:17 Jesse.zhang@amd.com
From: Jesse.zhang@amd.com @ 2025-03-27 7:17 UTC (permalink / raw)
To: igt-dev
Cc: Vitaly Prosyak, Alex Deucher, Christian Koenig,
	Srinivasan Shanmugam, Jesse Zhang
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
This patch introduces a new test for AMDGPU user queues, which provide
functionality for userspace to manage GPU queues directly. The test covers:
1. Basic user queue operations for the GFX, COMPUTE, and SDMA IP blocks
2. Synchronization between user queues using syncobjs
3. Timeline-based synchronization
4. Multi-threaded signaling and waiting scenarios
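The flow exercised by each subtest is roughly the following sketch
(illustrative only; BO setup and error handling are omitted, and the
libdrm wrappers shown are the ones this test and the meson check assume
to exist in libdrm_amdgpu):

    struct drm_amdgpu_userq_mqd_gfx11 mqd = {
        .shadow_va = shadow_va,   /* shadow buffer VA */
        .csa_va = csa_va,         /* CSA buffer VA */
    };
    uint32_t q_id;

    /* create a GFX user queue backed by queue/rptr/wptr BOs */
    amdgpu_create_userqueue(dev, AMDGPU_HW_IP_GFX, db_handle, db_index,
                            queue_va, queue_size, wptr_va, rptr_va,
                            &mqd, &q_id);

    /* write PM4 packets into the queue BO, then ring the doorbell */
    queue_cpu[0] = PACKET3(PACKET3_WRITE_DATA, 7);
    /* ... remaining packet dwords ... */
    *wptr_cpu = num_dwords;
    doorbell_cpu[db_index] = num_dwords;

    amdgpu_free_userqueue(dev, q_id);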
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Signed-off-by: Jesse.zhang <Jesse.zhang@amd.com>
---
include/drm-uapi/amdgpu_drm.h | 254 +++++
tests/amdgpu/amd_userq_basic.c | 1706 ++++++++++++++++++++++++++++++++
tests/amdgpu/meson.build | 8 +-
3 files changed, 1967 insertions(+), 1 deletion(-)
create mode 100644 tests/amdgpu/amd_userq_basic.c
diff --git a/include/drm-uapi/amdgpu_drm.h b/include/drm-uapi/amdgpu_drm.h
index efe5de6ce..d83216a59 100644
--- a/include/drm-uapi/amdgpu_drm.h
+++ b/include/drm-uapi/amdgpu_drm.h
@@ -54,6 +54,9 @@ extern "C" {
#define DRM_AMDGPU_VM 0x13
#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
#define DRM_AMDGPU_SCHED 0x15
+#define DRM_AMDGPU_USERQ 0x16
+#define DRM_AMDGPU_USERQ_SIGNAL 0x17
+#define DRM_AMDGPU_USERQ_WAIT 0x18
#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +74,9 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
+#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
+#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
/**
* DOC: memory domains
@@ -319,6 +325,241 @@ union drm_amdgpu_ctx {
union drm_amdgpu_ctx_out out;
};
+/* user queue IOCTL operations */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE 2
+
+/*
+ * This structure is a container to pass input configuration
+ * info for all supported userqueue related operations.
+ * For operation AMDGPU_USERQ_OP_CREATE: the user is expected
+ * to set all fields except the parameter 'queue_id'.
+ * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected
+ * to be set is 'queue_id'; everything else is ignored.
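+ *
+ * An OP_CREATE call might look like this (illustrative sketch only;
+ * all names other than the uAPI fields are placeholders):
+ *
+ *    union drm_amdgpu_userq args = {};
+ *
+ *    args.in.op = AMDGPU_USERQ_OP_CREATE;
+ *    args.in.ip_type = AMDGPU_HW_IP_GFX;
+ *    args.in.doorbell_handle = db_handle;
+ *    args.in.doorbell_offset = db_index;
+ *    args.in.queue_va = queue_va;
+ *    args.in.queue_size = queue_size;
+ *    args.in.rptr_va = rptr_va;
+ *    args.in.wptr_va = wptr_va;
+ *    args.in.mqd = (uintptr_t)&mqd;
+ *    args.in.mqd_size = sizeof(mqd);
+ *    drmIoctl(fd, DRM_IOCTL_AMDGPU_USERQ, &args);
+ *    queue_id = args.out.queue_id;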
+ */
+struct drm_amdgpu_userq_in {
+ /** AMDGPU_USERQ_OP_* */
+ __u32 op;
+ /** Queue id passed for operation USERQ_OP_FREE */
+ __u32 queue_id;
+ /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */
+ __u32 ip_type;
+ /**
+ * @doorbell_handle: the handle of doorbell GEM object
+ * associated to this userqueue client.
+ */
+ __u32 doorbell_handle;
+ /**
+ * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo.
+ * Kernel will generate absolute doorbell offset using doorbell_handle
+ * and doorbell_offset in the doorbell bo.
+ */
+ __u32 doorbell_offset;
+ __u32 _pad;
+ /**
+ * @queue_va: Virtual address of the GPU memory which holds the queue
+ * object. The queue holds the workload packets.
+ */
+ __u64 queue_va;
+ /**
+ * @queue_size: Size of the queue in bytes, this needs to be 256-byte
+ * aligned.
+ */
+ __u64 queue_size;
+ /**
+ * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR.
+ * This object must be at least 8 bytes in size and 8-byte aligned.
+ */
+ __u64 rptr_va;
+ /**
+ * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR.
+ * This object must be at least 8 bytes in size and 8-byte aligned.
+ *
+ * Queue, RPTR and WPTR can come from the same object, as long as the size
+ * and alignment related requirements are met.
+ */
+ __u64 wptr_va;
+ /**
+ * @mqd: MQD (memory queue descriptor) is a set of parameters which allow
+ * the GPU to uniquely define and identify a usermode queue.
+ *
+ * MQD data can be of different size for different GPU IP/engine and
+ * their respective versions/revisions, so this points to a __u64 *
+ * which holds IP specific MQD of this usermode queue.
+ */
+ __u64 mqd;
+ /**
+ * @mqd_size: size of MQD data in bytes; it must match the MQD structure
+ * size of the respective engine/revision defined in the UAPI, e.g. for
+ * gfx11 workloads, mqd_size = sizeof(struct drm_amdgpu_userq_mqd_gfx11).
+ */
+ __u64 mqd_size;
+};
+
+/* The structure to carry output of userqueue ops */
+struct drm_amdgpu_userq_out {
+ /**
+ * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique
+ * queue ID to represent the newly created userqueue in the system, otherwise
+ * it should be ignored.
+ */
+ __u32 queue_id;
+ __u32 _pad;
+};
+
+union drm_amdgpu_userq {
+ struct drm_amdgpu_userq_in in;
+ struct drm_amdgpu_userq_out out;
+};
+
+/* GFX V11 IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_gfx11 {
+ /**
+ * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer.
+ * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
+ */
+ __u64 shadow_va;
+ /**
+ * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
+ * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
+ */
+ __u64 csa_va;
+};
+
+/* GFX V11 SDMA IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_sdma_gfx11 {
+ /**
+ * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
+ * This must come from a separate GPU object; use the AMDGPU_INFO ioctl
+ * to query the required size.
+ */
+ __u64 csa_va;
+};
+
+/* GFX V11 Compute IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_compute_gfx11 {
+ /**
+ * @eop_va: Virtual address of the GPU memory to hold the EOP buffer.
+ * This must come from a separate GPU object; use the AMDGPU_INFO ioctl
+ * to query the required size.
+ */
+ __u64 eop_va;
+};
+
+/* userq signal/wait ioctl */
+struct drm_amdgpu_userq_signal {
+ /**
+ * @queue_id: Queue handle used by the userq fence creation function
+ * to retrieve the WPTR.
+ */
+ __u32 queue_id;
+ __u32 pad;
+ /**
+ * @syncobj_handles: The list of syncobj handles submitted by the user queue
+ * job to be signaled.
+ */
+ __u64 syncobj_handles;
+ /**
+ * @num_syncobj_handles: A count that represents the number of syncobj handles in
+ * @syncobj_handles.
+ */
+ __u64 num_syncobj_handles;
+ /**
+ * @bo_read_handles: The list of BO handles that the submitted user queue job
+ * is using for read only. This will update BO fences in the kernel.
+ */
+ __u64 bo_read_handles;
+ /**
+ * @bo_write_handles: The list of BO handles that the submitted user queue job
+ * is using for write only. This will update BO fences in the kernel.
+ */
+ __u64 bo_write_handles;
+ /**
+ * @num_bo_read_handles: A count that represents the number of read BO handles in
+ * @bo_read_handles.
+ */
+ __u32 num_bo_read_handles;
+ /**
+ * @num_bo_write_handles: A count that represents the number of write BO handles in
+ * @bo_write_handles.
+ */
+ __u32 num_bo_write_handles;
+
+};
+
+struct drm_amdgpu_userq_fence_info {
+ /**
+ * @va: A gpu address allocated for each queue which stores the
+ * read pointer (RPTR) value.
+ */
+ __u64 va;
+ /**
+ * @value: A 64 bit value represents the write pointer (WPTR) of the
+ * queue commands which compared with the RPTR value to signal the
+ * fences.
+ */
+ __u64 value;
+};
+
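+/*
+ * Userspace typically calls this ioctl twice: first with @num_fences = 0
+ * to query how many fences will be returned, then again with @out_fences
+ * pointing to a buffer large enough for that many
+ * struct drm_amdgpu_userq_fence_info entries.
+ */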
+struct drm_amdgpu_userq_wait {
+ /**
+ * @syncobj_handles: The list of syncobj handles submitted by the user queue
+ * job to get the va/value pairs.
+ */
+ __u64 syncobj_handles;
+ /**
+ * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by
+ * the user queue job to get the va/value pairs at given @syncobj_timeline_points.
+ */
+ __u64 syncobj_timeline_handles;
+ /**
+ * @syncobj_timeline_points: The list of timeline syncobj points submitted by the
+ * user queue job for the corresponding @syncobj_timeline_handles.
+ */
+ __u64 syncobj_timeline_points;
+ /**
+ * @bo_read_handles: The list of read BO handles submitted by the user queue
+ * job to get the va/value pairs.
+ */
+ __u64 bo_read_handles;
+ /**
+ * @bo_write_handles: The list of write BO handles submitted by the user queue
+ * job to get the va/value pairs.
+ */
+ __u64 bo_write_handles;
+ /**
+ * @num_syncobj_timeline_handles: A count that represents the number of timeline
+ * syncobj handles in @syncobj_timeline_handles.
+ */
+ __u16 num_syncobj_timeline_handles;
+ /**
+ * @num_fences: This field can be used both as input and output. As input it defines
+ * the maximum number of fences that can be returned and as output it will specify
+ * how many fences were actually returned from the ioctl.
+ */
+ __u16 num_fences;
+ /**
+ * @num_syncobj_handles: A count that represents the number of syncobj handles in
+ * @syncobj_handles.
+ */
+ __u32 num_syncobj_handles;
+ /**
+ * @num_bo_read_handles: A count that represents the number of read BO handles in
+ * @bo_read_handles.
+ */
+ __u32 num_bo_read_handles;
+ /**
+ * @num_bo_write_handles: A count that represents the number of write BO handles in
+ * @bo_write_handles.
+ */
+ __u32 num_bo_write_handles;
+ /**
+ * @out_fences: The field is a return value from the ioctl containing the list of
+ * address/value pairs to wait for.
+ */
+ __u64 out_fences;
+};
+
/* vm ioctl */
#define AMDGPU_VM_OP_RESERVE_VMID 1
#define AMDGPU_VM_OP_UNRESERVE_VMID 2
@@ -592,6 +833,19 @@ struct drm_amdgpu_gem_va {
__u64 offset_in_bo;
/** Specify mapping size. Must be correctly aligned. */
__u64 map_size;
+ /**
+	 * vm_timeline_point is a sequence number used to add a new timeline point.
+ */
+ __u64 vm_timeline_point;
+ /**
+ * The vm page table update fence is installed in given vm_timeline_syncobj_out
+ * at vm_timeline_point.
+ */
+ __u32 vm_timeline_syncobj_out;
+ /** the number of syncobj handles in @input_fence_syncobj_handles */
+ __u32 num_syncobj_handles;
+	/** Array of syncobj handles for the given input fences to wait on */
+ __u64 input_fence_syncobj_handles;
};
#define AMDGPU_HW_IP_GFX 0
diff --git a/tests/amdgpu/amd_userq_basic.c b/tests/amdgpu/amd_userq_basic.c
new file mode 100644
index 000000000..b010fed7a
--- /dev/null
+++ b/tests/amdgpu/amd_userq_basic.c
@@ -0,0 +1,1706 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ */
+ #include <pthread.h>
+ #include <time.h>
+ #include "lib/amdgpu/amd_memory.h"
+ #include "lib/amdgpu/amd_sdma.h"
+ #include "lib/amdgpu/amd_PM4.h"
+ #include "lib/amdgpu/amd_command_submission.h"
+ #include "lib/amdgpu/amd_compute.h"
+ #include "lib/amdgpu/amd_gfx.h"
+ #include "lib/amdgpu/amd_shaders.h"
+ #include "lib/amdgpu/amd_dispatch.h"
+ #include "include/drm-uapi/amdgpu_drm.h"
+ #include "lib/amdgpu/amd_cs_radv.h"
+
+ #define BUFFER_SIZE (8 * 1024)
+
+/* Flag to indicate secure buffer related workload, unused for now */
+ #define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0)
+/* Flag to indicate AQL workload, unused for now */
+ #define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+ #define PACKET_TYPE3 3
+ #define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
+ (((op) & 0xFF) << 8) | \
+ ((n) & 0x3FFF) << 16)
+
+ #define PACKET3_NOP 0x10
+ #define PACKET3_PROTECTED_FENCE_SIGNAL 0xd0
+ #define PACKET3_FENCE_WAIT_MULTI 0xd1
+ #define PACKET3_WRITE_DATA 0x37
+
+ #define PACKET3_WAIT_REG_MEM 0x3C
+ #define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
+ #define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
+ #define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
+ #define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
+
+ #define WR_CONFIRM (1 << 20)
+ #define WRITE_DATA_DST_SEL(x) ((x) << 8)
+ #define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
+ #define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
+ #define WAIT_MEM_ENGINE_SEL(x) ((x) << 0)
+ #define WAIT_MEM_WAIT_PREEMPTABLE(x) ((x) << 1)
+ #define WAIT_MEM_CACHE_POLICY(x) ((x) << 2)
+ #define WAIT_MEM_POLL_INTERVAL(x) ((x) << 16)
+
+ #define DOORBELL_INDEX 4
+ #define AMDGPU_USERQ_BO_WRITE 1
+
+ #define PACKET3_RELEASE_MEM 0x49
+ #define PACKET3_RELEASE_MEM_CACHE_POLICY(x) ((x) << 25)
+ #define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29)
+ #define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24)
+ #define CACHE_FLUSH_AND_INV_TS_EVENT 0x00000014
+
+ #define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0)
+ #define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8)
+ #define PACKET3_RELEASE_MEM_GCR_GLM_WB (1 << 12)
+ #define PACKET3_RELEASE_MEM_GCR_GLM_INV (1 << 13)
+ #define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14)
+ #define PACKET3_RELEASE_MEM_GCR_GL1_INV (1 << 15)
+ #define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16)
+ #define PACKET3_RELEASE_MEM_GCR_GL2_RANGE (1 << 17)
+ #define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19)
+ #define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20)
+ #define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21)
+ #define PACKET3_RELEASE_MEM_GCR_SEQ (1 << 22)
+
+//SDMA related
+ #define SDMA_OPCODE_COPY 1
+ #define SDMA_OPCODE_WRITE 2
+ #define SDMA_COPY_SUB_OPCODE_LINEAR 0
+ #define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \
+ (((sub_op) & 0xFF) << 8) | \
+ (((op) & 0xFF) << 0))
+ #define upper_32_bits(n) ((uint32_t)(((n) >> 16) >> 16))
 #define lower_32_bits(n) ((uint32_t)((n) & 0xffffffff))
+
 #define WORKLOAD_COUNT 1
+ #define DEBUG_USERQUEUE 1
+
+ #define PAGE_SIZE 4096
+ #define USERMODE_QUEUE_SIZE (PAGE_SIZE * 256)
+ #define ALIGNMENT 4096
+
+struct amdgpu_userq_bo {
+ amdgpu_bo_handle handle;
+ amdgpu_va_handle va_handle;
+ uint64_t mc_addr;
+ uint64_t size;
+ void *ptr;
+};
+
+static struct amdgpu_userq_bo shared_userq_bo;
+static int shared_syncobj_fd1;
+static int shared_syncobj_fd2;
+static bool signal_done;
+
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+
+ #if DEBUG_USERQUEUE
+static void packet_dump(uint32_t *ptr, int start, int end)
+{
+ int i;
+
+ igt_info("\n============PACKET==============\n");
+ for (i = start; i < end; i++)
+ igt_info("pkt[%d] = 0x%x\n", i - start, ptr[i]);
+
+ igt_info("=================================\n");
+}
+ #endif
+
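+/*
+ * Poll the destination buffer until the GPU writes the 0xdeadbeaf marker
+ * (giving up after ~10ms), then dump the first few dwords for inspection.
+ */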
+static void validation(uint32_t *workload)
+{
+ int i = 0;
+
+ while (workload[0] != 0xdeadbeaf) {
+ if (i++ > 100)
+ break;
+ usleep(100);
+ }
+
+ igt_info("\n========OUTPUT==========\n");
+ for (i = 0; i < 5; i++)
+ igt_info("worklod[%d] = %x\n", i, workload[i]);
+
+ igt_info("===========================\n");
+}
+
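+/*
+ * Append a RELEASE_MEM packet that writes @data to @addr once the prior
+ * packets have completed, then advance the CPU-visible write pointer and
+ * ring the doorbell to submit the work.
+ */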
+static void create_relmem_workload(uint32_t *ptr, int *npkt, int data,
+ uint64_t *wptr_cpu, uint64_t *doorbell_ptr,
+ uint32_t q_id, uint64_t addr)
+{
+ ptr[(*npkt)++] = (PACKET3(PACKET3_RELEASE_MEM, 6));
+ ptr[(*npkt)++] = 0x0030e514;
+ ptr[(*npkt)++] = 0x23010000;
+ ptr[(*npkt)++] = lower_32_bits(addr);
+ ptr[(*npkt)++] = upper_32_bits(addr);
+ ptr[(*npkt)++] = 0xffffffff & data;
+ ptr[(*npkt)++] = 0;
+ ptr[(*npkt)++] = q_id;
+ *wptr_cpu = *npkt;
+ doorbell_ptr[DOORBELL_INDEX] = *npkt;
+}
+
+static int create_submit_workload(uint32_t *ptr, int *npkt, uint32_t data,
+ uint64_t *wptr_cpu, uint64_t *doorbell_ptr,
+ uint32_t q_id, struct amdgpu_userq_bo *dstptr)
+{
+ #if DEBUG_USERQUEUE
+ int start = *npkt;
+ #endif
+ ptr[(*npkt)++] = PACKET3(PACKET3_WRITE_DATA, 7);
+ ptr[(*npkt)++] =
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM | WRITE_DATA_CACHE_POLICY(3);
+
+ ptr[(*npkt)++] = 0xfffffffc & (dstptr->mc_addr);
+ ptr[(*npkt)++] = (0xffffffff00000000 & (dstptr->mc_addr)) >> 32;
+ ptr[(*npkt)++] = data;
+ ptr[(*npkt)++] = data;
+ ptr[(*npkt)++] = data;
+ ptr[(*npkt)++] = data;
+ ptr[(*npkt)++] = data;
+ create_relmem_workload(ptr, npkt, 0xdeadbeaf, wptr_cpu,
+ doorbell_ptr, q_id, dstptr->mc_addr);
+ #if DEBUG_USERQUEUE
+ packet_dump(ptr, start, *npkt);
+ #endif
+ return 0;
+}
+
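+/*
+ * Allocate a doorbell BO and map it for CPU access. Writing the ring's
+ * new write pointer to a doorbell slot is what notifies the GPU that
+ * fresh packets are available in the user queue.
+ */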
+static void alloc_doorbell(amdgpu_device_handle device_handle, struct amdgpu_userq_bo *doorbell_bo,
+ unsigned int size, unsigned int domain)
+{
+ struct amdgpu_bo_alloc_request req = {0};
+ amdgpu_bo_handle buf_handle;
+ int r;
+
+ req.alloc_size = ALIGN(size, PAGE_SIZE);
+ req.preferred_heap = domain;
+
+ r = amdgpu_bo_alloc(device_handle, &req, &buf_handle);
+ igt_assert_eq(r, 0);
+
+ doorbell_bo->handle = buf_handle;
+ doorbell_bo->size = req.alloc_size;
+
+ r = amdgpu_bo_cpu_map(doorbell_bo->handle,
+ (void **)&doorbell_bo->ptr);
+ igt_assert_eq(r, 0);
+}
+
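+/*
+ * Wait for a timeline syncobj to catch up: query the last submitted
+ * point, wait on it with a 100ms timeout, and loop until the signaled
+ * point matches the submitted one.
+ */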
+static int timeline_syncobj_wait(amdgpu_device_handle device_handle, uint32_t timeline_syncobj_handle)
+{
+ uint64_t point, signaled_point;
+ uint64_t timeout;
+ struct timespec tp;
+ uint32_t flags = DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED;
+ int r;
+
+ do {
+ r = amdgpu_cs_syncobj_query2(device_handle, &timeline_syncobj_handle,
+ (uint64_t *)&point, 1, flags);
+ if (r)
+ return r;
+
+ timeout = 0;
+ clock_gettime(CLOCK_MONOTONIC, &tp);
+ timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
+ timeout += 100000000; //100 millisec
+ r = amdgpu_cs_syncobj_timeline_wait(device_handle, &timeline_syncobj_handle,
+ (uint64_t *)&point, 1, timeout,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_cs_syncobj_query(device_handle, &timeline_syncobj_handle, &signaled_point, 1);
+ if (r)
+ return r;
+ } while (point != signaled_point);
+
+ return r;
+}
+
+static int
+amdgpu_bo_unmap_and_free_uq(amdgpu_device_handle dev, amdgpu_bo_handle bo,
+ amdgpu_va_handle va_handle, uint64_t mc_addr, uint64_t size,
+ uint32_t timeline_syncobj_handle, uint16_t point)
+{
+ amdgpu_bo_cpu_unmap(bo);
+ amdgpu_bo_va_op_raw2(dev, bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP, timeline_syncobj_handle, point, 0, 0);
+
+ amdgpu_va_range_free(va_handle);
+ amdgpu_bo_free(bo);
+
+ return 0;
+}
+
+static int amdgpu_bo_alloc_and_map_uq(amdgpu_device_handle dev,
+ uint64_t size,
+ uint64_t alignment,
+ uint64_t heap,
+ uint64_t alloc_flags,
+ uint64_t mapping_flags,
+ amdgpu_bo_handle *bo,
+ void **cpu,
+ uint64_t *mc_address,
+ amdgpu_va_handle *va_handle,
+ uint32_t timeline_syncobj_handle,
+ uint64_t point)
+{
+ struct amdgpu_bo_alloc_request request = {};
+ amdgpu_bo_handle buf_handle;
+ amdgpu_va_handle handle;
+ uint64_t vmc_addr;
+ int r;
+
+ request.alloc_size = size;
+ request.phys_alignment = alignment;
+ request.preferred_heap = heap;
+ request.flags = alloc_flags;
+
+ r = amdgpu_bo_alloc(dev, &request, &buf_handle);
+ if (r)
+ return r;
+
+ r = amdgpu_va_range_alloc(dev,
+ amdgpu_gpu_va_range_general,
+ size, alignment, 0, &vmc_addr,
+ &handle, 0);
+ if (r)
+ goto error_va_alloc;
+
+ r = amdgpu_bo_va_op_raw2(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
+ AMDGPU_VM_PAGE_READABLE |
+ AMDGPU_VM_PAGE_WRITEABLE |
+ AMDGPU_VM_PAGE_EXECUTABLE |
+ mapping_flags,
+ AMDGPU_VA_OP_MAP,
+ timeline_syncobj_handle,
+ point, 0, 0);
+ if (r)
+ goto error_va_map;
+
+ r = amdgpu_bo_cpu_map(buf_handle, cpu);
+ if (r)
+ goto error_cpu_map;
+
+ *bo = buf_handle;
+ *mc_address = vmc_addr;
+ *va_handle = handle;
+
+ return 0;
+
+ error_cpu_map:
+ amdgpu_bo_cpu_unmap(buf_handle);
+ error_va_map:
+ amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
+ error_va_alloc:
+ amdgpu_bo_free(buf_handle);
+ return r;
+}
+
+static void free_workload(amdgpu_device_handle device_handle, struct amdgpu_userq_bo *dstptr,
+ uint32_t timeline_syncobj_handle, uint64_t point,
+ uint64_t syncobj_handles_array, uint32_t num_syncobj_handles)
+{
+ int r;
+
+ r = amdgpu_bo_unmap_and_free_uq(device_handle, dstptr->handle, dstptr->va_handle,
+ dstptr->mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle, point);
+ igt_assert_eq(r, 0);
+}
+
+static int allocate_workload(amdgpu_device_handle device_handle, struct amdgpu_userq_bo *dstptr,
+ uint32_t timeline_syncobj_handle, uint64_t point)
+{
+ uint64_t gtt_flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ int r;
+
+ r = amdgpu_bo_alloc_and_map_uq(device_handle, PAGE_SIZE,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &dstptr->handle, &dstptr->ptr,
+ &dstptr->mc_addr, &dstptr->va_handle,
+ timeline_syncobj_handle, point);
+ memset(dstptr->ptr, 0x0, PAGE_SIZE);
+ return r;
+}
+
+static int create_sync_objects(int fd, uint32_t *timeline_syncobj_handle,
+ uint32_t *timeline_syncobj_handle2)
+{
+ int r;
+
+ r = drmSyncobjCreate(fd, 0, timeline_syncobj_handle);
+ if (r)
+ return r;
+
+ r = drmSyncobjCreate(fd, 0, timeline_syncobj_handle2);
+
+ return r;
+}
+
+static void *userq_signal(void *data)
+{
+ struct amdgpu_userq_bo queue, shadow, doorbell, wptr_bo, rptr;
+ uint32_t q_id, syncobj_handle, syncobj_handle1, db_handle;
+ uint64_t gtt_flags = 0, *doorbell_ptr, *wptr;
+ struct drm_amdgpu_userq_mqd_gfx11 mqd;
+ struct amdgpu_userq_bo gds, csa;
+ uint32_t syncarray[2];
+ uint32_t *ptr;
+ int r, i;
+ uint32_t timeline_syncobj_handle;
+ uint64_t point = 0;
+ uint32_t timeline_syncobj_handle2;
+ uint64_t point2 = 0;
+ struct drm_amdgpu_userq_signal signal_data;
+ uint32_t bo_read_handles[1], bo_write_handles[1];
+ uint32_t read_handle, write_handle;
+
+ amdgpu_device_handle device = (amdgpu_device_handle)data;
+
+ int fd = amdgpu_device_get_fd(device);
+
+ r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &queue.handle, &queue.ptr,
+ &queue.mc_addr, &queue.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &wptr_bo.handle, &wptr_bo.ptr,
+ &wptr_bo.mc_addr, &wptr_bo.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &rptr.handle, &rptr.ptr,
+ &rptr.mc_addr, &rptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &shadow.handle, &shadow.ptr,
+ &shadow.mc_addr, &shadow.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ 0,
+ &gds.handle, &gds.ptr,
+ &gds.mc_addr, &gds.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ 0,
+ &csa.handle, &csa.ptr,
+ &csa.mc_addr, &csa.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
+
+ mqd.shadow_va = shadow.mc_addr;
+ mqd.csa_va = csa.mc_addr;
+
+ doorbell_ptr = (uint64_t *)doorbell.ptr;
+
+ ptr = (uint32_t *)queue.ptr;
+ memset(ptr, 0, sizeof(*ptr));
+
+ wptr = (uint64_t *)wptr_bo.ptr;
+ memset(wptr, 0, sizeof(*wptr));
+
+ amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
+
+ /* Create the Usermode Queue */
+ r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
+ db_handle, DOORBELL_INDEX,
+ queue.mc_addr, USERMODE_QUEUE_SIZE,
+ wptr_bo.mc_addr, rptr.mc_addr, &mqd, &q_id);
+ igt_assert_eq(r, 0);
+ if (r)
+ goto err_free_queue;
+
+ r = drmSyncobjCreate(fd, 0, &syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjCreate(fd, 0, &syncobj_handle1);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjHandleToFD(fd, syncobj_handle, &shared_syncobj_fd2);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjHandleToFD(fd, syncobj_handle1, &shared_syncobj_fd1);
+ igt_assert_eq(r, 0);
+
+ syncarray[0] = syncobj_handle;
+ syncarray[1] = syncobj_handle1;
+
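+ /*
+ * Fill the ring: a WRITE_DATA packet stamps the 0xdeadbeaf marker into
+ * the shared BO, NOP packets pad the submission, and a final
+ * PROTECTED_FENCE_SIGNAL lets the waiter thread observe completion.
+ */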
+ ptr[0] = PACKET3(PACKET3_WRITE_DATA, 7);
+ ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM | WRITE_DATA_CACHE_POLICY(3);
+ ptr[2] = 0xfffffffc & (shared_userq_bo.mc_addr);
+ ptr[3] = (0xffffffff00000000 & (shared_userq_bo.mc_addr)) >> 32;
+ ptr[4] = 0xdeadbeaf;
+ ptr[5] = 0xdeadbeaf;
+ ptr[6] = 0xdeadbeaf;
+ ptr[7] = 0xdeadbeaf;
+ ptr[8] = 0xdeadbeaf;
+
+ for (i = 9; i <= 60; i++)
+ ptr[i] = PACKET3(PACKET3_NOP, 0x3fff);
+
+ ptr[i++] = PACKET3(PACKET3_PROTECTED_FENCE_SIGNAL, 0);
+
+ *wptr = ++i;
+ r = amdgpu_bo_export(queue.handle, amdgpu_bo_handle_type_kms, &read_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_export(shadow.handle, amdgpu_bo_handle_type_kms, &write_handle);
+ igt_assert_eq(r, 0);
+ // Assign the exported handles to the arrays
+ bo_read_handles[0] = read_handle;
+ bo_write_handles[0] = write_handle;
+
+ signal_data.queue_id = q_id;
+ signal_data.syncobj_handles = (uint64_t)syncarray;
+ signal_data.num_syncobj_handles = 2;
+ signal_data.bo_write_handles = (uint64_t)bo_write_handles;
+ signal_data.num_bo_write_handles = 1;
+ signal_data.bo_read_handles = (uint64_t)bo_read_handles;
+ signal_data.num_bo_read_handles = 1;
+
+ r = amdgpu_userq_signal(device, &signal_data);
+ igt_assert_eq(r, 0);
+
+ doorbell_ptr[DOORBELL_INDEX] = i;
+
+ /* Free the Usermode Queue */
+ r = amdgpu_free_userqueue(device, q_id);
+ igt_assert_eq(r, 0);
+ if (!r) {
+ pthread_mutex_lock(&lock);
+ signal_done = true;
+ pthread_cond_signal(&cond);
+ pthread_mutex_unlock(&lock);
+ }
+
+err_free_queue:
+ r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
+ csa.va_handle,
+ csa.mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
+ gds.va_handle,
+ gds.mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
+ shadow.va_handle,
+ shadow.mc_addr, PAGE_SIZE * 4,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_cpu_unmap(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_free(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle,
+ rptr.mc_addr, PAGE_SIZE);
+
+ amdgpu_bo_unmap_and_free(wptr_bo.handle, wptr_bo.va_handle,
+ wptr_bo.mc_addr, PAGE_SIZE);
+
+ amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
+ queue.mc_addr, USERMODE_QUEUE_SIZE);
+
+ drmSyncobjDestroy(fd, timeline_syncobj_handle);
+ drmSyncobjDestroy(fd, timeline_syncobj_handle2);
+
+ return (void *)(long)r;
+}
+
+static void *userq_wait(void *data)
+{
+ struct amdgpu_userq_bo queue, shadow, doorbell, wptr_bo, rptr;
+ struct amdgpu_userq_bo gds, csa;
+ struct drm_amdgpu_userq_fence_info *fence_info = NULL;
+ uint32_t syncobj_handle, syncobj_handle1, db_handle;
+ uint64_t num_fences;
+ uint64_t gtt_flags = 0, *doorbell_ptr, *wptr;
+ struct drm_amdgpu_userq_mqd_gfx11 mqd;
+ uint64_t gpu_addr, reference_val;
+ uint32_t *ptr;
+ uint32_t q_id;
+ int i, r, fd;
+ uint32_t timeline_syncobj_handle;
+ uint64_t point = 0;
+ uint32_t timeline_syncobj_handle2;
+ uint64_t point2 = 0;
+ struct drm_amdgpu_userq_wait wait_data;
+ uint32_t bo_read_handles[1], bo_write_handles[1];
+ uint32_t read_handle, write_handle;
+ uint32_t syncarray[3];
+ uint64_t points[3];
+ amdgpu_device_handle device;
+
+ pthread_mutex_lock(&lock);
+ while (!signal_done)
+ pthread_cond_wait(&cond, &lock);
+ pthread_mutex_unlock(&lock);
+
+ device = (amdgpu_device_handle)data;
+ fd = amdgpu_device_get_fd(device);
+
+ r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &queue.handle, &queue.ptr,
+ &queue.mc_addr, &queue.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &wptr_bo.handle, &wptr_bo.ptr,
+ &wptr_bo.mc_addr, &wptr_bo.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &rptr.handle, &rptr.ptr,
+ &rptr.mc_addr, &rptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &shadow.handle, &shadow.ptr,
+ &shadow.mc_addr, &shadow.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ 0,
+ &gds.handle, &gds.ptr,
+ &gds.mc_addr, &gds.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ 0,
+ &csa.handle, &csa.ptr,
+ &csa.mc_addr, &csa.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
+
+ mqd.shadow_va = shadow.mc_addr;
+ mqd.csa_va = csa.mc_addr;
+
+ doorbell_ptr = (uint64_t *)doorbell.ptr;
+
+ ptr = (uint32_t *)queue.ptr;
+ memset(ptr, 0, sizeof(*ptr));
+
+ wptr = (uint64_t *)wptr_bo.ptr;
+ memset(wptr, 0, sizeof(*wptr));
+
+ amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
+
+ /* Create the Usermode Queue */
+ r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
+ db_handle, DOORBELL_INDEX,
+ queue.mc_addr, USERMODE_QUEUE_SIZE,
+ wptr_bo.mc_addr, rptr.mc_addr, &mqd, &q_id);
+ igt_assert_eq(r, 0);
+ if (r)
+ goto err_free_queue;
+
+ r = drmSyncobjFDToHandle(fd, shared_syncobj_fd1, &syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjFDToHandle(fd, shared_syncobj_fd2, &syncobj_handle1);
+ igt_assert_eq(r, 0);
+
+ syncarray[0] = syncobj_handle;
+ syncarray[1] = syncobj_handle1;
+
+ points[0] = 0;
+ points[1] = 0;
+ num_fences = 0;
+ r = amdgpu_bo_export(queue.handle, amdgpu_bo_handle_type_kms, &read_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_export(shadow.handle, amdgpu_bo_handle_type_kms, &write_handle);
+ igt_assert_eq(r, 0);
+
+ // Assign the exported handles to the arrays
+ bo_read_handles[0] = read_handle;
+ bo_write_handles[0] = write_handle;
+
+ wait_data.syncobj_handles = (uint64_t)syncarray;
+ wait_data.num_syncobj_handles = 2;
+ wait_data.syncobj_timeline_handles = (uint64_t)syncarray;
+ wait_data.syncobj_timeline_points = (uint64_t)points;
+ wait_data.num_syncobj_timeline_handles = 2;
+ wait_data.bo_read_handles = (uint64_t)bo_read_handles;
+ wait_data.num_bo_read_handles = 1;
+ wait_data.bo_write_handles = (uint64_t)bo_write_handles;
+ wait_data.num_bo_write_handles = 1;
+ wait_data.out_fences = (uint64_t)fence_info;
+ wait_data.num_fences = num_fences;
+
+ /* first pass: query how many fences will be returned */
+ r = amdgpu_userq_wait(device, &wait_data);
+ igt_assert_eq(r, 0);
+
+ num_fences = wait_data.num_fences;
+ fence_info = malloc(num_fences * sizeof(struct drm_amdgpu_userq_fence_info));
+ if (!fence_info)
+ goto err_free_queue;
+ memset(fence_info, 0, num_fences * sizeof(struct drm_amdgpu_userq_fence_info));
+ wait_data.out_fences = (uint64_t)fence_info;
+ r = amdgpu_userq_wait(device, &wait_data);
+ igt_assert_eq(r, 0);
+
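+ /*
+ * For each returned fence, emit a FENCE_WAIT_MULTI packet that blocks
+ * the queue until the 64-bit value at fence_info->va reaches
+ * fence_info->value.
+ */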
+ for (i = 0; i < num_fences; i++) {
+ igt_info("num_fences = %lu fence_info.va=0x%llx fence_info.value=%llu\n",
+ num_fences, (fence_info + i)->va, (fence_info + i)->value);
+
+ gpu_addr = (fence_info + i)->va;
+ reference_val = (fence_info + i)->value;
+ ptr[0] = PACKET3(PACKET3_FENCE_WAIT_MULTI, 4);
+ ptr[1] = WAIT_MEM_ENGINE_SEL(1) | WAIT_MEM_WAIT_PREEMPTABLE(0) | WAIT_MEM_CACHE_POLICY(3) | WAIT_MEM_POLL_INTERVAL(2);
+ ptr[2] = 0xffffffff & (gpu_addr);
+ ptr[3] = (0xffffffff00000000 & (gpu_addr)) >> 32;
+ ptr[4] = 0xffffffff & (reference_val);
+ ptr[5] = (0xffffffff00000000 & (reference_val)) >> 32;
+ *wptr = 6;
+ doorbell_ptr[DOORBELL_INDEX] = 6;
+ }
+ /* Free the Usermode Queue */
+ r = amdgpu_free_userqueue(device, q_id);
+ igt_assert_eq(r, 0);
+
+err_free_queue:
+ r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
+ csa.va_handle,
+ csa.mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
+ gds.va_handle,
+ gds.mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
+ shadow.va_handle,
+ shadow.mc_addr, PAGE_SIZE * 4,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_cpu_unmap(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_free(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle,
+ rptr.mc_addr, PAGE_SIZE);
+
+ amdgpu_bo_unmap_and_free(wptr_bo.handle, wptr_bo.va_handle,
+ wptr_bo.mc_addr, PAGE_SIZE);
+
+ amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
+ queue.mc_addr, USERMODE_QUEUE_SIZE);
+
+ r = drmSyncobjDestroy(fd, syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjDestroy(fd, syncobj_handle1);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjDestroy(fd, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+ r = drmSyncobjDestroy(fd, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+ free(fence_info);
+ return (void *)(long)r;
+}
+
+static void amdgpu_command_submission_umq_synchronize_test(amdgpu_device_handle device,
+ bool ce_avails)
+{
+ int r;
+ static pthread_t signal_thread, wait_thread;
+ uint64_t gtt_flags = 0;
+ uint16_t point = 0;
+ uint16_t point2 = 0;
+ uint32_t timeline_syncobj_handle;
+ uint32_t timeline_syncobj_handle2;
+
+ int fd = amdgpu_device_get_fd(device);
+
+ r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &shared_userq_bo.handle, &shared_userq_bo.ptr,
+ &shared_userq_bo.mc_addr, &shared_userq_bo.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = pthread_create(&signal_thread, NULL, userq_signal, device);
+ igt_assert_eq(r, 0);
+
+ r = pthread_create(&wait_thread, NULL, userq_wait, device);
+ igt_assert_eq(r, 0);
+
+ r = pthread_join(signal_thread, NULL);
+ igt_assert_eq(r, 0);
+
+ r = pthread_join(wait_thread, NULL);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ amdgpu_bo_unmap_and_free_uq(device, shared_userq_bo.handle,
+ shared_userq_bo.va_handle,
+ shared_userq_bo.mc_addr,
+ PAGE_SIZE, timeline_syncobj_handle2,
+ ++point2);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+}
+
+static void amdgpu_command_submission_umq_timeline_test(amdgpu_device_handle device,
+ bool ce_avails)
+{
+ struct amdgpu_userq_bo queue, shadow, doorbell, wptr, rptr;
+ struct amdgpu_userq_bo gds, csa;
+ struct drm_amdgpu_userq_fence_info *fence_info = NULL;
+ uint64_t num_fences;
+ uint64_t gtt_flags = 0, *doorbell_ptr, *wptr_cpu;
+ struct drm_amdgpu_userq_mqd_gfx11 mqd;
+ struct amdgpu_userq_bo dstptrs[WORKLOAD_COUNT];
+ uint32_t q_id, db_handle, *ptr;
+ uint32_t timeline_syncobj_handle;
+ uint64_t point = 0;
+ uint32_t timeline_syncobj_handle2;
+ uint64_t point2 = 0;
+ uint32_t syncarray[3];
+ uint64_t points[3];
+ uint32_t test_timeline_syncobj_handle;
+ uint32_t test_timeline_syncobj_handle2;
+ uint64_t signal_point, payload;
+ struct drm_amdgpu_userq_wait wait_data;
+ int i, r, npkt = 0;
+ uint32_t bo_read_handles[1], bo_write_handles[1];
+ uint32_t read_handle, write_handle;
+ int fd = amdgpu_device_get_fd(device);
+
+ r = create_sync_objects(fd, &timeline_syncobj_handle,
+ &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjCreate(fd, 0, &test_timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjCreate(fd, 0, &test_timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &queue.handle, &queue.ptr,
+ &queue.mc_addr, &queue.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &wptr.handle, &wptr.ptr,
+ &wptr.mc_addr, &wptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &rptr.handle, &rptr.ptr,
+ &rptr.mc_addr, &rptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &shadow.handle, &shadow.ptr,
+ &shadow.mc_addr, &shadow.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ 0,
+ &gds.handle, &gds.ptr,
+ &gds.mc_addr, &gds.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ 0,
+ &csa.handle, &csa.ptr,
+ &csa.mc_addr, &csa.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
+
+ mqd.shadow_va = shadow.mc_addr;
+ mqd.csa_va = csa.mc_addr;
+
+ doorbell_ptr = (uint64_t *) doorbell.ptr;
+
+ ptr = (uint32_t *)queue.ptr;
+ memset(ptr, 0, sizeof(*ptr));
+
+ wptr_cpu = (uint64_t *)wptr.ptr;
+
+ amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
+
+ /* Create the Usermode Queue */
+ r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
+ db_handle, DOORBELL_INDEX,
+ queue.mc_addr, USERMODE_QUEUE_SIZE,
+ wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
+ igt_assert_eq(r, 0);
+ if (r)
+ goto err_free_queue;
+
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+ }
+
+ /* wait */
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = create_submit_workload(ptr, &npkt, 0x1111*(i+1),
+ wptr_cpu, doorbell_ptr, q_id,
+ &dstptrs[i]);
+ igt_assert_eq(r, 0);
+ }
+
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ validation((uint32_t *)dstptrs[i].ptr);
+ signal_point = 5;
+ r = amdgpu_cs_syncobj_timeline_signal(device, &test_timeline_syncobj_handle,
+ &signal_point, 1);
+ igt_assert_eq(r, 0);
+ r = amdgpu_cs_syncobj_query(device, &test_timeline_syncobj_handle,
+ &payload, 1);
+ igt_assert_eq(r, 0);
+ igt_assert_eq(payload, 5);
+
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+ }
+
+ /* wait */
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = create_submit_workload(ptr, &npkt, 0x1111*(i+1),
+ wptr_cpu, doorbell_ptr, q_id,
+ &dstptrs[i]);
+ igt_assert_eq(r, 0);
+ }
+
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ validation((uint32_t *)dstptrs[i].ptr);
+
+ signal_point = 10;
+ r = amdgpu_cs_syncobj_timeline_signal(device, &test_timeline_syncobj_handle,
+ &signal_point, 1);
+ igt_assert_eq(r, 0);
+ r = amdgpu_cs_syncobj_query(device, &test_timeline_syncobj_handle,
+ &payload, 1);
+ igt_assert_eq(r, 0);
+ igt_assert_eq(payload, 10);
+
+ syncarray[0] = test_timeline_syncobj_handle;
+ syncarray[1] = test_timeline_syncobj_handle;
+
+ points[0] = 5;
+ points[1] = 10;
+
+ num_fences = 0;
+
+ // Export the buffer object handles
+ r = amdgpu_bo_export(queue.handle, amdgpu_bo_handle_type_kms, &read_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_export(shadow.handle, amdgpu_bo_handle_type_kms, &write_handle);
+ igt_assert_eq(r, 0);
+
+ // Assign the exported handles to the arrays
+ bo_read_handles[0] = read_handle;
+ bo_write_handles[0] = write_handle;
+
+ wait_data.syncobj_handles = (uint64_t)syncarray;
+ wait_data.num_syncobj_handles = 2;
+ wait_data.syncobj_timeline_handles = (uint64_t)syncarray;
+ wait_data.syncobj_timeline_points = (uint64_t)points;
+ wait_data.num_syncobj_timeline_handles = 2;
+ wait_data.bo_read_handles = (uint64_t)bo_read_handles;
+ wait_data.num_bo_read_handles = 1;
+ wait_data.bo_write_handles = (uint64_t)bo_write_handles;
+ wait_data.num_bo_write_handles = 1;
+ wait_data.out_fences = (uint64_t)fence_info;
+ wait_data.num_fences = num_fences;
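+ /*
+ * First pass with num_fences = 0 only queries how many fences the
+ * kernel will return; the second pass below fetches them into the
+ * freshly allocated fence_info array.
+ */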
+ r = amdgpu_userq_wait(device, &wait_data);
+ igt_assert_eq(r, 0);
+
+ num_fences = wait_data.num_fences;
+ fence_info = malloc(num_fences * sizeof(struct drm_amdgpu_userq_fence_info));
+ igt_assert(fence_info);
+ memset(fence_info, 0, num_fences * sizeof(struct drm_amdgpu_userq_fence_info));
+ wait_data.out_fences = (uint64_t)fence_info;
+ r = amdgpu_userq_wait(device, &wait_data);
+ igt_assert_eq(r, 0);
+
+ for (i = 0; i < num_fences; i++)
+ igt_info("num_fences = %lu fence_info.va=0x%llx fence_info.value=%llu\n",
+ num_fences, (fence_info + i)->va, (fence_info + i)->value);
+
+ free(fence_info);
+
+ /* Free the Usermode Queue */
+ r = amdgpu_free_userqueue(device, q_id);
+ igt_assert_eq(r, 0);
+
+ /* Free workload*/
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ free_workload(device, &dstptrs[i], timeline_syncobj_handle2, ++point2,
+ 0, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+err_free_queue:
+ r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
+ csa.va_handle,
+ csa.mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
+ gds.va_handle,
+ gds.mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
+ shadow.va_handle,
+ shadow.mc_addr, PAGE_SIZE * 4,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_cpu_unmap(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_free(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle,
+ rptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle,
+ wptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
+ queue.mc_addr, USERMODE_QUEUE_SIZE);
+
+ r = drmSyncobjDestroy(fd, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjDestroy(fd, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjDestroy(fd, test_timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjDestroy(fd, test_timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+}
+
+/**
+ * AMDGPU_HW_IP_DMA
+ * @param device
+ */
+static void amdgpu_command_submission_umq_sdma(amdgpu_device_handle device,
+ bool ce_avails)
+{
+ int r, i = 0, j = 0;
+ uint64_t gtt_flags = 0;
+ uint16_t point = 0;
+ uint16_t point2 = 0;
+ uint32_t *ptr, *dstptr;
+ uint32_t q_id, db_handle;
+ uint32_t timeline_syncobj_handle;
+ uint32_t timeline_syncobj_handle2;
+ uint64_t *doorbell_ptr, *wptr_cpu;
+ const int sdma_write_length = WORKLOAD_COUNT;
+ struct drm_amdgpu_userq_mqd_sdma_gfx11 mqd;
+ struct amdgpu_userq_bo queue, doorbell, rptr, wptr, dst;
+ int fd = amdgpu_device_get_fd(device);
+
+ r = create_sync_objects(fd, &timeline_syncobj_handle,
+ &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &queue.handle, &queue.ptr,
+ &queue.mc_addr, &queue.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &wptr.handle, &wptr.ptr,
+ &wptr.mc_addr, &wptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &rptr.handle, &rptr.ptr,
+ &rptr.mc_addr, &rptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 10,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ AMDGPU_VM_MTYPE_UC,
+ &dst.handle, &dst.ptr,
+ &dst.mc_addr, &dst.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ alloc_doorbell(device, &doorbell, PAGE_SIZE * 2, AMDGPU_GEM_DOMAIN_DOORBELL);
+
+ doorbell_ptr = (uint64_t *) doorbell.ptr;
+
+ wptr_cpu = (uint64_t *) wptr.ptr;
+
+ ptr = (uint32_t *) queue.ptr;
+ memset(ptr, 0, sizeof(*ptr));
+
+ dstptr = (uint32_t *)dst.ptr;
+ memset(dstptr, 0, sizeof(*dstptr) * sdma_write_length);
+
+ amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
+
+ /* Create the Usermode Queue */
+ r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_DMA,
+ db_handle, DOORBELL_INDEX,
+ queue.mc_addr, USERMODE_QUEUE_SIZE,
+ wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
+ igt_assert_eq(r, 0);
+ if (r)
+ goto err_free_queue;
+
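+ /*
+ * SDMA linear WRITE packet: header, 64-bit destination address, dword
+ * count minus one, then the payload dwords. SDMA read/write pointers
+ * are byte offsets, hence the "i << 2" below.
+ */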
+ ptr[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 0, 0);
+ ptr[i++] = lower_32_bits(dst.mc_addr);
+ ptr[i++] = upper_32_bits(dst.mc_addr);
+ ptr[i++] = sdma_write_length - 1;
+ while (j++ < sdma_write_length)
+ ptr[i++] = 0xdeadbeaf;
+
+ *wptr_cpu = i << 2;
+
+ doorbell_ptr[DOORBELL_INDEX] = i << 2;
+
+ i = 0;
+ while (dstptr[0] != 0xdeadbeaf) {
+ if (i++ > 100)
+ break;
+ usleep(100);
+ }
+
+ for (int k = 0; k < sdma_write_length; k++)
+ igt_assert_eq(dstptr[k], 0xdeadbeaf);
+
+ /* Free the Usermode Queue */
+ r = amdgpu_free_userqueue(device, q_id);
+ igt_assert_eq(r, 0);
+
+ err_free_queue:
+ r = amdgpu_bo_unmap_and_free_uq(device, dst.handle,
+ dst.va_handle, dst.mc_addr,
+ PAGE_SIZE * 10,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_cpu_unmap(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_free(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle, rptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle, wptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
+ queue.mc_addr, USERMODE_QUEUE_SIZE);
+
+ drmSyncobjDestroy(fd, timeline_syncobj_handle);
+ drmSyncobjDestroy(fd, timeline_syncobj_handle2);
+}
+
+/**
+ * AMDGPU_HW_IP_COMPUTE
+ * @param device
+ */
+static void amdgpu_command_submission_umq_compute(amdgpu_device_handle device,
+ bool ce_avails)
+{
+ int r, i = 0, npkt = 0;
+ uint64_t gtt_flags = 0;
+ uint16_t point = 0;
+ uint16_t point2 = 0;
+ uint32_t *ptr;
+ uint32_t q_id, db_handle;
+ uint32_t timeline_syncobj_handle;
+ uint32_t timeline_syncobj_handle2;
+ uint64_t *doorbell_ptr, *wptr_cpu;
+ struct amdgpu_userq_bo dstptrs[WORKLOAD_COUNT];
+ struct drm_amdgpu_userq_mqd_compute_gfx11 mqd;
+ struct amdgpu_userq_bo queue, doorbell, rptr, wptr, eop;
+ int fd = amdgpu_device_get_fd(device);
+
+ r = create_sync_objects(fd, &timeline_syncobj_handle,
+ &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &queue.handle, &queue.ptr,
+ &queue.mc_addr, &queue.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &wptr.handle, &wptr.ptr,
+ &wptr.mc_addr, &wptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &rptr.handle, &rptr.ptr,
+ &rptr.mc_addr, &rptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, 256,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags, AMDGPU_VM_MTYPE_UC,
+ &eop.handle, &eop.ptr,
+ &eop.mc_addr, &eop.va_handle,
+ timeline_syncobj_handle,
+ ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
+
+ mqd.eop_va = eop.mc_addr;
+
+ doorbell_ptr = (uint64_t *) doorbell.ptr;
+
+ wptr_cpu = (uint64_t *) wptr.ptr;
+
+ ptr = (uint32_t *) queue.ptr;
+ memset(ptr, 0, sizeof(*ptr));
+
+ amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
+
+ /* Create the Usermode Queue */
+ r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_COMPUTE,
+ db_handle, DOORBELL_INDEX,
+ queue.mc_addr, USERMODE_QUEUE_SIZE,
+ wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
+ igt_assert_eq(r, 0);
+ if (r)
+ goto err_free_queue;
+
+ /* allocate workload */
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle,
+ ++point);
+ igt_assert_eq(r, 0);
+ }
+
+ /* wait */
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ /* create workload pkt */
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = create_submit_workload(ptr, &npkt, 0x1111 * (i + 1),
+ wptr_cpu, doorbell_ptr, q_id,
+ &dstptrs[i]);
+ igt_assert_eq(r, 0);
+ }
+
+ /* validation of workload pkt */
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ validation((uint32_t *) dstptrs[i].ptr);
+
+ /* Free the Usermode Queue */
+ r = amdgpu_free_userqueue(device, q_id);
+ igt_assert_eq(r, 0);
+
+ /* Free workload */
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ free_workload(device, &dstptrs[i], timeline_syncobj_handle2, ++point2,
+ 0, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ err_free_queue:
+ r = amdgpu_bo_unmap_and_free_uq(device, eop.handle,
+ eop.va_handle, eop.mc_addr,
+ 256,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_cpu_unmap(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_free(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle, rptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle, wptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
+ queue.mc_addr, USERMODE_QUEUE_SIZE);
+
+ drmSyncobjDestroy(fd, timeline_syncobj_handle);
+ drmSyncobjDestroy(fd, timeline_syncobj_handle2);
+}
+
+/**
+ * AMDGPU_HW_IP_GFX
+ * @param device
+ */
+static void amdgpu_command_submission_umq_gfx(amdgpu_device_handle device,
+ bool ce_avails)
+{
+ int r, i = 0, npkt = 0;
+ uint64_t gtt_flags = 0;
+ uint16_t point = 0;
+ uint16_t point2 = 0;
+ uint32_t *ptr;
+ uint32_t q_id, db_handle;
+ uint32_t timeline_syncobj_handle;
+ uint32_t timeline_syncobj_handle2;
+ uint64_t *doorbell_ptr, *wptr_cpu;
+ struct amdgpu_userq_bo dstptrs[WORKLOAD_COUNT];
+ struct drm_amdgpu_userq_mqd_gfx11 mqd;
+ struct amdgpu_userq_bo queue, shadow, doorbell, rptr, wptr, gds, csa;
+ int fd = amdgpu_device_get_fd(device);
+
+ r = create_sync_objects(fd, &timeline_syncobj_handle,
+ &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &queue.handle, &queue.ptr,
+ &queue.mc_addr, &queue.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &wptr.handle, &wptr.ptr,
+ &wptr.mc_addr, &wptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &rptr.handle, &rptr.ptr,
+ &rptr.mc_addr, &rptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 18,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags, AMDGPU_VM_MTYPE_UC,
+ &shadow.handle, &shadow.ptr,
+ &shadow.mc_addr,
+ &shadow.va_handle,
+ timeline_syncobj_handle,
+ ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags, AMDGPU_VM_MTYPE_UC,
+ &gds.handle, &gds.ptr,
+ &gds.mc_addr, &gds.va_handle,
+ timeline_syncobj_handle,
+ ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 20,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags, AMDGPU_VM_MTYPE_UC,
+ &csa.handle, &csa.ptr,
+ &csa.mc_addr, &csa.va_handle,
+ timeline_syncobj_handle,
+ ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
+
+ mqd.shadow_va = shadow.mc_addr;
+ mqd.csa_va = csa.mc_addr;
+
+ doorbell_ptr = (uint64_t *) doorbell.ptr;
+
+ wptr_cpu = (uint64_t *) wptr.ptr;
+
+ ptr = (uint32_t *) queue.ptr;
+ memset(ptr, 0, sizeof(*ptr));
+
+ amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
+
+ /* Create the Usermode Queue */
+ r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
+ db_handle, DOORBELL_INDEX,
+ queue.mc_addr, USERMODE_QUEUE_SIZE,
+ wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
+ igt_assert_eq(r, 0);
+ if (r)
+ goto err_free_queue;
+
+ /* allocate workload */
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle,
+ ++point);
+ igt_assert_eq(r, 0);
+ }
+
+ /* wait */
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ /* create workload pkt */
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = create_submit_workload(ptr, &npkt, 0x1111 * (i + 1),
+ wptr_cpu, doorbell_ptr, q_id,
+ &dstptrs[i]);
+ igt_assert_eq(r, 0);
+ }
+
+ /* validation of workload pkt */
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ validation((uint32_t *) dstptrs[i].ptr);
+
+ /* Free the Usermode Queue */
+ r = amdgpu_free_userqueue(device, q_id);
+ igt_assert_eq(r, 0);
+
+ /* Free workload */
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ free_workload(device, &dstptrs[i], timeline_syncobj_handle2, ++point2,
+ 0, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ err_free_queue:
+ r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
+ csa.va_handle, csa.mc_addr,
+ PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
+ gds.va_handle, gds.mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
+ shadow.va_handle, shadow.mc_addr,
+ PAGE_SIZE * 4,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_cpu_unmap(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_free(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle, rptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle, wptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
+ queue.mc_addr, USERMODE_QUEUE_SIZE);
+
+ drmSyncobjDestroy(fd, timeline_syncobj_handle);
+ drmSyncobjDestroy(fd, timeline_syncobj_handle2);
+}
+
+igt_main
+{
+ amdgpu_device_handle device;
+ struct amdgpu_gpu_info gpu_info = {0};
+ struct drm_amdgpu_info_hw_ip info = {0};
+ int fd = -1;
+ int r;
+ bool arr_cap[AMD_IP_MAX] = {0};
+
+ igt_fixture {
+ uint32_t major, minor;
+ int err;
+
+ fd = drm_open_driver(DRIVER_AMDGPU);
+
+ err = amdgpu_device_initialize(fd, &major, &minor, &device);
+ igt_require(err == 0);
+ r = amdgpu_query_gpu_info(device, &gpu_info);
+ igt_assert_eq(r, 0);
+ r = amdgpu_query_hw_ip_info(device, AMDGPU_HW_IP_GFX, 0, &info);
+ igt_assert_eq(r, 0);
+ r = setup_amdgpu_ip_blocks(major, minor, &gpu_info, device);
+ igt_assert_eq(r, 0);
+ asic_rings_readness(device, 1, arr_cap);
+ }
+
+ igt_describe("Check-GFX-UMQ-for-every-available-ring-works-for-write-const-fill-and-copy-operation-using-more-than-one-IB-and-shared-IB");
+ igt_subtest_with_dynamic("umq-gfx-with-IP-GFX") {
+ if (arr_cap[AMD_IP_GFX]) {
+ igt_dynamic_f("umq-gfx")
+ amdgpu_command_submission_umq_gfx(device,
+ info.
+ hw_ip_version_major
+ < 11);
+ }
+ }
+
+ igt_describe("Check-COMPUTE-UMQ-for-every-available-ring-works-for-write-const-fill-and-copy-operation-using-more-than-one-IB-and-shared-IB");
+ igt_subtest_with_dynamic("umq-gfx-with-IP-COMPUTE") {
+ if (arr_cap[AMD_IP_COMPUTE]) {
+ igt_dynamic_f("umq-compute")
+ amdgpu_command_submission_umq_compute(device,
+ info.
+ hw_ip_version_major
+ < 11);
+ }
+ }
+
+ igt_describe("Check-SDMA-UMQ-for-every-available-ring-works-for-write-const-fill-and-copy-operation-using-more-than-one-IB-and-shared-IB");
+ igt_subtest_with_dynamic("umq-gfx-with-IP-SDMA") {
+ if (arr_cap[AMD_IP_DMA]) {
+ igt_dynamic_f("umq-sdma")
+ amdgpu_command_submission_umq_sdma(device,
+ info.
+ hw_ip_version_major
+ < 11);
+ }
+ }
+
+ igt_describe("Check-amdgpu_command_submission_umq_timeline_test");
+ igt_subtest_with_dynamic("umq-Syncobj-timeline") {
+ if (arr_cap[AMD_IP_DMA]) {
+ igt_dynamic_f("umq_timeline")
+ amdgpu_command_submission_umq_timeline_test(device,
+ info.
+ hw_ip_version_major
+ < 11);
+ }
+ }
+
+ igt_describe("Check-amdgpu_command_submission_umq_synchronize_test");
+ igt_subtest_with_dynamic("umq-Synchronize") {
+ if (arr_cap[AMD_IP_DMA]) {
+ igt_dynamic_f("umq_synchronize")
+ amdgpu_command_submission_umq_synchronize_test(device,
+ info.
+ hw_ip_version_major
+ < 11);
+ }
+ }
+
+ igt_fixture {
+ amdgpu_device_deinitialize(device);
+ drm_close_driver(fd);
+ }
+}
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
index 7d40f788b..a15a3884c 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -63,7 +63,13 @@ if libdrm_amdgpu.found()
else
warning('libdrm <= 2.4.104 found, amd_queue_reset test not applicable')
endif
- amdgpu_deps += libdrm_amdgpu
+ # Check for amdgpu_create_userqueue function
+ if cc.has_function('amdgpu_create_userqueue', dependencies: libdrm_amdgpu)
+ amdgpu_progs += [ 'amd_userq_basic' ]
+ else
+ warning('amdgpu_create_userqueue not found in libdrm_amdgpu, skipping amd userq test')
+ endif
+ amdgpu_deps += libdrm_amdgpu
endif
foreach prog : amdgpu_progs
--
2.25.1
* ✓ Xe.CI.BAT: success for test/amdgpu: add user queue test
2025-03-27 7:17 [PATCH i-g-t] test/amdgpu: add user queue test Jesse.zhang@amd.com
@ 2025-03-27 7:50 ` Patchwork
2025-03-27 8:13 ` ✗ i915.CI.BAT: failure " Patchwork
` (4 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2025-03-27 7:50 UTC (permalink / raw)
To: Jesse.zhang@amd.com; +Cc: igt-dev
[-- Attachment #1: Type: text/plain, Size: 2820 bytes --]
== Series Details ==
Series: test/amdgpu: add user queue test
URL : https://patchwork.freedesktop.org/series/146842/
State : success
== Summary ==
CI Bug Log - changes from XEIGT_8288_BAT -> XEIGTPW_12851_BAT
====================================================
Summary
-------
**SUCCESS**
No regressions found.
Participating hosts (9 -> 9)
------------------------------
No changes in participating hosts
Known issues
------------
Here are the changes found in XEIGTPW_12851_BAT that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@xe_live_ktest@xe_bo:
- bat-adlp-vf: NOTRUN -> [SKIP][1] ([Intel XE#2229] / [Intel XE#455]) +2 other tests skip
[1]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/bat-adlp-vf/igt@xe_live_ktest@xe_bo.html
* igt@xe_live_ktest@xe_migrate:
- bat-adlp-vf: NOTRUN -> [ABORT][2] ([Intel XE#4520]) +1 other test abort
[2]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/bat-adlp-vf/igt@xe_live_ktest@xe_migrate.html
* igt@xe_pat@pat-index-xe2:
- bat-adlp-vf: NOTRUN -> [SKIP][3] ([Intel XE#977])
[3]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/bat-adlp-vf/igt@xe_pat@pat-index-xe2.html
* igt@xe_pat@pat-index-xehpc:
- bat-adlp-vf: NOTRUN -> [SKIP][4] ([Intel XE#2838] / [Intel XE#979])
[4]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/bat-adlp-vf/igt@xe_pat@pat-index-xehpc.html
* igt@xe_pat@pat-index-xelpg:
- bat-adlp-vf: NOTRUN -> [SKIP][5] ([Intel XE#979])
[5]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/bat-adlp-vf/igt@xe_pat@pat-index-xelpg.html
#### Possible fixes ####
* igt@xe_vm@shared-pte-page:
- bat-adlp-vf: [ABORT][6] ([Intel XE#3970]) -> [PASS][7]
[6]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/bat-adlp-vf/igt@xe_vm@shared-pte-page.html
[7]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/bat-adlp-vf/igt@xe_vm@shared-pte-page.html
[Intel XE#2229]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2229
[Intel XE#2838]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2838
[Intel XE#3970]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3970
[Intel XE#4520]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4520
[Intel XE#455]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/455
[Intel XE#977]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/977
[Intel XE#979]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/979
Build changes
-------------
* IGT: IGT_8288 -> IGTPW_12851
IGTPW_12851: 12851
IGT_8288: 8288
xe-2854-14c330bc015ded4a1f1dd1f5aeb8617077aaa7e8: 14c330bc015ded4a1f1dd1f5aeb8617077aaa7e8
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/index.html
[-- Attachment #2: Type: text/html, Size: 3572 bytes --]
* ✗ i915.CI.BAT: failure for test/amdgpu: add user queue test
2025-03-27 7:17 [PATCH i-g-t] test/amdgpu: add user queue test Jesse.zhang@amd.com
2025-03-27 7:50 ` ✓ Xe.CI.BAT: success for " Patchwork
@ 2025-03-27 8:13 ` Patchwork
2025-03-27 15:31 ` ✗ Xe.CI.Full: " Patchwork
` (3 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2025-03-27 8:13 UTC (permalink / raw)
To: Jesse.zhang@amd.com; +Cc: igt-dev
[-- Attachment #1: Type: text/plain, Size: 2725 bytes --]
== Series Details ==
Series: test/amdgpu: add user queue test
URL : https://patchwork.freedesktop.org/series/146842/
State : failure
== Summary ==
CI Bug Log - changes from IGT_8288 -> IGTPW_12851
====================================================
Summary
-------
**FAILURE**
Serious unknown changes coming with IGTPW_12851 absolutely need to be
verified manually.
If you think the reported changes have nothing to do with the changes
introduced in IGTPW_12851, please notify your bug team (I915-ci-infra@lists.freedesktop.org) to allow them
to document this new failure mode, which will reduce false positives in CI.
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_12851/index.html
Participating hosts (42 -> 41)
------------------------------
Missing (1): fi-snb-2520m
Possible new issues
-------------------
Here are the unknown changes that may have been introduced in IGTPW_12851:
### IGT changes ###
#### Possible regressions ####
* igt@i915_pm_rpm@module-reload:
- bat-adlp-6: [PASS][1] -> [DMESG-WARN][2] +78 other tests dmesg-warn
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_8288/bat-adlp-6/igt@i915_pm_rpm@module-reload.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_12851/bat-adlp-6/igt@i915_pm_rpm@module-reload.html
Known issues
------------
Here are the changes found in IGTPW_12851 that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@i915_selftest@live@workarounds:
- bat-arlh-2: [PASS][3] -> [DMESG-FAIL][4] ([i915#12061]) +1 other test dmesg-fail
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_8288/bat-arlh-2/igt@i915_selftest@live@workarounds.html
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_12851/bat-arlh-2/igt@i915_selftest@live@workarounds.html
#### Possible fixes ####
* igt@i915_selftest@live@late_gt_pm:
- fi-cfl-8109u: [DMESG-WARN][5] ([i915#13735]) -> [PASS][6] +132 other tests pass
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_8288/fi-cfl-8109u/igt@i915_selftest@live@late_gt_pm.html
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_12851/fi-cfl-8109u/igt@i915_selftest@live@late_gt_pm.html
[i915#12061]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12061
[i915#13735]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13735
Build changes
-------------
* CI: CI-20190529 -> None
* IGT: IGT_8288 -> IGTPW_12851
CI-20190529: 20190529
CI_DRM_16321: 14c330bc015ded4a1f1dd1f5aeb8617077aaa7e8 @ git://anongit.freedesktop.org/gfx-ci/linux
IGTPW_12851: 12851
IGT_8288: 8288
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_12851/index.html
[-- Attachment #2: Type: text/html, Size: 3377 bytes --]
* ✗ Xe.CI.Full: failure for test/amdgpu: add user queue test
2025-03-27 7:17 [PATCH i-g-t] test/amdgpu: add user queue test Jesse.zhang@amd.com
2025-03-27 7:50 ` ✓ Xe.CI.BAT: success for " Patchwork
2025-03-27 8:13 ` ✗ i915.CI.BAT: failure " Patchwork
@ 2025-03-27 15:31 ` Patchwork
2025-03-27 16:32 ` [PATCH i-g-t] " Kamil Konieczny
` (2 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2025-03-27 15:31 UTC (permalink / raw)
To: Jesse.zhang@amd.com; +Cc: igt-dev
[-- Attachment #1: Type: text/plain, Size: 325 bytes --]
== Series Details ==
Series: test/amdgpu: add user queue test
URL : https://patchwork.freedesktop.org/series/146842/
State : failure
== Summary ==
ERROR: The runconfig 'XEIGTPW_12851_FULL' does not exist in the database
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/index.html
[-- Attachment #2: Type: text/html, Size: 887 bytes --]
* Re: [PATCH i-g-t] test/amdgpu: add user queue test
2025-03-27 7:17 [PATCH i-g-t] test/amdgpu: add user queue test Jesse.zhang@amd.com
` (2 preceding siblings ...)
2025-03-27 15:31 ` ✗ Xe.CI.Full: " Patchwork
@ 2025-03-27 16:32 ` Kamil Konieczny
2025-03-27 19:00 ` vitaly prosyak
2025-04-06 13:43 ` ✗ Xe.CI.Full: failure for " Patchwork
5 siblings, 0 replies; 8+ messages in thread
From: Kamil Konieczny @ 2025-03-27 16:32 UTC (permalink / raw)
To: Jesse.zhang@amd.com
Cc: igt-dev, Vitaly Prosyak, Alex Deucher, Christian Koenig,
Srinivasan Shanmugam
Hi Jesse.zhang,
On 2025-03-27 at 15:17:44 +0800, Jesse.zhang@amd.com wrote:
> From: "Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>"
>
I have two small nits, the first about the subject:
[PATCH i-g-t] test/amdgpu: add user queue test
s/test/tests/
so it will be:
[PATCH i-g-t] tests/amdgpu: add user queue test
> This patch introduces a new test for AMDGPU user queues, which provides
> functionality for userspace to manage GPU queues directly. The test covers:
>
> 1. Basic user queue operations for GFX, COMPUTE and SDMA IP blocks
> 2. Synchronization between user queues using syncobjs
> 3. Timeline-based synchronization
> 4. Multi-threaded signaling and waiting scenarios
>
> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
> Signed-off-by: Jesse.zhang <Jesse.zhang@amd.com>
> ---
> include/drm-uapi/amdgpu_drm.h | 254 +++++
This is the second nit: please make such changes in a separate patch;
consult README.md for a guide on how to do it (a note: you could also
take the changes from drm-tip, just give a link to lore.kernel.org
and quote the subject of the relevant commit).
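For example, the reference could take this form (illustrative only;
the message-id and subject here are placeholders):

  Link: https://lore.kernel.org/dri-devel/<message-id>/
  ("drm/amdgpu: <subject of the relevant commit>")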
> tests/amdgpu/amd_userq_basic.c | 1706 ++++++++++++++++++++++++++++++++
> tests/amdgpu/meson.build | 8 +-
> 3 files changed, 1967 insertions(+), 1 deletion(-)
> create mode 100644 tests/amdgpu/amd_userq_basic.c
>
> diff --git a/include/drm-uapi/amdgpu_drm.h b/include/drm-uapi/amdgpu_drm.h
> index efe5de6ce..d83216a59 100644
> --- a/include/drm-uapi/amdgpu_drm.h
> +++ b/include/drm-uapi/amdgpu_drm.h
> @@ -54,6 +54,9 @@ extern "C" {
> #define DRM_AMDGPU_VM 0x13
> #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
> #define DRM_AMDGPU_SCHED 0x15
> +#define DRM_AMDGPU_USERQ 0x16
> +#define DRM_AMDGPU_USERQ_SIGNAL 0x17
> +#define DRM_AMDGPU_USERQ_WAIT 0x18
>
> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -71,6 +74,9 @@ extern "C" {
> #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
> #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> +#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> +#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>
> /**
> * DOC: memory domains
> @@ -319,6 +325,241 @@ union drm_amdgpu_ctx {
> union drm_amdgpu_ctx_out out;
> };
>
> +/* user queue IOCTL operations */
> +#define AMDGPU_USERQ_OP_CREATE 1
> +#define AMDGPU_USERQ_OP_FREE 2
> +
> +/*
> + * This structure is a container to pass input configuration
> + * info for all supported userqueue related operations.
> + * For operation AMDGPU_USERQ_OP_CREATE: user is expected
> + * to set all fields, except the parameter 'queue_id'.
> + * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected
> + * to be set is 'queue_id', everything else is ignored.
> + */
> +struct drm_amdgpu_userq_in {
> + /** AMDGPU_USERQ_OP_* */
> + __u32 op;
> + /** Queue id passed for operation USERQ_OP_FREE */
> + __u32 queue_id;
> + /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */
> + __u32 ip_type;
> + /**
> + * @doorbell_handle: the handle of doorbell GEM object
> + * associated to this userqueue client.
> + */
> + __u32 doorbell_handle;
> + /**
> + * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo.
> + * Kernel will generate absolute doorbell offset using doorbell_handle
> + * and doorbell_offset in the doorbell bo.
> + */
> + __u32 doorbell_offset;
> + __u32 _pad;
> + /**
> + * @queue_va: Virtual address of the GPU memory which holds the queue
> + * object. The queue holds the workload packets.
> + */
> + __u64 queue_va;
> + /**
> + * @queue_size: Size of the queue in bytes, this needs to be 256-byte
> + * aligned.
> + */
> + __u64 queue_size;
> + /**
> + * @rptr_va: Virtual address of the GPU memory which holds the ring RPTR.
> + * This object must be at least 8 bytes in size and aligned to an 8-byte offset.
> + */
> + __u64 rptr_va;
> + /**
> + * @wptr_va: Virtual address of the GPU memory which holds the ring WPTR.
> + * This object must be at least 8 bytes in size and aligned to an 8-byte offset.
> + *
> + * Queue, RPTR and WPTR can come from the same object, as long as the size
> + * and alignment related requirements are met.
> + */
> + __u64 wptr_va;
> + /**
> + * @mqd: MQD (memory queue descriptor) is a set of parameters which allow
> + * the GPU to uniquely define and identify a usermode queue.
> + *
> + * MQD data can be of different size for different GPU IP/engine and
> + * their respective versions/revisions, so this points to a __u64 *
> + * which holds IP specific MQD of this usermode queue.
> + */
> + __u64 mqd;
> + /**
> + * @mqd_size: size of MQD data in bytes; it must match the MQD structure
> + * size of the respective engine/revision defined in UAPI, e.g. for
> + * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11).
> + */
> + __u64 mqd_size;
> +};
> +
> +/* The structure to carry output of userqueue ops */
> +struct drm_amdgpu_userq_out {
> + /**
> + * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique
> + * queue ID to represent the newly created userqueue in the system, otherwise
> + * it should be ignored.
> + */
> + __u32 queue_id;
> + __u32 _pad;
> +};
> +
> +union drm_amdgpu_userq {
> + struct drm_amdgpu_userq_in in;
> + struct drm_amdgpu_userq_out out;
> +};
> +
> +/* GFX V11 IP specific MQD parameters */
> +struct drm_amdgpu_userq_mqd_gfx11 {
> + /**
> + * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer.
> + * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
> + */
> + __u64 shadow_va;
> + /**
> + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
> + * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
> + */
> + __u64 csa_va;
> +};
> +
> +/* GFX V11 SDMA IP specific MQD parameters */
> +struct drm_amdgpu_userq_mqd_sdma_gfx11 {
> + /**
> + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
> + * This must be from a separate GPU object; use the AMDGPU_INFO IOCTL
> + * to get the size.
> + */
> + __u64 csa_va;
> +};
> +
> +/* GFX V11 Compute IP specific MQD parameters */
> +struct drm_amdgpu_userq_mqd_compute_gfx11 {
> + /**
> + * @eop_va: Virtual address of the GPU memory to hold the EOP buffer.
> + * This must be from a separate GPU object; use the AMDGPU_INFO IOCTL
> + * to get the size.
> + */
> + __u64 eop_va;
> +};
> +
> +/* userq signal/wait ioctl */
> +struct drm_amdgpu_userq_signal {
> + /**
> + * @queue_id: Queue handle used by the userq fence creation function
> + * to retrieve the WPTR.
> + */
> + __u32 queue_id;
> + __u32 pad;
> + /**
> + * @syncobj_handles: The list of syncobj handles submitted by the user queue
> + * job to be signaled.
> + */
> + __u64 syncobj_handles;
> + /**
> + * @num_syncobj_handles: A count that represents the number of syncobj handles in
> + * @syncobj_handles.
> + */
> + __u64 num_syncobj_handles;
> + /**
> + * @bo_read_handles: The list of BO handles that the submitted user queue job
> + * is using for read only. This will update BO fences in the kernel.
> + */
> + __u64 bo_read_handles;
> + /**
> + * @bo_write_handles: The list of BO handles that the submitted user queue job
> + * is using for write only. This will update BO fences in the kernel.
> + */
> + __u64 bo_write_handles;
> + /**
> + * @num_bo_read_handles: A count that represents the number of read BO handles in
> + * @bo_read_handles.
> + */
> + __u32 num_bo_read_handles;
> + /**
> + * @num_bo_write_handles: A count that represents the number of write BO handles in
> + * @bo_write_handles.
> + */
> + __u32 num_bo_write_handles;
> +
> +};
> +
> +struct drm_amdgpu_userq_fence_info {
> + /**
> + * @va: A gpu address allocated for each queue which stores the
> + * read pointer (RPTR) value.
> + */
> + __u64 va;
> + /**
> + * @value: A 64 bit value represents the write pointer (WPTR) of the
> + * queue commands which compared with the RPTR value to signal the
> + * fences.
> + */
> + __u64 value;
> +};
> +
> +struct drm_amdgpu_userq_wait {
> + /**
> + * @syncobj_handles: The list of syncobj handles submitted by the user queue
> + * job to get the va/value pairs.
> + */
> + __u64 syncobj_handles;
> + /**
> + * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by
> + * the user queue job to get the va/value pairs at given @syncobj_timeline_points.
> + */
> + __u64 syncobj_timeline_handles;
> + /**
> + * @syncobj_timeline_points: The list of timeline syncobj points submitted by the
> + * user queue job for the corresponding @syncobj_timeline_handles.
> + */
> + __u64 syncobj_timeline_points;
> + /**
> + * @bo_read_handles: The list of read BO handles submitted by the user queue
> + * job to get the va/value pairs.
> + */
> + __u64 bo_read_handles;
> + /**
> + * @bo_write_handles: The list of write BO handles submitted by the user queue
> + * job to get the va/value pairs.
> + */
> + __u64 bo_write_handles;
> + /**
> + * @num_syncobj_timeline_handles: A count that represents the number of timeline
> + * syncobj handles in @syncobj_timeline_handles.
> + */
> + __u16 num_syncobj_timeline_handles;
> + /**
> + * @num_fences: This field can be used both as input and output. As input it defines
> + * the maximum number of fences that can be returned and as output it will specify
> + * how many fences were actually returned from the ioctl.
> + */
> + __u16 num_fences;
> + /**
> + * @num_syncobj_handles: A count that represents the number of syncobj handles in
> + * @syncobj_handles.
> + */
> + __u32 num_syncobj_handles;
> + /**
> + * @num_bo_read_handles: A count that represents the number of read BO handles in
> + * @bo_read_handles.
> + */
> + __u32 num_bo_read_handles;
> + /**
> + * @num_bo_write_handles: A count that represents the number of write BO handles in
> + * @bo_write_handles.
> + */
> + __u32 num_bo_write_handles;
> + /**
> + * @out_fences: The field is a return value from the ioctl containing the list of
> + * address/value pairs to wait for.
> + */
> + __u64 out_fences;
> +};
> +
> /* vm ioctl */
> #define AMDGPU_VM_OP_RESERVE_VMID 1
> #define AMDGPU_VM_OP_UNRESERVE_VMID 2
> @@ -592,6 +833,19 @@ struct drm_amdgpu_gem_va {
> __u64 offset_in_bo;
> /** Specify mapping size. Must be correctly aligned. */
> __u64 map_size;
> + /**
> + * vm_timeline_point is a sequence number used to add new timeline point.
> + */
> + __u64 vm_timeline_point;
> + /**
> + * The vm page table update fence is installed in given vm_timeline_syncobj_out
> + * at vm_timeline_point.
> + */
> + __u32 vm_timeline_syncobj_out;
> + /** the number of syncobj handles in @input_fence_syncobj_handles */
> + __u32 num_syncobj_handles;
> + /** Array of sync object handle to wait for given input fences */
> + __u64 input_fence_syncobj_handles;
> };
>
> #define AMDGPU_HW_IP_GFX 0
> diff --git a/tests/amdgpu/amd_userq_basic.c b/tests/amdgpu/amd_userq_basic.c
> new file mode 100644
> index 000000000..b010fed7a
> --- /dev/null
> +++ b/tests/amdgpu/amd_userq_basic.c
> @@ -0,0 +1,1706 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright 2014 Advanced Micro Devices, Inc.
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + * Copyright 2023 Advanced Micro Devices, Inc.
> + */
Why three different copyrights above? And why does none of them have
the year 2025?
This is a new file and a new test, so imho this should be:
/*
* Copyright 2025 Advanced Micro Devices, Inc.
*/
> + #include <pthread.h>
> + #include <time.h>
Add newline here.
Regards,
Kamil
> + #include "lib/amdgpu/amd_memory.h"
> + #include "lib/amdgpu/amd_sdma.h"
> + #include "lib/amdgpu/amd_PM4.h"
> + #include "lib/amdgpu/amd_command_submission.h"
> + #include "lib/amdgpu/amd_compute.h"
> + #include "lib/amdgpu/amd_gfx.h"
> + #include "lib/amdgpu/amd_shaders.h"
> + #include "lib/amdgpu/amd_dispatch.h"
> + #include "include/drm-uapi/amdgpu_drm.h"
> + #include "lib/amdgpu/amd_cs_radv.h"
Sort this alphabetically.
> +
> + #define BUFFER_SIZE (8 * 1024)
> +
> +/* Flag to indicate secure buffer related workload, unused for now */
> + #define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0)
> +/* Flag to indicate AQL workload, unused for now */
> + #define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
> +
> + #define PACKET_TYPE3 3
> + #define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
> + (((op) & 0xFF) << 8) | \
> + ((n) & 0x3FFF) << 16)
> +
> + #define PACKET3_NOP 0x10
> + #define PACKET3_PROTECTED_FENCE_SIGNAL 0xd0
> + #define PACKET3_FENCE_WAIT_MULTI 0xd1
> + #define PACKET3_WRITE_DATA 0x37
> +
> + #define PACKET3_WAIT_REG_MEM 0x3C
> + #define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
> + #define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
> + #define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
> + #define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
> +
> + #define WR_CONFIRM (1 << 20)
> + #define WRITE_DATA_DST_SEL(x) ((x) << 8)
> + #define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
> + #define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
> + #define WAIT_MEM_ENGINE_SEL(x) ((x) << 0)
> + #define WAIT_MEM_WAIT_PREEMPTABLE(x) ((x) << 1)
> + #define WAIT_MEM_CACHE_POLICY(x) ((x) << 2)
> + #define WAIT_MEM_POLL_INTERVAL(x) ((x) << 16)
> +
> + #define DOORBELL_INDEX 4
> + #define AMDGPU_USERQ_BO_WRITE 1
> +
> + #define PACKET3_RELEASE_MEM 0x49
> + #define PACKET3_RELEASE_MEM_CACHE_POLICY(x) ((x) << 25)
> + #define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29)
> + #define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24)
> + #define CACHE_FLUSH_AND_INV_TS_EVENT 0x00000014
> +
> + #define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0)
> + #define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8)
> + #define PACKET3_RELEASE_MEM_GCR_GLM_WB (1 << 12)
> + #define PACKET3_RELEASE_MEM_GCR_GLM_INV (1 << 13)
> + #define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14)
> + #define PACKET3_RELEASE_MEM_GCR_GL1_INV (1 << 15)
> + #define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16)
> + #define PACKET3_RELEASE_MEM_GCR_GL2_RANGE (1 << 17)
> + #define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19)
> + #define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20)
> + #define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21)
> + #define PACKET3_RELEASE_MEM_GCR_SEQ (1 << 22)
> +
> +/* SDMA related */
> + #define SDMA_OPCODE_COPY 1
> + #define SDMA_OPCODE_WRITE 2
> + #define SDMA_COPY_SUB_OPCODE_LINEAR 0
> + #define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \
> + (((sub_op) & 0xFF) << 8) | \
> + (((op) & 0xFF) << 0))
> + #define upper_32_bits(n) ((uint32_t)(((n) >> 16) >> 16))
> + #define lower_32_bits(n) ((uint32_t)((n) & 0xfffffffc))
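> +/*
> + * Note: unlike the kernel's lower_32_bits(), this variant also clears
> + * the two least significant bits, matching the dword-aligned low
> + * address fields of the packets it feeds below.
> + */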
> +
> +/* user queue IOCTL */
> + #define AMDGPU_USERQ_OP_CREATE 1
> + #define AMDGPU_USERQ_OP_FREE 2
> +
> +/* Flag to indicate secure buffer related workload, unused for now */
> + #define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0)
> +/* Flag to indicate AQL workload, unused for now */
> + #define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
> +
> +//#define WORKLOAD_COUNT 7
> + #define WORKLOAD_COUNT 1
> + #define DEBUG_USERQUEUE 1
> +
> + #define PAGE_SIZE 4096
> + #define USERMODE_QUEUE_SIZE (PAGE_SIZE * 256)
> + #define ALIGNMENT 4096
> +
> +struct amdgpu_userq_bo {
> + amdgpu_bo_handle handle;
> + amdgpu_va_handle va_handle;
> + uint64_t mc_addr;
> + uint64_t size;
> + void *ptr;
> +};
> +
> +static struct amdgpu_userq_bo shared_userq_bo;
> +static int shared_syncobj_fd1;
> +static int shared_syncobj_fd2;
> +
> +pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
> +pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
> +
> + #if DEBUG_USERQUEUE
> +static void packet_dump(uint32_t *ptr, int start, int end)
> +{
> + int i;
> +
> + igt_info("\n============PACKET==============\n");
> + for (i = start; i < end; i++)
> + igt_info("pkt[%d] = 0x%x\n", i - start, ptr[i]);
> +
> + igt_info("=================================\n");
> +}
> + #endif
> +
> +static void validation(uint32_t *workload)
> +{
> + int i = 0;
> +
> + while (workload[0] != 0xdeadbeaf) {
> + if (i++ > 100)
> + break;
> + usleep(100);
> + }
> +
> + igt_info("\n========OUTPUT==========\n");
> + for (i = 0; i < 5; i++)
> + igt_info("worklod[%d] = %x\n", i, workload[i]);
> +
> + igt_info("===========================\n");
> +}
> +
> +static void create_relmem_workload(uint32_t *ptr, int *npkt, int data,
> + uint64_t *wptr_cpu, uint64_t *doorbell_ptr,
> + uint32_t q_id, uint64_t addr)
> +{
> + ptr[(*npkt)++] = (PACKET3(PACKET3_RELEASE_MEM, 6));
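> +	/*
> +	 * The next two raw dwords appear to pack the
> +	 * CACHE_FLUSH_AND_INV_TS_EVENT event type with GCR flush bits and
> +	 * the DATA_SEL/INT_SEL fields from the defines above.
> +	 */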
> + ptr[(*npkt)++] = 0x0030e514;
> + ptr[(*npkt)++] = 0x23010000;
> + ptr[(*npkt)++] = lower_32_bits(addr);
> + ptr[(*npkt)++] = upper_32_bits(addr);
> + ptr[(*npkt)++] = 0xffffffff & data;
> + ptr[(*npkt)++] = 0;
> + ptr[(*npkt)++] = q_id;
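> +	/*
> +	 * Ring the queue: publish the new write pointer through the WPTR
> +	 * BO, then write it to the doorbell page so the hardware picks up
> +	 * the new packets.
> +	 */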
> + *wptr_cpu = *npkt;
> + doorbell_ptr[DOORBELL_INDEX] = *npkt;
> +}
> +
> +static int create_submit_workload(uint32_t *ptr, int *npkt, uint32_t data,
> + uint64_t *wptr_cpu, uint64_t *doorbell_ptr,
> + uint32_t q_id, struct amdgpu_userq_bo *dstptr)
> +{
> + #if DEBUG_USERQUEUE
> + int start = *npkt;
> + #endif
> + ptr[(*npkt)++] = PACKET3(PACKET3_WRITE_DATA, 7);
> + ptr[(*npkt)++] =
> + WRITE_DATA_DST_SEL(5) | WR_CONFIRM | WRITE_DATA_CACHE_POLICY(3);
> +
> + ptr[(*npkt)++] = 0xfffffffc & (dstptr->mc_addr);
> + ptr[(*npkt)++] = (0xffffffff00000000 & (dstptr->mc_addr)) >> 32;
> + ptr[(*npkt)++] = data;
> + ptr[(*npkt)++] = data;
> + ptr[(*npkt)++] = data;
> + ptr[(*npkt)++] = data;
> + ptr[(*npkt)++] = data;
> + create_relmem_workload(ptr, npkt, 0xdeadbeaf, wptr_cpu,
> + doorbell_ptr, q_id, dstptr->mc_addr);
> + #if DEBUG_USERQUEUE
> + packet_dump(ptr, start, *npkt);
> + #endif
> + return 0;
> +}
> +
> +static void alloc_doorbell(amdgpu_device_handle device_handle, struct amdgpu_userq_bo *doorbell_bo,
> + unsigned int size, unsigned int domain)
> +{
> + struct amdgpu_bo_alloc_request req = {0};
> + amdgpu_bo_handle buf_handle;
> + int r;
> +
> + req.alloc_size = ALIGN(size, PAGE_SIZE);
> + req.preferred_heap = domain;
> +
> + r = amdgpu_bo_alloc(device_handle, &req, &buf_handle);
> + igt_assert_eq(r, 0);
> +
> + doorbell_bo->handle = buf_handle;
> + doorbell_bo->size = req.alloc_size;
> +
> + r = amdgpu_bo_cpu_map(doorbell_bo->handle,
> + (void **)&doorbell_bo->ptr);
> + igt_assert_eq(r, 0);
> +}
> +
> +static int timeline_syncobj_wait(amdgpu_device_handle device_handle, uint32_t timeline_syncobj_handle)
> +{
> + uint64_t point, signaled_point;
> + uint64_t timeout;
> + struct timespec tp;
> + uint32_t flags = DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED;
> + int r;
> +
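> +	/*
> +	 * Poll until the last submitted point has signaled: query the
> +	 * newest submitted point, wait up to 100 ms for it, then compare
> +	 * it with the signaled payload and retry if more points were
> +	 * submitted in the meantime.
> +	 */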
> + do {
> + r = amdgpu_cs_syncobj_query2(device_handle, &timeline_syncobj_handle,
> + (uint64_t *)&point, 1, flags);
> + if (r)
> + return r;
> +
> + clock_gettime(CLOCK_MONOTONIC, &tp);
> + timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
> + timeout += 100000000; //100 millisec
> + r = amdgpu_cs_syncobj_timeline_wait(device_handle, &timeline_syncobj_handle,
> + (uint64_t *)&point, 1, timeout,
> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
> + NULL);
> + if (r)
> + return r;
> +
> + r = amdgpu_cs_syncobj_query(device_handle, &timeline_syncobj_handle, &signaled_point, 1);
> + if (r)
> + return r;
> + } while (point != signaled_point);
> +
> + return r;
> +}
> +
> +static int
> +amdgpu_bo_unmap_and_free_uq(amdgpu_device_handle dev, amdgpu_bo_handle bo,
> + amdgpu_va_handle va_handle, uint64_t mc_addr, uint64_t size,
> + uint32_t timeline_syncobj_handle, uint16_t point)
> +{
> + amdgpu_bo_cpu_unmap(bo);
> + amdgpu_bo_va_op_raw2(dev, bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP, timeline_syncobj_handle, point, 0, 0);
> +
> + amdgpu_va_range_free(va_handle);
> + amdgpu_bo_free(bo);
> +
> + return 0;
> +}
> +
> +static int amdgpu_bo_alloc_and_map_uq(amdgpu_device_handle dev,
> + uint64_t size,
> + uint64_t alignment,
> + uint64_t heap,
> + uint64_t alloc_flags,
> + uint64_t mapping_flags,
> + amdgpu_bo_handle *bo,
> + void **cpu,
> + uint64_t *mc_address,
> + amdgpu_va_handle *va_handle,
> + uint32_t timeline_syncobj_handle,
> + uint64_t point)
> +{
> + struct amdgpu_bo_alloc_request request = {};
> + amdgpu_bo_handle buf_handle;
> + amdgpu_va_handle handle;
> + uint64_t vmc_addr;
> + int r;
> +
> + request.alloc_size = size;
> + request.phys_alignment = alignment;
> + request.preferred_heap = heap;
> + request.flags = alloc_flags;
> +
> + r = amdgpu_bo_alloc(dev, &request, &buf_handle);
> + if (r)
> + return r;
> +
> + r = amdgpu_va_range_alloc(dev,
> + amdgpu_gpu_va_range_general,
> + size, alignment, 0, &vmc_addr,
> + &handle, 0);
> + if (r)
> + goto error_va_alloc;
> +
> + r = amdgpu_bo_va_op_raw2(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
> + AMDGPU_VM_PAGE_READABLE |
> + AMDGPU_VM_PAGE_WRITEABLE |
> + AMDGPU_VM_PAGE_EXECUTABLE |
> + mapping_flags,
> + AMDGPU_VA_OP_MAP,
> + timeline_syncobj_handle,
> + point, 0, 0);
> +	if (r)
> +		goto error_va_map;
> +
> + r = amdgpu_bo_cpu_map(buf_handle, cpu);
> + if (r)
> + goto error_cpu_map;
> +
> + *bo = buf_handle;
> + *mc_address = vmc_addr;
> + *va_handle = handle;
> +
> + return 0;
> +
> + error_cpu_map:
> + amdgpu_bo_cpu_unmap(buf_handle);
> + error_va_map:
> + amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
> + error_va_alloc:
> + amdgpu_bo_free(buf_handle);
> + return r;
> +}
> +
> +static void free_workload(amdgpu_device_handle device_handle, struct amdgpu_userq_bo *dstptr,
> + uint32_t timeline_syncobj_handle, uint64_t point,
> + uint64_t syncobj_handles_array, uint32_t num_syncobj_handles)
> +{
> + int r;
> +
> + r = amdgpu_bo_unmap_and_free_uq(device_handle, dstptr->handle, dstptr->va_handle,
> + dstptr->mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle, point);
> + igt_assert_eq(r, 0);
> +}
> +
> +static int allocate_workload(amdgpu_device_handle device_handle, struct amdgpu_userq_bo *dstptr,
> + uint32_t timeline_syncobj_handle, uint64_t point)
> +{
> +
> + uint64_t gtt_flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> +
> + int r;
> +
> + r = amdgpu_bo_alloc_and_map_uq(device_handle, PAGE_SIZE,
> + PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &dstptr->handle, &dstptr->ptr,
> + &dstptr->mc_addr, &dstptr->va_handle,
> + timeline_syncobj_handle, point);
> +	if (!r)
> +		memset(dstptr->ptr, 0x0, PAGE_SIZE);
> + return r;
> +}
> +
> +static int create_sync_objects(int fd, uint32_t *timeline_syncobj_handle,
> + uint32_t *timeline_syncobj_handle2)
> +{
> + int r;
> +
> + r = drmSyncobjCreate(fd, 0, timeline_syncobj_handle);
> + if (r)
> + return r;
> +
> + r = drmSyncobjCreate(fd, 0, timeline_syncobj_handle2);
> +
> + return r;
> +}
> +
> +static void *userq_signal(void *data)
> +{
> + struct amdgpu_userq_bo queue, shadow, doorbell, wptr_bo, rptr;
> + uint32_t q_id, syncobj_handle, syncobj_handle1, db_handle;
> + uint64_t gtt_flags = 0, *doorbell_ptr, *wptr;
> + struct drm_amdgpu_userq_mqd_gfx11 mqd;
> + struct amdgpu_userq_bo gds, csa;
> + uint32_t syncarray[2];
> + uint32_t *ptr;
> + int r, i;
> + uint32_t timeline_syncobj_handle;
> + uint64_t point = 0;
> + uint32_t timeline_syncobj_handle2;
> + uint64_t point2 = 0;
> + struct drm_amdgpu_userq_signal signal_data;
> + uint32_t bo_read_handles[1], bo_write_handles[1];
> + uint32_t read_handle, write_handle;
> +
> + amdgpu_device_handle device = (amdgpu_device_handle)data;
> +
> + int fd = amdgpu_device_get_fd(device);
> +
> + r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> +	r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &queue.handle, &queue.ptr,
> + &queue.mc_addr, &queue.va_handle);
> + igt_assert_eq(r, 0);
> +
> +	r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
> + PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &wptr_bo.handle, &wptr_bo.ptr,
> + &wptr_bo.mc_addr, &wptr_bo.va_handle);
> + igt_assert_eq(r, 0);
> +
> +	r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
> + PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &rptr.handle, &rptr.ptr,
> + &rptr.mc_addr, &rptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> +	r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &shadow.handle, &shadow.ptr,
> + &shadow.mc_addr, &shadow.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + 0,
> + &gds.handle, &gds.ptr,
> + &gds.mc_addr, &gds.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + 0,
> + &csa.handle, &csa.ptr,
> + &csa.mc_addr, &csa.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
> +
> + mqd.shadow_va = shadow.mc_addr;
> + //mqd.gds_va = gds.mc_addr;
> + mqd.csa_va = csa.mc_addr;
> +
> + doorbell_ptr = (uint64_t *)doorbell.ptr;
> +
> + ptr = (uint32_t *)queue.ptr;
> + memset(ptr, 0, sizeof(*ptr));
> +
> + wptr = (uint64_t *)wptr_bo.ptr;
> + memset(wptr, 0, sizeof(*wptr));
> +
> + //amdgpu_userqueue_get_bo_handle(doorbell.handle, &db_handle);
> + amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
> +
> + /* Create the Usermode Queue */
> + r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
> + db_handle, DOORBELL_INDEX,
> + queue.mc_addr, USERMODE_QUEUE_SIZE,
> + wptr_bo.mc_addr, rptr.mc_addr, &mqd, &q_id);
> + igt_assert_eq(r, 0);
> + if (r)
> + goto err_free_queue;
> +
> + r = drmSyncobjCreate(fd, 0, &syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjCreate(fd, 0, &syncobj_handle1);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjHandleToFD(fd, syncobj_handle, &shared_syncobj_fd2);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjHandleToFD(fd, syncobj_handle1, &shared_syncobj_fd1);
> + igt_assert_eq(r, 0);
> +
> + syncarray[0] = syncobj_handle;
> + syncarray[1] = syncobj_handle1;
> +
> + ptr[0] = PACKET3(PACKET3_WRITE_DATA, 7);
> + ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM | WRITE_DATA_CACHE_POLICY(3);
> + ptr[2] = 0xfffffffc & (shared_userq_bo.mc_addr);
> + ptr[3] = (0xffffffff00000000 & (shared_userq_bo.mc_addr)) >> 32;
> + ptr[4] = 0xdeadbeaf;
> + ptr[5] = 0xdeadbeaf;
> + ptr[6] = 0xdeadbeaf;
> + ptr[7] = 0xdeadbeaf;
> + ptr[8] = 0xdeadbeaf;
> +
> + for (i = 9; i <= 60; i++)
> + ptr[i] = PACKET3(PACKET3_NOP, 0x3fff);
> +
> + ptr[i++] = PACKET3(PACKET3_PROTECTED_FENCE_SIGNAL, 0);
> +
> + *wptr = ++i;
> + r = amdgpu_bo_export(queue.handle, amdgpu_bo_handle_type_kms, &read_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_export(shadow.handle, amdgpu_bo_handle_type_kms, &write_handle);
> + igt_assert_eq(r, 0);
> + // Assign the exported handles to the arrays
> + bo_read_handles[0] = read_handle;
> + bo_write_handles[0] = write_handle;
> +
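> +	/*
> +	 * The signal ioctl attaches this queue's fence at the current
> +	 * WPTR to both syncobjs and to the read/write BO reservations, so
> +	 * the waiter thread can retrieve it through the shared fds.
> +	 */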
> + signal_data.queue_id = q_id;
> +	signal_data.syncobj_handles = (uint64_t)syncarray;
> + signal_data.num_syncobj_handles = 2;
> + signal_data.bo_write_handles = (uint64_t)bo_write_handles;
> + signal_data.num_bo_write_handles = 1;
> + signal_data.bo_read_handles = (uint64_t)bo_read_handles;
> + signal_data.num_bo_read_handles = 1;
> +
> + r = amdgpu_userq_signal(device, &signal_data);
> + igt_assert_eq(r, 0);
> +
> + doorbell_ptr[DOORBELL_INDEX] = i;
> +
> + /* Free the Usermode Queue */
> + r = amdgpu_free_userqueue(device, q_id);
> + igt_assert_eq(r, 0);
> + if (!r)
> + pthread_cond_signal(&cond);
> +
> +err_free_queue:
> + r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
> + csa.va_handle,
> + csa.mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
> + gds.va_handle,
> + gds.mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
> + shadow.va_handle,
> + shadow.mc_addr, PAGE_SIZE * 4,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_cpu_unmap(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_free(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle,
> + rptr.mc_addr, PAGE_SIZE);
> +
> + amdgpu_bo_unmap_and_free(wptr_bo.handle, wptr_bo.va_handle,
> + wptr_bo.mc_addr, PAGE_SIZE);
> +
> + amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
> + queue.mc_addr, USERMODE_QUEUE_SIZE);
> +
> + drmSyncobjDestroy(fd, timeline_syncobj_handle);
> + drmSyncobjDestroy(fd, timeline_syncobj_handle2);
> +
> + return (void *)(long)r;
> +}
> +
> +static void *userq_wait(void *data)
> +{
> + struct amdgpu_userq_bo queue, shadow, doorbell, wptr_bo, rptr;
> + struct amdgpu_userq_bo gds, csa;
> + struct drm_amdgpu_userq_fence_info *fence_info = NULL;
> + uint32_t syncobj_handle, syncobj_handle1, db_handle;
> + uint64_t num_fences;
> + uint64_t gtt_flags = 0, *doorbell_ptr, *wptr;
> + struct drm_amdgpu_userq_mqd_gfx11 mqd;
> + uint64_t gpu_addr, reference_val;
> + uint32_t *ptr;
> + uint32_t q_id;
> + int i, r, fd;
> + uint32_t timeline_syncobj_handle;
> + uint64_t point = 0;
> + uint32_t timeline_syncobj_handle2;
> + uint64_t point2 = 0;
> + struct drm_amdgpu_userq_wait wait_data;
> + uint32_t bo_read_handles[1], bo_write_handles[1];
> + uint32_t read_handle, write_handle;
> + uint32_t syncarray[3], points[3];
> + amdgpu_device_handle device;
> +
> + pthread_mutex_lock(&lock);
> + pthread_cond_wait(&cond, &lock);
> + pthread_mutex_unlock(&lock);
> +
> + device = (amdgpu_device_handle)data;
> + fd = amdgpu_device_get_fd(device);
> +
> + r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> +	r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &queue.handle, &queue.ptr,
> + &queue.mc_addr, &queue.va_handle);
> + igt_assert_eq(r, 0);
> +
> +	r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
> + PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &wptr_bo.handle, &wptr_bo.ptr,
> + &wptr_bo.mc_addr, &wptr_bo.va_handle);
> + igt_assert_eq(r, 0);
> +
> +	r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
> + PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &rptr.handle, &rptr.ptr,
> + &rptr.mc_addr, &rptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> +	r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &shadow.handle, &shadow.ptr,
> + &shadow.mc_addr, &shadow.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + 0,
> + &gds.handle, &gds.ptr,
> + &gds.mc_addr, &gds.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + 0,
> + &csa.handle, &csa.ptr,
> + &csa.mc_addr, &csa.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
> +
> + mqd.shadow_va = shadow.mc_addr;
> + mqd.csa_va = csa.mc_addr;
> +
> + doorbell_ptr = (uint64_t *)doorbell.ptr;
> +
> + ptr = (uint32_t *)queue.ptr;
> + memset(ptr, 0, sizeof(*ptr));
> +
> + wptr = (uint64_t *)wptr_bo.ptr;
> + memset(wptr, 0, sizeof(*wptr));
> +
> + amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
> +
> + /* Create the Usermode Queue */
> + r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
> + db_handle, DOORBELL_INDEX,
> + queue.mc_addr, USERMODE_QUEUE_SIZE,
> + wptr_bo.mc_addr, rptr.mc_addr, &mqd, &q_id);
> + igt_assert_eq(r, 0);
> + if (r)
> + goto err_free_queue;
> +
> + r = drmSyncobjFDToHandle(fd, shared_syncobj_fd1, &syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjFDToHandle(fd, shared_syncobj_fd2, &syncobj_handle1);
> + igt_assert_eq(r, 0);
> +
> + syncarray[0] = syncobj_handle;
> + syncarray[1] = syncobj_handle1;
> +
> + points[0] = 0;
> + points[1] = 0;
> + num_fences = 0;
> + r = amdgpu_bo_export(queue.handle, amdgpu_bo_handle_type_kms, &read_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_export(shadow.handle, amdgpu_bo_handle_type_kms, &write_handle);
> + igt_assert_eq(r, 0);
> +
> + // Assign the exported handles to the arrays
> + bo_read_handles[0] = read_handle;
> + bo_write_handles[0] = write_handle;
> +
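> +	/*
> +	 * The wait ioctl is used in two steps: a first call with
> +	 * num_fences = 0 only reports how many va/value pairs the
> +	 * syncobjs and BOs resolve to, a second call with out_fences
> +	 * pointing at an array of that size fetches them.
> +	 */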
> + wait_data.syncobj_handles = (uint64_t)syncarray;
> + wait_data.num_syncobj_handles = 2;
> + wait_data.syncobj_timeline_handles = (uint64_t)syncarray;
> + wait_data.syncobj_timeline_points = (uint64_t)points;
> + wait_data.num_syncobj_timeline_handles = 2;
> + wait_data.bo_read_handles = (uint64_t)bo_read_handles;
> + wait_data.num_bo_read_handles = 1;
> + wait_data.bo_write_handles = (uint64_t)bo_write_handles;
> + wait_data.num_bo_write_handles = 1;
> + wait_data.out_fences = (uint64_t)fence_info;
> + wait_data.num_fences = num_fences;
> +
> +	r = amdgpu_userq_wait(device, &wait_data);
> +	igt_assert_eq(r, 0);
> +
> + num_fences = wait_data.num_fences;
> + fence_info = malloc(num_fences * sizeof(struct drm_amdgpu_userq_fence_info));
> + if (!fence_info)
> + goto err_free_queue;
> + memset(fence_info, 0, num_fences * sizeof(struct drm_amdgpu_userq_fence_info));
> + wait_data.out_fences = (uint64_t)fence_info;
> + r = amdgpu_userq_wait(device, &wait_data);
> + igt_assert_eq(r, 0);
> +
> + for (i = 0; i < num_fences; i++) {
> + igt_info("num_fences = %lu fence_info.va=0x%llx fence_info.value=%llu\n",
> + num_fences, (fence_info + i)->va, (fence_info + i)->value);
> +
> + gpu_addr = (fence_info + i)->va;
> + reference_val = (fence_info + i)->value;
> + ptr[0] = PACKET3(PACKET3_FENCE_WAIT_MULTI, 4);
> + ptr[1] = WAIT_MEM_ENGINE_SEL(1) | WAIT_MEM_WAIT_PREEMPTABLE(0) | WAIT_MEM_CACHE_POLICY(3) | WAIT_MEM_POLL_INTERVAL(2);
> + ptr[2] = 0xffffffff & (gpu_addr);
> +	ptr[3] = (0xffffffff00000000 & (gpu_addr)) >> 32;
> + ptr[4] = 0xffffffff & (reference_val);
> + ptr[5] = (0xffffffff00000000 & (reference_val)) >> 32;
> + *wptr = 6;
> + doorbell_ptr[DOORBELL_INDEX] = 6;
> + }
> + /* Free the Usermode Queue */
> + r = amdgpu_free_userqueue(device, q_id);
> + igt_assert_eq(r, 0);
> +
> +err_free_queue:
> + r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
> + csa.va_handle,
> + csa.mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
> + gds.va_handle,
> + gds.mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
> + shadow.va_handle,
> + shadow.mc_addr, PAGE_SIZE * 4,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_cpu_unmap(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_free(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle,
> + rptr.mc_addr, PAGE_SIZE);
> + //igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(wptr_bo.handle, wptr_bo.va_handle,
> + wptr_bo.mc_addr, PAGE_SIZE);
> + //igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
> + queue.mc_addr, USERMODE_QUEUE_SIZE);
> + //igt_assert_eq(r, 0);
> +
> + r = drmSyncobjDestroy(fd, syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjDestroy(fd, syncobj_handle1);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjDestroy(fd, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> + r = drmSyncobjDestroy(fd, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> + free(fence_info);
> + return (void *)(long)r;
> +}
> +
> +static void amdgpu_command_submission_umq_synchronize_test(amdgpu_device_handle device,
> + bool ce_avails)
> +{
> + int r;
> + static pthread_t signal_thread, wait_thread;
> + uint64_t gtt_flags = 0;
> + uint16_t point = 0;
> + uint16_t point2 = 0;
> + uint32_t timeline_syncobj_handle;
> + uint32_t timeline_syncobj_handle2;
> +
> + int fd = amdgpu_device_get_fd(device);
> +
> + r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &shared_userq_bo.handle, &shared_userq_bo.ptr,
> + &shared_userq_bo.mc_addr, &shared_userq_bo.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = pthread_create(&signal_thread, NULL, userq_signal, device);
> + igt_assert_eq(r, 0);
> +
> + r = pthread_create(&wait_thread, NULL, userq_wait, device);
> + igt_assert_eq(r, 0);
> +
> + r = pthread_join(signal_thread, NULL);
> + igt_assert_eq(r, 0);
> +
> + r = pthread_join(wait_thread, NULL);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free_uq(device, shared_userq_bo.handle,
> + shared_userq_bo.va_handle,
> + shared_userq_bo.mc_addr,
> + PAGE_SIZE, timeline_syncobj_handle2,
> + ++point2);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +}
> +
> +static void amdgpu_command_submission_umq_timeline_test(amdgpu_device_handle device,
> + bool ce_avails)
> +{
> + struct amdgpu_userq_bo queue, shadow, doorbell, wptr, rptr;
> + struct amdgpu_userq_bo gds, csa;
> + struct drm_amdgpu_userq_fence_info *fence_info = NULL;
> + uint64_t num_fences;
> + uint64_t gtt_flags = 0, *doorbell_ptr, *wptr_cpu;
> + struct drm_amdgpu_userq_mqd_gfx11 mqd;
> + struct amdgpu_userq_bo dstptrs[WORKLOAD_COUNT];
> + uint32_t q_id, db_handle, *ptr;
> + uint32_t timeline_syncobj_handle;
> + uint64_t point = 0;
> + uint32_t timeline_syncobj_handle2;
> + uint64_t point2 = 0;
> + uint32_t syncarray[3], points[3];
> + uint32_t test_timeline_syncobj_handle;
> + uint32_t test_timeline_syncobj_handle2;
> + uint64_t signal_point, payload;
> + struct drm_amdgpu_userq_wait wait_data;
> + int i, r, npkt = 0;
> + uint32_t bo_read_handles[1], bo_write_handles[1];
> + uint32_t read_handle, write_handle;
> + int fd = amdgpu_device_get_fd(device);
> +
> + r = create_sync_objects(fd, &timeline_syncobj_handle,
> + &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjCreate(fd, 0, &test_timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjCreate(fd, 0, &test_timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &queue.handle, &queue.ptr,
> + &queue.mc_addr, &queue.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &wptr.handle, &wptr.ptr,
> + &wptr.mc_addr, &wptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &rptr.handle, &rptr.ptr,
> + &rptr.mc_addr, &rptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &shadow.handle, &shadow.ptr,
> + &shadow.mc_addr, &shadow.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + 0,
> + &gds.handle, &gds.ptr,
> + &gds.mc_addr, &gds.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + 0,
> + &csa.handle, &csa.ptr,
> + &csa.mc_addr, &csa.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
> +
> + mqd.shadow_va = shadow.mc_addr;
> + mqd.csa_va = csa.mc_addr;
> +
> + doorbell_ptr = (uint64_t *) doorbell.ptr;
> +
> + ptr = (uint32_t *)queue.ptr;
> + memset(ptr, 0, sizeof(*ptr));
> +
> + wptr_cpu = (uint64_t *)wptr.ptr;
> +
> + amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
> +
> + /* Create the Usermode Queue */
> + r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
> + db_handle, DOORBELL_INDEX,
> + queue.mc_addr, USERMODE_QUEUE_SIZE,
> + wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
> + igt_assert_eq(r, 0);
> + if (r)
> + goto err_free_queue;
> +
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> + }
> +
> + /* wait */
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = create_submit_workload(ptr, &npkt, 0x1111*(i+1),
> + wptr_cpu, doorbell_ptr, q_id,
> + &dstptrs[i]);
> + igt_assert_eq(r, 0);
> + }
> +
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + validation((uint32_t *)dstptrs[i].ptr);
> + signal_point = 5;
> + r = amdgpu_cs_syncobj_timeline_signal(device, &test_timeline_syncobj_handle,
> + &signal_point, 1);
> + igt_assert_eq(r, 0);
> + r = amdgpu_cs_syncobj_query(device, &test_timeline_syncobj_handle,
> + &payload, 1);
> + igt_assert_eq(r, 0);
> + igt_assert_eq(payload, 5);
> +
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> + }
> +
> + /* wait */
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = create_submit_workload(ptr, &npkt, 0x1111*(i+1),
> + wptr_cpu, doorbell_ptr, q_id,
> + &dstptrs[i]);
> + igt_assert_eq(r, 0);
> + }
> +
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + validation((uint32_t *)dstptrs[i].ptr);
> +
> + signal_point = 10;
> + r = amdgpu_cs_syncobj_timeline_signal(device, &test_timeline_syncobj_handle,
> + &signal_point, 1);
> + igt_assert_eq(r, 0);
> + r = amdgpu_cs_syncobj_query(device, &test_timeline_syncobj_handle,
> + &payload, 1);
> + igt_assert_eq(r, 0);
> + igt_assert_eq(payload, 10);
> +
> + syncarray[0] = test_timeline_syncobj_handle;
> + syncarray[1] = test_timeline_syncobj_handle;
> +
> + points[0] = 5;
> + points[1] = 10;
> +
> + num_fences = 0;
> +
> + // Export the buffer object handles
> + r = amdgpu_bo_export(queue.handle, amdgpu_bo_handle_type_kms, &read_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_export(shadow.handle, amdgpu_bo_handle_type_kms, &write_handle);
> + igt_assert_eq(r, 0);
> +
> + // Assign the exported handles to the arrays
> + bo_read_handles[0] = read_handle;
> + bo_write_handles[0] = write_handle;
> +
> + wait_data.syncobj_handles = (uint64_t)syncarray;
> + wait_data.num_syncobj_handles = 2;
> + wait_data.syncobj_timeline_handles = (uint64_t)syncarray;
> + wait_data.syncobj_timeline_points = (uint64_t)points;
> + wait_data.num_syncobj_timeline_handles = 2;
> + wait_data.bo_read_handles = (uint64_t)bo_read_handles;
> + wait_data.num_bo_read_handles = 1;
> + wait_data.bo_write_handles = (uint64_t)bo_write_handles;
> + wait_data.num_bo_write_handles = 1;
> + wait_data.out_fences = (uint64_t)fence_info;
> + wait_data.num_fences = num_fences;
> + r = amdgpu_userq_wait(device, &wait_data);
> + igt_assert_eq(r, 0);
> +
> +	num_fences = wait_data.num_fences;
> +	fence_info = malloc(num_fences * sizeof(struct drm_amdgpu_userq_fence_info));
> +	igt_assert(fence_info);
> +	wait_data.out_fences = (uint64_t)fence_info;
> + r = amdgpu_userq_wait(device, &wait_data);
> + igt_assert_eq(r, 0);
> +
> + for (i = 0; i < num_fences; i++)
> + igt_info("num_fences = %lu fence_info.va=0x%llx fence_info.value=%llu\n",
> + num_fences, (fence_info + i)->va, (fence_info + i)->value);
> +
> + /* Free the Usermode Queue */
> + r = amdgpu_free_userqueue(device, q_id);
> + igt_assert_eq(r, 0);
> +
> + /* Free workload*/
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + free_workload(device, &dstptrs[i], timeline_syncobj_handle2, ++point2,
> + 0, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> +err_free_queue:
> + r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
> + csa.va_handle,
> + csa.mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
> + gds.va_handle,
> + gds.mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
> + shadow.va_handle,
> + shadow.mc_addr, PAGE_SIZE * 4,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_cpu_unmap(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_free(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle,
> + rptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle,
> + wptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
> + queue.mc_addr, USERMODE_QUEUE_SIZE);
> +
> + r = drmSyncobjDestroy(fd, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjDestroy(fd, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjDestroy(fd, test_timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjDestroy(fd, test_timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +}
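Side note for readers: the signal/query sequence above is the standard libdrm timeline-syncobj pattern, independent of the user queue bits. A minimal sketch (handle and point names are local to this sketch):

	uint32_t handle;
	uint64_t point = 10, payload = 0;

	r = drmSyncobjCreate(fd, 0, &handle);
	igt_assert_eq(r, 0);
	/* advance the timeline to 'point' from the CPU */
	r = amdgpu_cs_syncobj_timeline_signal(device, &handle, &point, 1);
	igt_assert_eq(r, 0);
	/* read back the highest signaled point */
	r = amdgpu_cs_syncobj_query(device, &handle, &payload, 1);
	igt_assert_eq(r, 0);
	igt_assert_eq(payload, point);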
> +
> +/**
> + * Submit a write packet on a user mode SDMA queue (AMDGPU_HW_IP_DMA)
> + * and verify the destination buffer contents.
> + * @param device	amdgpu device handle
> + * @param ce_avails	currently unused
> + */
> +static void amdgpu_command_submission_umq_sdma(amdgpu_device_handle device,
> + bool ce_avails)
> +{
> + int r, i = 0, j = 0;
> + uint64_t gtt_flags = 0;
> + uint16_t point = 0;
> + uint16_t point2 = 0;
> + uint32_t *ptr, *dstptr;
> + uint32_t q_id, db_handle;
> + uint32_t timeline_syncobj_handle;
> + uint32_t timeline_syncobj_handle2;
> + uint64_t *doorbell_ptr, *wptr_cpu;
> + const int sdma_write_length = WORKLOAD_COUNT;
> + /* zero-init so the unset csa_va is not passed as stack garbage */
> + struct drm_amdgpu_userq_mqd_sdma_gfx11 mqd = {0};
> + struct amdgpu_userq_bo queue, doorbell, rptr, wptr, dst;
> + int fd = amdgpu_device_get_fd(device);
> +
> + r = create_sync_objects(fd, &timeline_syncobj_handle,
> + &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &queue.handle, &queue.ptr,
> + &queue.mc_addr, &queue.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &wptr.handle, &wptr.ptr,
> + &wptr.mc_addr, &wptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &rptr.handle, &rptr.ptr,
> + &rptr.mc_addr, &rptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 10,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
> + AMDGPU_VM_MTYPE_UC,
> + &dst.handle, &dst.ptr,
> + &dst.mc_addr, &dst.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + alloc_doorbell(device, &doorbell, PAGE_SIZE * 2, AMDGPU_GEM_DOMAIN_DOORBELL);
> +
> + doorbell_ptr = (uint64_t *) doorbell.ptr;
> +
> + wptr_cpu = (uint64_t *) wptr.ptr;
> +
> + ptr = (uint32_t *) queue.ptr;
> + memset(ptr, 0, USERMODE_QUEUE_SIZE); /* clear the whole ring, not one dword */
> +
> + dstptr = (uint32_t *)dst.ptr;
> + memset(dstptr, 0, sizeof(*dstptr) * sdma_write_length);
> +
> + amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
> +
> + /* Create the Usermode Queue */
> + r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_DMA,
> + db_handle, DOORBELL_INDEX,
> + queue.mc_addr, USERMODE_QUEUE_SIZE,
> + wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
> + igt_assert_eq(r, 0);
> + if (r)
> + goto err_free_queue;
> +
> + ptr[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 0, 0);
> + ptr[i++] = lower_32_bits(dst.mc_addr);
> + ptr[i++] = upper_32_bits(dst.mc_addr);
> + ptr[i++] = sdma_write_length - 1;
> + while (j++ < sdma_write_length)
> + ptr[i++] = 0xdeadbeaf;
> +
> + /* SDMA wptr and the doorbell take byte offsets, hence dwords << 2 */
> + *wptr_cpu = i << 2;
> +
> + doorbell_ptr[DOORBELL_INDEX] = i << 2;
> +
> + i = 0;
> + while (dstptr[0] != 0xdeadbeaf) {
> + if (i++ > 100)
> + break;
> + usleep(100);
> + }
> +
> + for (int k = 0; k < sdma_write_length; k++) {
> + igt_assert_eq(dstptr[k], 0xdeadbeaf);
> + }
> +
> + /* Free the Usermode Queue */
> + r = amdgpu_free_userqueue(device, q_id);
> + igt_assert_eq(r, 0);
> +
> +
> + err_free_queue:
> + r = amdgpu_bo_unmap_and_free_uq(device, dst.handle,
> + dst.va_handle, dst.mc_addr,
> + PAGE_SIZE * 10,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_cpu_unmap(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_free(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle, rptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle, wptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
> + queue.mc_addr, USERMODE_QUEUE_SIZE);
> +
> + drmSyncobjDestroy(fd, timeline_syncobj_handle);
> + drmSyncobjDestroy(fd, timeline_syncobj_handle2);
> +}
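Note that every queue flavour in this test follows the same doorbell submission protocol; a condensed sketch (the names below are placeholders, not the test's variables):

	/* 1. write packets into the CPU-visible ring buffer */
	ring[i++] = PACKET3(PACKET3_NOP, 0);
	/* 2. publish the new write pointer for the firmware:
	 * dword units for GFX/compute, bytes (i << 2) for SDMA
	 */
	*wptr = i;
	/* 3. ring the doorbell so the engine fetches the new packets */
	doorbell[DOORBELL_INDEX] = *wptr;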
> +
> +/**
> + * Submit WRITE_DATA/RELEASE_MEM workloads on a user mode compute queue
> + * (AMDGPU_HW_IP_COMPUTE) and validate the results.
> + * @param device	amdgpu device handle
> + * @param ce_avails	currently unused
> + */
> +static void amdgpu_command_submission_umq_compute(amdgpu_device_handle device,
> + bool ce_avails)
> +{
> + int r, i = 0, npkt = 0;
> + uint64_t gtt_flags = 0;
> + uint16_t point = 0;
> + uint16_t point2 = 0;
> + uint32_t *ptr;
> + uint32_t q_id, db_handle;
> + uint32_t timeline_syncobj_handle;
> + uint32_t timeline_syncobj_handle2;
> + uint64_t *doorbell_ptr, *wptr_cpu;
> + struct amdgpu_userq_bo dstptrs[WORKLOAD_COUNT];
> + struct drm_amdgpu_userq_mqd_compute_gfx11 mqd;
> + struct amdgpu_userq_bo queue, doorbell, rptr, wptr, eop;
> + int fd = amdgpu_device_get_fd(device);
> +
> +
> + r = create_sync_objects(fd, &timeline_syncobj_handle,
> + &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &queue.handle, &queue.ptr,
> + &queue.mc_addr, &queue.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &wptr.handle, &wptr.ptr,
> + &wptr.mc_addr, &wptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &rptr.handle, &rptr.ptr,
> + &rptr.mc_addr, &rptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, 256,
> + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags, AMDGPU_VM_MTYPE_UC,
> + &eop.handle, &eop.ptr,
> + &eop.mc_addr, &eop.va_handle,
> + timeline_syncobj_handle,
> + ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
> +
> + mqd.eop_va = eop.mc_addr;
> +
> + doorbell_ptr = (uint64_t *) doorbell.ptr;
> +
> + wptr_cpu = (uint64_t *) wptr.ptr;
> +
> + ptr = (uint32_t *) queue.ptr;
> + memset(ptr, 0, USERMODE_QUEUE_SIZE); /* clear the whole ring, not one dword */
> +
> + amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
> +
> + /* Create the Usermode Queue */
> + r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_COMPUTE,
> + db_handle, DOORBELL_INDEX,
> + queue.mc_addr, USERMODE_QUEUE_SIZE,
> + wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
> + igt_assert_eq(r, 0);
> + if (r)
> + goto err_free_queue;
> +
> + /* allocate workload */
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle,
> + ++point);
> + igt_assert_eq(r, 0);
> + }
> +
> + /* wait */
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + /* create workload pkt */
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = create_submit_workload(ptr, &npkt, 0x1111 * (i + 1),
> + wptr_cpu, doorbell_ptr, q_id,
> + &dstptrs[i]);
> + igt_assert_eq(r, 0);
> + }
> +
> + /* validation of workload pkt */
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + validation((uint32_t *) dstptrs[i].ptr);
> +
> + /* Free the Usermode Queue */
> + r = amdgpu_free_userqueue(device, q_id);
> + igt_assert_eq(r, 0);
> +
> + /* Free workload */
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + free_workload(device, &dstptrs[i], timeline_syncobj_handle2, ++point2,
> + 0, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> +
> + err_free_queue:
> + r = amdgpu_bo_unmap_and_free_uq(device, eop.handle,
> + eop.va_handle, eop.mc_addr,
> + 256,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_cpu_unmap(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_free(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle, rptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle, wptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
> + queue.mc_addr, USERMODE_QUEUE_SIZE);
> +
> + drmSyncobjDestroy(fd, timeline_syncobj_handle);
> + drmSyncobjDestroy(fd, timeline_syncobj_handle2);
> +}
> +
> +/**
> + * Submit WRITE_DATA/RELEASE_MEM workloads on a user mode GFX queue
> + * (AMDGPU_HW_IP_GFX) and validate the results.
> + * @param device	amdgpu device handle
> + * @param ce_avails	currently unused
> + */
> +static void amdgpu_command_submission_umq_gfx(amdgpu_device_handle device,
> + bool ce_avails)
> +{
> + int r, i = 0, npkt = 0;
> + uint64_t gtt_flags = 0;
> + uint16_t point = 0;
> + uint16_t point2 = 0;
> + uint32_t *ptr;
> + uint32_t q_id, db_handle;
> + uint32_t timeline_syncobj_handle;
> + uint32_t timeline_syncobj_handle2;
> + uint64_t *doorbell_ptr, *wptr_cpu;
> + struct amdgpu_userq_bo dstptrs[WORKLOAD_COUNT];
> + struct drm_amdgpu_userq_mqd_gfx11 mqd;
> + struct amdgpu_userq_bo queue, shadow, doorbell, rptr, wptr, gds, csa;
> + int fd = amdgpu_device_get_fd(device);
> +
> + r = create_sync_objects(fd, &timeline_syncobj_handle,
> + &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &queue.handle, &queue.ptr,
> + &queue.mc_addr, &queue.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &wptr.handle, &wptr.ptr,
> + &wptr.mc_addr, &wptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &rptr.handle, &rptr.ptr,
> + &rptr.mc_addr, &rptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 18,
> + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags, AMDGPU_VM_MTYPE_UC,
> + &shadow.handle, &shadow.ptr,
> + &shadow.mc_addr,
> + &shadow.va_handle,
> + timeline_syncobj_handle,
> + ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4,
> + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags, AMDGPU_VM_MTYPE_UC,
> + &gds.handle, &gds.ptr,
> + &gds.mc_addr, &gds.va_handle,
> + timeline_syncobj_handle,
> + ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 20,
> + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags, AMDGPU_VM_MTYPE_UC,
> + &csa.handle, &csa.ptr,
> + &csa.mc_addr, &csa.va_handle,
> + timeline_syncobj_handle,
> + ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
> +
> + mqd.shadow_va = shadow.mc_addr;
> + mqd.csa_va = csa.mc_addr;
> +
> + doorbell_ptr = (uint64_t *) doorbell.ptr;
> +
> + wptr_cpu = (uint64_t *) wptr.ptr;
> +
> + ptr = (uint32_t *) queue.ptr;
> + memset(ptr, 0, USERMODE_QUEUE_SIZE); /* clear the whole ring, not one dword */
> +
> + amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
> +
> +
> + /* Create the Usermode Queue */
> + r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
> + db_handle, DOORBELL_INDEX,
> + queue.mc_addr, USERMODE_QUEUE_SIZE,
> + wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
> + igt_assert_eq(r, 0);
> + if (r)
> + goto err_free_queue;
> +
> + /* allocate workload */
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle,
> + ++point);
> + igt_assert_eq(r, 0);
> + }
> +
> + /* wait */
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + /* create workload pkt */
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = create_submit_workload(ptr, &npkt, 0x1111 * (i + 1),
> + wptr_cpu, doorbell_ptr, q_id,
> + &dstptrs[i]);
> + igt_assert_eq(r, 0);
> + }
> +
> + /* validation of workload pkt */
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + validation((uint32_t *) dstptrs[i].ptr);
> +
> + /* Free the Usermode Queue */
> + r = amdgpu_free_userqueue(device, q_id);
> + igt_assert_eq(r, 0);
> +
> + /* Free workload */
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + free_workload(device, &dstptrs[i], timeline_syncobj_handle2, ++point2,
> + 0, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> +
> + err_free_queue:
> + /* unmap with the same sizes that were mapped above */
> + r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
> + csa.va_handle, csa.mc_addr,
> + PAGE_SIZE * 20,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
> + gds.va_handle, gds.mc_addr, PAGE_SIZE * 4,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
> + shadow.va_handle, shadow.mc_addr,
> + PAGE_SIZE * 18,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_cpu_unmap(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_free(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle, rptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle, wptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
> + queue.mc_addr, USERMODE_QUEUE_SIZE);
> +
> + drmSyncobjDestroy(fd, timeline_syncobj_handle);
> + drmSyncobjDestroy(fd, timeline_syncobj_handle2);
> +}
> +
> +igt_main
> +{
> + amdgpu_device_handle device;
> + struct amdgpu_gpu_info gpu_info = {0};
> + struct drm_amdgpu_info_hw_ip info = {0};
> + int fd = -1;
> + int r;
> + bool arr_cap[AMD_IP_MAX] = {0};
> +
> + igt_fixture {
> + uint32_t major, minor;
> + int err;
> +
> + fd = drm_open_driver(DRIVER_AMDGPU);
> +
> + err = amdgpu_device_initialize(fd, &major, &minor, &device);
> + igt_require(err == 0);
> + r = amdgpu_query_gpu_info(device, &gpu_info);
> + igt_assert_eq(r, 0);
> + r = amdgpu_query_hw_ip_info(device, AMDGPU_HW_IP_GFX, 0, &info);
> + igt_assert_eq(r, 0);
> + r = setup_amdgpu_ip_blocks(major, minor, &gpu_info, device);
> + igt_assert_eq(r, 0);
> + asic_rings_readness(device, 1, arr_cap);
> + }
> +
> + igt_describe("Check-GFX-UMQ-for-every-available-ring-works-for-write-const-fill-and-copy-operation-using-more-than-one-IB-and-shared-IB");
> + igt_subtest_with_dynamic("umq-gfx-with-IP-GFX") {
> + if (arr_cap[AMD_IP_GFX]) {
> + igt_dynamic_f("umq-gfx")
> + amdgpu_command_submission_umq_gfx(device,
> + info.hw_ip_version_major < 11);
> + }
> + }
> +
> + igt_describe("Check-COMPUTE-UMQ-for-every-available-ring-works-for-write-const-fill-and-copy-operation-using-more-than-one-IB-and-shared-IB");
> + igt_subtest_with_dynamic("umq-gfx-with-IP-COMPUTE") {
> + if (arr_cap[AMD_IP_COMPUTE]) {
> + igt_dynamic_f("umq-compute")
> + amdgpu_command_submission_umq_compute(device,
> + info.hw_ip_version_major < 11);
> + }
> + }
> +
> + igt_describe("Check-SDMA-UMQ-for-every-available-ring-works-for-write-const-fill-and-copy-operation-using-more-than-one-IB-and-shared-IB");
> + igt_subtest_with_dynamic("umq-gfx-with-IP-SDMA") {
> + if (arr_cap[AMD_IP_DMA]) {
> + igt_dynamic_f("umq-sdma")
> + amdgpu_command_submission_umq_sdma(device,
> + info.hw_ip_version_major < 11);
> + }
> + }
> +
> + igt_describe("Check-amdgpu_command_submission_umq_timeline_test");
> + igt_subtest_with_dynamic("umq-Syncobj-timeline") {
> + if (arr_cap[AMD_IP_DMA]) {
> + igt_dynamic_f("umq_timeline")
> + amdgpu_command_submission_umq_timeline_test(device,
> + info.hw_ip_version_major < 11);
> + }
> + }
> +
> + igt_describe("Check-amdgpu_command_submission_umq_synchronize_test");
> + igt_subtest_with_dynamic("umq-Synchronize") {
> + if (arr_cap[AMD_IP_DMA]) {
> + igt_dynamic_f("umq_synchronize")
> + amdgpu_command_submission_umq_synchronize_test(device,
> + info.hw_ip_version_major < 11);
> + }
> + }
> +
> + igt_fixture {
> + amdgpu_device_deinitialize(device);
> + drm_close_driver(fd);
> + }
> +}
> diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
> index 7d40f788b..a15a3884c 100644
> --- a/tests/amdgpu/meson.build
> +++ b/tests/amdgpu/meson.build
> @@ -63,7 +63,13 @@ if libdrm_amdgpu.found()
> else
> warning('libdrm <= 2.4.104 found, amd_queue_reset test not applicable')
> endif
> - amdgpu_deps += libdrm_amdgpu
> + # Check for amdgpu_create_userqueue function
> + if cc.has_function('amdgpu_create_userqueue', dependencies: libdrm_amdgpu)
> + amdgpu_progs += [ 'amd_userq_basic' ]
> + else
> + warning('amdgpu_create_userqueue not found in libdrm_amdgpu, skipping amd userq test')
> + endif
> + amdgpu_deps += libdrm_amdgpu
> endif
>
> foreach prog : amdgpu_progs
> --
> 2.25.1
>
^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH i-g-t] test/amdgpu: add user queue test
2025-03-27 7:17 [PATCH i-g-t] test/amdgpu: add user queue test Jesse.zhang@amd.com
` (3 preceding siblings ...)
2025-03-27 16:32 ` [PATCH i-g-t] " Kamil Konieczny
@ 2025-03-27 19:00 ` vitaly prosyak
2025-04-06 13:43 ` ✗ Xe.CI.Full: failure for " Patchwork
5 siblings, 0 replies; 8+ messages in thread
From: vitaly prosyak @ 2025-03-27 19:00 UTC (permalink / raw)
To: Jesse.zhang@amd.com, igt-dev
Cc: Vitaly Prosyak, Alex Deucher, Christian Koenig,
Srinivasan Shanmugam
Hi Jesse, please note that several improvements are required, as outlined below.
Thanks, Vitaly
On 2025-03-27 03:17, Jesse.zhang@amd.com wrote:
> From: "Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>"
>
> This patch introduces a new test for AMDGPU user queues, which provides
> functionality for userspace to manage GPU queues directly. The test covers:
>
> 1. Basic user queue operations for GFX, COMPUTE and SDMA IP blocks
> 2. Synchronization between user queues using syncobjs
> 3. Timeline-based synchronization
> 4. Multi-threaded signaling and waiting scenarios
>
> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
> Signed-off-by: Jesse.zhang <Jesse.zhang@amd.com>
> ---
> include/drm-uapi/amdgpu_drm.h | 254 +++++
> tests/amdgpu/amd_userq_basic.c | 1706 ++++++++++++++++++++++++++++++++
> tests/amdgpu/meson.build | 8 +-
> 3 files changed, 1967 insertions(+), 1 deletion(-)
> create mode 100644 tests/amdgpu/amd_userq_basic.c
>
> diff --git a/include/drm-uapi/amdgpu_drm.h b/include/drm-uapi/amdgpu_drm.h
> index efe5de6ce..d83216a59 100644
> --- a/include/drm-uapi/amdgpu_drm.h
> +++ b/include/drm-uapi/amdgpu_drm.h
> @@ -54,6 +54,9 @@ extern "C" {
> #define DRM_AMDGPU_VM 0x13
> #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
> #define DRM_AMDGPU_SCHED 0x15
> +#define DRM_AMDGPU_USERQ 0x16
> +#define DRM_AMDGPU_USERQ_SIGNAL 0x17
> +#define DRM_AMDGPU_USERQ_WAIT 0x18
>
> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -71,6 +74,9 @@ extern "C" {
> #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
> #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> +#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> +#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>
> /**
> * DOC: memory domains
> @@ -319,6 +325,241 @@ union drm_amdgpu_ctx {
> union drm_amdgpu_ctx_out out;
> };
>
> +/* user queue IOCTL operations */
> +#define AMDGPU_USERQ_OP_CREATE 1
> +#define AMDGPU_USERQ_OP_FREE 2
> +
> +/*
> + * This structure is a container to pass input configuration
> + * info for all supported userqueue related operations.
> + * For operation AMDGPU_USERQ_OP_CREATE: user is expected
> + * to set all fields, except the parameter 'queue_id'.
> + * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected
> + * to be set is 'queue_id', everything else is ignored.
> + */
> +struct drm_amdgpu_userq_in {
> + /** AMDGPU_USERQ_OP_* */
> + __u32 op;
> + /** Queue id passed for operation USERQ_OP_FREE */
> + __u32 queue_id;
> + /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */
> + __u32 ip_type;
> + /**
> + * @doorbell_handle: the handle of doorbell GEM object
> + * associated to this userqueue client.
> + */
> + __u32 doorbell_handle;
> + /**
> + * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo.
> + * Kernel will generate absolute doorbell offset using doorbell_handle
> + * and doorbell_offset in the doorbell bo.
> + */
> + __u32 doorbell_offset;
> + __u32 _pad;
> + /**
> + * @queue_va: Virtual address of the GPU memory which holds the queue
> + * object. The queue holds the workload packets.
> + */
> + __u64 queue_va;
> + /**
> + * @queue_size: Size of the queue in bytes, this needs to be 256-byte
> + * aligned.
> + */
> + __u64 queue_size;
> + /**
> + * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR.
> + * This object must be at least 8 bytes in size and aligned to an 8-byte offset.
> + */
> + __u64 rptr_va;
> + /**
> + * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR.
> + * This object must be at least 8 bytes in size and aligned to an 8-byte offset.
> + *
> + * Queue, RPTR and WPTR can come from the same object, as long as the size
> + * and alignment related requirements are met.
> + */
> + __u64 wptr_va;
> + /**
> + * @mqd: MQD (memory queue descriptor) is a set of parameters which allow
> + * the GPU to uniquely define and identify a usermode queue.
> + *
> + * MQD data can be of different size for different GPU IP/engine and
> + * their respective versions/revisions, so this points to a __u64 *
> + * which holds IP specific MQD of this usermode queue.
> + */
> + __u64 mqd;
> + /**
> + * @size: size of MQD data in bytes, it must match the MQD structure
> + * size of the respective engine/revision defined in UAPI, e.g. for
> + * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11).
> + */
> + __u64 mqd_size;
> +};
> +
> +/* The structure to carry output of userqueue ops */
> +struct drm_amdgpu_userq_out {
> + /**
> + * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique
> + * queue ID to represent the newly created userqueue in the system, otherwise
> + * it should be ignored.
> + */
> + __u32 queue_id;
> + __u32 _pad;
> +};
> +
> +union drm_amdgpu_userq {
> + struct drm_amdgpu_userq_in in;
> + struct drm_amdgpu_userq_out out;
> +};
> +
> +/* GFX V11 IP specific MQD parameters */
> +struct drm_amdgpu_userq_mqd_gfx11 {
> + /**
> + * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer.
> + * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
> + */
> + __u64 shadow_va;
> + /**
> + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
> + * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
> + */
> + __u64 csa_va;
> +};
> +
> +/* GFX V11 SDMA IP specific MQD parameters */
> +struct drm_amdgpu_userq_mqd_sdma_gfx11 {
> + /**
> + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
> + * This must come from a separate GPU object; use the AMDGPU_INFO IOCTL
> + * to get the size.
> + */
> + __u64 csa_va;
> +};
> +
> +/* GFX V11 Compute IP specific MQD parameters */
> +struct drm_amdgpu_userq_mqd_compute_gfx11 {
> + /**
> + * @eop_va: Virtual address of the GPU memory to hold the EOP buffer.
> + * This must come from a separate GPU object; use the AMDGPU_INFO IOCTL
> + * to get the size.
> + */
> + __u64 eop_va;
> +};
> +
> +/* userq signal/wait ioctl */
> +struct drm_amdgpu_userq_signal {
> + /**
> + * @queue_id: Queue handle used by the userq fence creation function
> + * to retrieve the WPTR.
> + */
> + __u32 queue_id;
> + __u32 pad;
> + /**
> + * @syncobj_handles: The list of syncobj handles submitted by the user queue
> + * job to be signaled.
> + */
> + __u64 syncobj_handles;
> + /**
> + * @num_syncobj_handles: A count that represents the number of syncobj handles in
> + * @syncobj_handles.
> + */
> + __u64 num_syncobj_handles;
> + /**
> + * @bo_read_handles: The list of BO handles that the submitted user queue job
> + * is using for read only. This will update BO fences in the kernel.
> + */
> + __u64 bo_read_handles;
> + /**
> + * @bo_write_handles: The list of BO handles that the submitted user queue job
> + * is using for write only. This will update BO fences in the kernel.
> + */
> + __u64 bo_write_handles;
> + /**
> + * @num_bo_read_handles: A count that represents the number of read BO handles in
> + * @bo_read_handles.
> + */
> + __u32 num_bo_read_handles;
> + /**
> + * @num_bo_write_handles: A count that represents the number of write BO handles in
> + * @bo_write_handles.
> + */
> + __u32 num_bo_write_handles;
> +
> +};
> +
> +struct drm_amdgpu_userq_fence_info {
> + /**
> + * @va: A gpu address allocated for each queue which stores the
> + * read pointer (RPTR) value.
> + */
> + __u64 va;
> + /**
> + * @value: A 64 bit value represents the write pointer (WPTR) of the
> + * queue commands which compared with the RPTR value to signal the
> + * fences.
> + */
> + __u64 value;
> +};
> +
> +struct drm_amdgpu_userq_wait {
> + /**
> + * @syncobj_handles: The list of syncobj handles submitted by the user queue
> + * job to get the va/value pairs.
> + */
> + __u64 syncobj_handles;
> + /**
> + * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by
> + * the user queue job to get the va/value pairs at given @syncobj_timeline_points.
> + */
> + __u64 syncobj_timeline_handles;
> + /**
> + * @syncobj_timeline_points: The list of timeline syncobj points submitted by the
> + * user queue job for the corresponding @syncobj_timeline_handles.
> + */
> + __u64 syncobj_timeline_points;
> + /**
> + * @bo_read_handles: The list of read BO handles submitted by the user queue
> + * job to get the va/value pairs.
> + */
> + __u64 bo_read_handles;
> + /**
> + * @bo_write_handles: The list of write BO handles submitted by the user queue
> + * job to get the va/value pairs.
> + */
> + __u64 bo_write_handles;
> + /**
> + * @num_syncobj_timeline_handles: A count that represents the number of timeline
> + * syncobj handles in @syncobj_timeline_handles.
> + */
> + __u16 num_syncobj_timeline_handles;
> + /**
> + * @num_fences: This field can be used both as input and output. As input it defines
> + * the maximum number of fences that can be returned and as output it will specify
> + * how many fences were actually returned from the ioctl.
> + */
> + __u16 num_fences;
> + /**
> + * @num_syncobj_handles: A count that represents the number of syncobj handles in
> + * @syncobj_handles.
> + */
> + __u32 num_syncobj_handles;
> + /**
> + * @num_bo_read_handles: A count that represents the number of read BO handles in
> + * @bo_read_handles.
> + */
> + __u32 num_bo_read_handles;
> + /**
> + * @num_bo_write_handles: A count that represents the number of write BO handles in
> + * @bo_write_handles.
> + */
> + __u32 num_bo_write_handles;
> + /**
> + * @out_fences: The field is a return value from the ioctl containing the list of
> + * address/value pairs to wait for.
> + */
> + __u64 out_fences;
> +};
> +
> /* vm ioctl */
> #define AMDGPU_VM_OP_RESERVE_VMID 1
> #define AMDGPU_VM_OP_UNRESERVE_VMID 2
> @@ -592,6 +833,19 @@ struct drm_amdgpu_gem_va {
> __u64 offset_in_bo;
> /** Specify mapping size. Must be correctly aligned. */
> __u64 map_size;
> + /**
> + * vm_timeline_point is a sequence number used to add new timeline point.
> + */
> + __u64 vm_timeline_point;
> + /**
> + * The vm page table update fence is installed in given vm_timeline_syncobj_out
> + * at vm_timeline_point.
> + */
> + __u32 vm_timeline_syncobj_out;
> + /** the number of syncobj handles in @input_fence_syncobj_handles */
> + __u32 num_syncobj_handles;
> + /** Array of sync object handle to wait for given input fences */
> + __u64 input_fence_syncobj_handles;
> };
>
> #define AMDGPU_HW_IP_GFX 0
> diff --git a/tests/amdgpu/amd_userq_basic.c b/tests/amdgpu/amd_userq_basic.c
> new file mode 100644
> index 000000000..b010fed7a
> --- /dev/null
> +++ b/tests/amdgpu/amd_userq_basic.c
> @@ -0,0 +1,1706 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright 2014 Advanced Micro Devices, Inc.
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + * Copyright 2023 Advanced Micro Devices, Inc.
> + */
> + #include <pthread.h>
> + #include <time.h>
> + #include "lib/amdgpu/amd_memory.h"
> + #include "lib/amdgpu/amd_sdma.h"
> + #include "lib/amdgpu/amd_PM4.h"
> + #include "lib/amdgpu/amd_command_submission.h"
> + #include "lib/amdgpu/amd_compute.h"
> + #include "lib/amdgpu/amd_gfx.h"
> + #include "lib/amdgpu/amd_shaders.h"
> + #include "lib/amdgpu/amd_dispatch.h"
> + #include "include/drm-uapi/amdgpu_drm.h"
> + #include "lib/amdgpu/amd_cs_radv.h"
> +
> + #define BUFFER_SIZE (8 * 1024)
> +
> +/* Flag to indicate secure buffer related workload, unused for now */
> + #define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0)
> +/* Flag to indicate AQL workload, unused for now */
> + #define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
> +
Please move all these defines to the appropriate header files (e.g., amd_PM4.h, amd_sdma.h). Many of these definitions are already declared in the corresponding headers, so merge them to avoid redundancy.
We cannot support multiple declarations of the same identifier across different tests.
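As a sketch of what I mean (double-check what amd_PM4.h already provides before adding new guards):

	/* lib/amdgpu/amd_PM4.h */
	#ifndef PACKET3
	#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |	\
				 (((op) & 0xFF) << 8) |	\
				 ((n) & 0x3FFF) << 16)
	#endif

Then amd_userq_basic.c only includes the header and drops its local copies.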
> + #define PACKET_TYPE3 3
> + #define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
> + (((op) & 0xFF) << 8) | \
> + ((n) & 0x3FFF) << 16)
> +
> + #define PACKET3_NOP 0x10
> + #define PACKET3_PROTECTED_FENCE_SIGNAL 0xd0
> + #define PACKET3_FENCE_WAIT_MULTI 0xd1
> + #define PACKET3_WRITE_DATA 0x37
> +
> + #define PACKET3_WAIT_REG_MEM 0x3C
> + #define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
> + #define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
> + #define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
> + #define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
> +
> + #define WR_CONFIRM (1 << 20)
> + #define WRITE_DATA_DST_SEL(x) ((x) << 8)
> + #define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
> + #define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
> + #define WAIT_MEM_ENGINE_SEL(x) ((x) << 0)
> + #define WAIT_MEM_WAIT_PREEMPTABLE(x) ((x) << 1)
> + #define WAIT_MEM_CACHE_POLICY(x) ((x) << 2)
> + #define WAIT_MEM_POLL_INTERVAL(x) ((x) << 16)
> +
> + #define DOORBELL_INDEX 4
> + #define AMDGPU_USERQ_BO_WRITE 1
> +
> + #define PACKET3_RELEASE_MEM 0x49
> + #define PACKET3_RELEASE_MEM_CACHE_POLICY(x) ((x) << 25)
> + #define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29)
> + #define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24)
> + #define CACHE_FLUSH_AND_INV_TS_EVENT 0x00000014
> +
> + #define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0)
> + #define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8)
> + #define PACKET3_RELEASE_MEM_GCR_GLM_WB (1 << 12)
> + #define PACKET3_RELEASE_MEM_GCR_GLM_INV (1 << 13)
> + #define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14)
> + #define PACKET3_RELEASE_MEM_GCR_GL1_INV (1 << 15)
> + #define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16)
> + #define PACKET3_RELEASE_MEM_GCR_GL2_RANGE (1 << 17)
> + #define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19)
> + #define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20)
> + #define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21)
> + #define PACKET3_RELEASE_MEM_GCR_SEQ (1 << 22)
> +
> +//SDMA related
> + #define SDMA_OPCODE_COPY 1
> + #define SDMA_OPCODE_WRITE 2
> + #define SDMA_COPY_SUB_OPCODE_LINEAR 0
> + #define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \
> + (((sub_op) & 0xFF) << 8) | \
> + (((op) & 0xFF) << 0))
The macro "upper_32_bits" is already defined elsewhere...
> + #define upper_32_bits(n) ((uint32_t)(((n) >> 16) >> 16))
> + #define lower_32_bits(n) ((uint32_t)((n) & 0xfffffffc))
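Also, lower_32_bits() here masks off the two low bits, which differs from the usual kernel definition; if clearing those bits is intentional for the packet encoding, please use a separately named macro so readers are not misled:

	#define upper_32_bits(n)	((uint32_t)(((n) >> 16) >> 16))
	#define lower_32_bits(n)	((uint32_t)((n) & 0xffffffff))
	/* only if the packet really needs a dword-aligned low half: */
	#define lower_32_bits_dw(n)	((uint32_t)((n) & 0xfffffffc))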
> +
> +/* user queue IOCTL */
> + #define AMDGPU_USERQ_OP_CREATE 1
> + #define AMDGPU_USERQ_OP_FREE 2
> +
> +/* Flag to indicate secure buffer related workload, unused for now */
> + #define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0)
> +/* Flag to indicate AQL workload, unused for now */
> + #define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
> +
> +//#define WORKLOAD_COUNT 7
> + #define WORKLOAD_COUNT 1
> + #define DEBUG_USERQUEUE 1
> +
> + #define PAGE_SIZE 4096
> + #define USERMODE_QUEUE_SIZE (PAGE_SIZE * 256)
> + #define ALIGNMENT 4096
> +
> +struct amdgpu_userq_bo {
> + amdgpu_bo_handle handle;
> + amdgpu_va_handle va_handle;
> + uint64_t mc_addr;
> + uint64_t size;
> + void *ptr;
> +};
> +
Also, avoid using global variables for maintenance reasons. They can cause issues when helper functions are called from different processes or threads. Our queue_reset test benefits from the absence of global variables, making it easier to assemble new tests.
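For example, the shared state could travel through an explicit context (a sketch; the field set is illustrative):

	struct userq_sync_ctx {
		amdgpu_device_handle device;
		struct amdgpu_userq_bo shared_bo;
		int syncobj_fd1, syncobj_fd2;
		pthread_mutex_t lock;
		pthread_cond_t cond;
	};

and the test body would pass &ctx to pthread_create() instead of relying on file-scope variables.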
> +static struct amdgpu_userq_bo shared_userq_bo;
> +static int shared_syncobj_fd1;
> +static int shared_syncobj_fd2;
> +
> +pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
> +pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
> +
Additionally, the DEBUG_USERQUEUE flag should be removed or commented out.
> + #if DEBUG_USERQUEUE
> +static void packet_dump(uint32_t *ptr, int start, int end)
> +{
> + int i;
> +
> + igt_info("\n============PACKET==============\n");
> + for (i = start; i < end; i++)
> + igt_info("pkt[%d] = 0x%x\n", i - start, ptr[i]);
> +
> + igt_info("=================================\n");
> +}
> + #endif
> +
The function validation() is intended solely for debugging purposes, as we cannot reliably wait a fixed amount of time for a predefined value to land in memory before breaking out of the loop. Please wrap this validation within a debug conditional (ifdef).
We have comparison functions in hooks, for example:
int (*compare)(const struct amdgpu_ip_funcs *func, const struct amdgpu_ring_context *context, int div);
int (*compare_pattern)(const struct amdgpu_ip_funcs *func, const struct amdgpu_ring_context *context, int div);
> +static void validation(uint32_t *workload)
> +{
> + int i = 0;
> +
> + while (workload[0] != 0xdeadbeaf) {
> + if (i++ > 100)
> + break;
> + usleep(100);
> + }
> +
> + igt_info("\n========OUTPUT==========\n");
> + for (i = 0; i < 5; i++)
> + igt_info("worklod[%d] = %x\n", i, workload[i]);
> +
> + igt_info("===========================\n");
> +}
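One possible shape for a bounded check that fails loudly instead of falling through (a sketch; the ~10 ms budget is arbitrary):

	static bool poll_for_value(volatile uint32_t *ptr, uint32_t want)
	{
		int retries = 100;

		/* poll until the value lands or the retry budget runs out */
		while (*ptr != want && retries--)
			usleep(100);
		return *ptr == want;
	}

Callers would then do igt_assert(poll_for_value(workload, 0xdeadbeaf)); so a timeout is an explicit failure.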
> +
Packet assembly is an ASIC-specific operation and should be implemented in amd_ip_blocks.c. A separate hook may be required depending on the context.
Please ensure this is applied consistently across all relevant areas.
> +static void create_relmem_workload(uint32_t *ptr, int *npkt, int data,
> + uint64_t *wptr_cpu, uint64_t *doorbell_ptr,
> + uint32_t q_id, uint64_t addr)
> +{
> + ptr[(*npkt)++] = (PACKET3(PACKET3_RELEASE_MEM, 6));
> + ptr[(*npkt)++] = 0x0030e514;
> + ptr[(*npkt)++] = 0x23010000;
> + ptr[(*npkt)++] = lower_32_bits(addr);
> + ptr[(*npkt)++] = upper_32_bits(addr);
> + ptr[(*npkt)++] = 0xffffffff & data;
> + ptr[(*npkt)++] = 0;
> + ptr[(*npkt)++] = q_id;
> + *wptr_cpu = *npkt;
> + doorbell_ptr[DOORBELL_INDEX] = *npkt;
> +}
> +
> +static int create_submit_workload(uint32_t *ptr, int *npkt, uint32_t data,
> + uint64_t *wptr_cpu, uint64_t *doorbell_ptr,
> + uint32_t q_id, struct amdgpu_userq_bo *dstptr)
> +{
> + #if DEBUG_USERQUEUE
> + int start = *npkt;
> + #endif
> + ptr[(*npkt)++] = PACKET3(PACKET3_WRITE_DATA, 7);
> + ptr[(*npkt)++] =
> + WRITE_DATA_DST_SEL(5) | WR_CONFIRM | WRITE_DATA_CACHE_POLICY(3);
> +
> + ptr[(*npkt)++] = 0xfffffffc & (dstptr->mc_addr);
> + ptr[(*npkt)++] = (0xffffffff00000000 & (dstptr->mc_addr)) >> 32;
> + ptr[(*npkt)++] = data;
> + ptr[(*npkt)++] = data;
> + ptr[(*npkt)++] = data;
> + ptr[(*npkt)++] = data;
> + ptr[(*npkt)++] = data;
> + create_relmem_workload(ptr, npkt, 0xdeadbeaf, wptr_cpu,
> + doorbell_ptr, q_id, dstptr->mc_addr);
> + #if DEBUG_USERQUEUE
> + packet_dump(ptr, start, *npkt);
> + #endif
> + return 0;
> +}
> +
> +static void alloc_doorbell(amdgpu_device_handle device_handle, struct amdgpu_userq_bo *doorbell_bo,
> + unsigned int size, unsigned int domain)
> +{
> + struct amdgpu_bo_alloc_request req = {0};
> + amdgpu_bo_handle buf_handle;
> + int r;
> +
> + req.alloc_size = ALIGN(size, PAGE_SIZE);
> + req.preferred_heap = domain;
> +
> + r = amdgpu_bo_alloc(device_handle, &req, &buf_handle);
> + igt_assert_eq(r, 0);
> +
> + doorbell_bo->handle = buf_handle;
> + doorbell_bo->size = req.alloc_size;
> +
> + r = amdgpu_bo_cpu_map(doorbell_bo->handle,
> + (void **)&doorbell_bo->ptr);
> + igt_assert_eq(r, 0);
> +}
> +
> +static int timeline_syncobj_wait(amdgpu_device_handle device_handle, uint32_t timeline_syncobj_handle)
> +{
> + uint64_t point, signaled_point;
> + uint64_t timeout;
> + struct timespec tp;
> + uint32_t flags = DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED;
> + int r;
> +
> + do {
> + r = amdgpu_cs_syncobj_query2(device_handle, &timeline_syncobj_handle,
> + (uint64_t *)&point, 1, flags);
> + if (r)
> + return r;
> +
> + clock_gettime(CLOCK_MONOTONIC, &tp);
> + timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
> + timeout += 100000000; //100 millisec
> + r = amdgpu_cs_syncobj_timeline_wait(device_handle, &timeline_syncobj_handle,
> + (uint64_t *)&point, 1, timeout,
> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
> + NULL);
> + if (r)
> + return r;
> +
> + r = amdgpu_cs_syncobj_query(device_handle, &timeline_syncobj_handle, &signaled_point, 1);
> + if (r)
> + return r;
> + } while (point != signaled_point);
> +
> + return r;
> +}
> +
> +static int
> +amdgpu_bo_unmap_and_free_uq(amdgpu_device_handle dev, amdgpu_bo_handle bo,
> + amdgpu_va_handle va_handle, uint64_t mc_addr, uint64_t size,
> + uint32_t timeline_syncobj_handle, uint16_t point)
> +{
> + amdgpu_bo_cpu_unmap(bo);
> + amdgpu_bo_va_op_raw2(dev, bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP, timeline_syncobj_handle, point, 0, 0);
> +
> + amdgpu_va_range_free(va_handle);
> + amdgpu_bo_free(bo);
> +
> + return 0;
> +}
> +
> +static int amdgpu_bo_alloc_and_map_uq(amdgpu_device_handle dev,
> + uint64_t size,
> + uint64_t alignment,
> + uint64_t heap,
> + uint64_t alloc_flags,
> + uint64_t mapping_flags,
> + amdgpu_bo_handle *bo,
> + void **cpu,
> + uint64_t *mc_address,
> + amdgpu_va_handle *va_handle,
> + uint32_t timeline_syncobj_handle,
> + uint64_t point)
> +{
> + struct amdgpu_bo_alloc_request request = {};
> + amdgpu_bo_handle buf_handle;
> + amdgpu_va_handle handle;
> + uint64_t vmc_addr;
> + int r;
> +
> + request.alloc_size = size;
> + request.phys_alignment = alignment;
> + request.preferred_heap = heap;
> + request.flags = alloc_flags;
> +
> + r = amdgpu_bo_alloc(dev, &request, &buf_handle);
> + if (r)
> + return r;
> +
> + r = amdgpu_va_range_alloc(dev,
> + amdgpu_gpu_va_range_general,
> + size, alignment, 0, &vmc_addr,
> + &handle, 0);
> + if (r)
> + goto error_va_alloc;
> +
> + r = amdgpu_bo_va_op_raw2(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
> + AMDGPU_VM_PAGE_READABLE |
> + AMDGPU_VM_PAGE_WRITEABLE |
> + AMDGPU_VM_PAGE_EXECUTABLE |
> + mapping_flags,
> + AMDGPU_VA_OP_MAP,
> + timeline_syncobj_handle,
> + point, 0, 0);
> + if (r)
> + goto error_va_map;
> +
> + r = amdgpu_bo_cpu_map(buf_handle, cpu);
> + if (r)
> + goto error_cpu_map;
> +
> + *bo = buf_handle;
> + *mc_address = vmc_addr;
> + *va_handle = handle;
> +
> + return 0;
> +
> + error_cpu_map:
> + amdgpu_bo_cpu_unmap(buf_handle);
> + error_va_map:
> + amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
> + error_va_alloc:
> + amdgpu_bo_free(buf_handle);
> + return r;
> +}
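For what it's worth, the helper above is what lets the rest of the file treat VM page-table updates as awaitable; the recurring usage pattern in this test is:

	r = amdgpu_bo_alloc_and_map_uq(dev, size, align, heap, flags, mtype,
				       &bo, &cpu, &mc_addr, &va_handle,
				       timeline_syncobj_handle, ++point);
	igt_assert_eq(r, 0);
	/* block until the page-table update for 'point' has signaled */
	r = timeline_syncobj_wait(dev, timeline_syncobj_handle);
	igt_assert_eq(r, 0);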
> +
> +static void free_workload(amdgpu_device_handle device_handle, struct amdgpu_userq_bo *dstptr,
> + uint32_t timeline_syncobj_handle, uint64_t point,
> + uint64_t syncobj_handles_array, uint32_t num_syncobj_handles)
> +{
> + int r;
> +
> + r = amdgpu_bo_unmap_and_free_uq(device_handle, dstptr->handle, dstptr->va_handle,
> + dstptr->mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle, point);
> + igt_assert_eq(r, 0);
> +}
> +
> +static int allocate_workload(amdgpu_device_handle device_handle, struct amdgpu_userq_bo *dstptr,
> + uint32_t timeline_syncobj_handle, uint64_t point)
> +{
> +
> + uint64_t gtt_flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> +
> + int r;
> +
> + r = amdgpu_bo_alloc_and_map_uq(device_handle, PAGE_SIZE,
> + PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &dstptr->handle, &dstptr->ptr,
> + &dstptr->mc_addr, &dstptr->va_handle,
> + timeline_syncobj_handle, point);
> + if (!r)
> + memset(dstptr->ptr, 0, PAGE_SIZE);
> + return r;
> +}
> +
> +static int create_sync_objects(int fd, uint32_t *timeline_syncobj_handle,
> + uint32_t *timeline_syncobj_handle2)
> +{
> + int r;
> +
> + r = drmSyncobjCreate(fd, 0, timeline_syncobj_handle);
> + if (r)
> + return r;
> +
> + r = drmSyncobjCreate(fd, 0, timeline_syncobj_handle2);
> +
> + return r;
> +}
> +
> +static void *userq_signal(void *data)
> +{
> + struct amdgpu_userq_bo queue, shadow, doorbell, wptr_bo, rptr;
> + uint32_t q_id, syncobj_handle, syncobj_handle1, db_handle;
> + uint64_t gtt_flags = 0, *doorbell_ptr, *wptr;
> + struct drm_amdgpu_userq_mqd_gfx11 mqd;
> + struct amdgpu_userq_bo gds, csa;
> + uint32_t syncarray[2];
> + uint32_t *ptr;
> + int r, i;
> + uint32_t timeline_syncobj_handle;
> + uint64_t point = 0;
> + uint32_t timeline_syncobj_handle2;
> + uint64_t point2 = 0;
> + struct drm_amdgpu_userq_signal signal_data;
> + uint32_t bo_read_handles[1], bo_write_handles[1];
> + uint32_t read_handle, write_handle;
> +
> +
> + amdgpu_device_handle device = (amdgpu_device_handle)data;
> +
> + int fd = amdgpu_device_get_fd(device);
> +
> + r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &queue.handle, &queue.ptr,
> + &queue.mc_addr, &queue.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
> + PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &wptr_bo.handle, &wptr_bo.ptr,
> + &wptr_bo.mc_addr, &wptr_bo.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
> + PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &rptr.handle, &rptr.ptr,
> + &rptr.mc_addr, &rptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &shadow.handle, &shadow.ptr,
> + &shadow.mc_addr, &shadow.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + 0,
> + &gds.handle, &gds.ptr,
> + &gds.mc_addr, &gds.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + 0,
> + &csa.handle, &csa.ptr,
> + &csa.mc_addr, &csa.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
> +
> + mqd.shadow_va = shadow.mc_addr;
> + //mqd.gds_va = gds.mc_addr;
> + mqd.csa_va = csa.mc_addr;
> +
> + doorbell_ptr = (uint64_t *)doorbell.ptr;
> +
> + ptr = (uint32_t *)queue.ptr;
> + memset(ptr, 0, USERMODE_QUEUE_SIZE); /* clear the whole ring, not one dword */
> +
> + wptr = (uint64_t *)wptr_bo.ptr;
> + memset(wptr, 0, sizeof(*wptr));
> +
> + //amdgpu_userqueue_get_bo_handle(doorbell.handle, &db_handle);
> + amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
> +
> + /* Create the Usermode Queue */
> + r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
> + db_handle, DOORBELL_INDEX,
> + queue.mc_addr, USERMODE_QUEUE_SIZE,
> + wptr_bo.mc_addr, rptr.mc_addr, &mqd, &q_id);
> + igt_assert_eq(r, 0);
> + if (r)
> + goto err_free_queue;
> +
> + r = drmSyncobjCreate(fd, 0, &syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjCreate(fd, 0, &syncobj_handle1);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjHandleToFD(fd, syncobj_handle, &shared_syncobj_fd2);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjHandleToFD(fd, syncobj_handle1, &shared_syncobj_fd1);
> + igt_assert_eq(r, 0);
> +
> + syncarray[0] = syncobj_handle;
> + syncarray[1] = syncobj_handle1;
> +
> + ptr[0] = PACKET3(PACKET3_WRITE_DATA, 7);
> + ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM | WRITE_DATA_CACHE_POLICY(3);
> + ptr[2] = 0xfffffffc & (shared_userq_bo.mc_addr);
> + ptr[3] = (0xffffffff00000000 & (shared_userq_bo.mc_addr)) >> 32;
> + ptr[4] = 0xdeadbeaf;
> + ptr[5] = 0xdeadbeaf;
> + ptr[6] = 0xdeadbeaf;
> + ptr[7] = 0xdeadbeaf;
> + ptr[8] = 0xdeadbeaf;
> +
> + for (i = 9; i <= 60; i++)
> + ptr[i] = PACKET3(PACKET3_NOP, 0x3fff);
> +
> + ptr[i++] = PACKET3(PACKET3_PROTECTED_FENCE_SIGNAL, 0);
> +
> + *wptr = ++i;
> + r = amdgpu_bo_export(queue.handle, amdgpu_bo_handle_type_kms, &read_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_export(shadow.handle, amdgpu_bo_handle_type_kms, &write_handle);
> + igt_assert_eq(r, 0);
> + // Assign the exported handles to the arrays
> + bo_read_handles[0] = read_handle;
> + bo_write_handles[0] = write_handle;
> +
> + signal_data.queue_id = q_id;
> + signal_data.syncobj_handles = (uint64_t)&syncarray;
> + signal_data.num_syncobj_handles = 2;
> + signal_data.bo_write_handles = (uint64_t)bo_write_handles;
> + signal_data.num_bo_write_handles = 1;
> + signal_data.bo_read_handles = (uint64_t)bo_read_handles;
> + signal_data.num_bo_read_handles = 1;
> +
> + r = amdgpu_userq_signal(device, &signal_data);
> + igt_assert_eq(r, 0);
> +
> + doorbell_ptr[DOORBELL_INDEX] = i;
> +
> + /* Free the Usermode Queue */
> + r = amdgpu_free_userqueue(device, q_id);
> + igt_assert_eq(r, 0);
> + if (!r)
> + pthread_cond_signal(&cond);
> +
> +err_free_queue:
> + r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
> + csa.va_handle,
> + csa.mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
> + gds.va_handle,
> + gds.mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
> + shadow.va_handle,
> + shadow.mc_addr, PAGE_SIZE * 4,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_cpu_unmap(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_free(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle,
> + rptr.mc_addr, PAGE_SIZE);
> +
> + amdgpu_bo_unmap_and_free(wptr_bo.handle, wptr_bo.va_handle,
> + wptr_bo.mc_addr, PAGE_SIZE);
> +
> + amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
> + queue.mc_addr, USERMODE_QUEUE_SIZE);
> +
> + drmSyncobjDestroy(fd, timeline_syncobj_handle);
> + drmSyncobjDestroy(fd, timeline_syncobj_handle2);
> +
> + return (void *)(long)r;
> +}
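The thread pairing relies on exporting the syncobjs as fds on the signaling side and importing them on the waiting side; in isolation the libdrm calls look like this (sketch):

	/* signaling thread */
	r = drmSyncobjHandleToFD(fd, syncobj_handle, &shared_fd);
	/* waiting thread */
	r = drmSyncobjFDToHandle(fd, shared_fd, &syncobj_handle2);

which is what userq_signal()/userq_wait() do via the shared_syncobj_fd* variables.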
> +
> +static void *userq_wait(void *data)
> +{
> + struct amdgpu_userq_bo queue, shadow, doorbell, wptr_bo, rptr;
> + struct amdgpu_userq_bo gds, csa;
> + struct drm_amdgpu_userq_fence_info *fence_info = NULL;
> + uint32_t syncobj_handle, syncobj_handle1, db_handle;
> + uint64_t num_fences;
> + uint64_t gtt_flags = 0, *doorbell_ptr, *wptr;
> + struct drm_amdgpu_userq_mqd_gfx11 mqd;
> + uint64_t gpu_addr, reference_val;
> + uint32_t *ptr;
> + uint32_t q_id;
> + int i, r, fd;
> + uint32_t timeline_syncobj_handle;
> + uint64_t point = 0;
> + uint32_t timeline_syncobj_handle2;
> + uint64_t point2 = 0;
> + struct drm_amdgpu_userq_wait wait_data;
> + uint32_t bo_read_handles[1], bo_write_handles[1];
> + uint32_t read_handle, write_handle;
> + uint32_t syncarray[3];
> + uint64_t points[3]; /* timeline points are read as 64-bit values */
> + amdgpu_device_handle device;
> +
> + pthread_mutex_lock(&lock);
> + pthread_cond_wait(&cond, &lock);
> + pthread_mutex_unlock(&lock);
> +
> + device = (amdgpu_device_handle)data;
> + fd = amdgpu_device_get_fd(device);
> +
> + r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &queue.handle, &queue.ptr,
> + &queue.mc_addr, &queue.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
> + PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &wptr_bo.handle, &wptr_bo.ptr,
> + &wptr_bo.mc_addr, &wptr_bo.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
> + PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &rptr.handle, &rptr.ptr,
> + &rptr.mc_addr, &rptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &shadow.handle, &shadow.ptr,
> + &shadow.mc_addr, &shadow.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + 0,
> + &gds.handle, &gds.ptr,
> + &gds.mc_addr, &gds.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + 0,
> + &csa.handle, &csa.ptr,
> + &csa.mc_addr, &csa.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
> +
> + mqd.shadow_va = shadow.mc_addr;
> + mqd.csa_va = csa.mc_addr;
> +
> + doorbell_ptr = (uint64_t *)doorbell.ptr;
> +
> + ptr = (uint32_t *)queue.ptr;
> + memset(ptr, 0, USERMODE_QUEUE_SIZE); /* clear the whole ring, not one dword */
> +
> + wptr = (uint64_t *)wptr_bo.ptr;
> + memset(wptr, 0, sizeof(*wptr));
> +
> + amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
> +
> + /* Create the Usermode Queue */
> + r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
> + db_handle, DOORBELL_INDEX,
> + queue.mc_addr, USERMODE_QUEUE_SIZE,
> + wptr_bo.mc_addr, rptr.mc_addr, &mqd, &q_id);
> + igt_assert_eq(r, 0);
> + if (r)
> + goto err_free_queue;
> +
> + r = drmSyncobjFDToHandle(fd, shared_syncobj_fd1, &syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjFDToHandle(fd, shared_syncobj_fd2, &syncobj_handle1);
> + igt_assert_eq(r, 0);
> +
> + syncarray[0] = syncobj_handle;
> + syncarray[1] = syncobj_handle1;
> +
> + points[0] = 0;
> + points[1] = 0;
> + num_fences = 0;
> + r = amdgpu_bo_export(queue.handle, amdgpu_bo_handle_type_kms, &read_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_export(shadow.handle, amdgpu_bo_handle_type_kms, &write_handle);
> + igt_assert_eq(r, 0);
> +
> + // Assign the exported handles to the arrays
> + bo_read_handles[0] = read_handle;
> + bo_write_handles[0] = write_handle;
> +
> + wait_data.syncobj_handles = (uint64_t)syncarray;
> + wait_data.num_syncobj_handles = 2;
> + wait_data.syncobj_timeline_handles = (uint64_t)syncarray;
> + wait_data.syncobj_timeline_points = (uint64_t)points;
> + wait_data.num_syncobj_timeline_handles = 2;
> + wait_data.bo_read_handles = (uint64_t)bo_read_handles;
> + wait_data.num_bo_read_handles = 1;
> + wait_data.bo_write_handles = (uint64_t)bo_write_handles;
> + wait_data.num_bo_write_handles = 1;
> + wait_data.out_fences = (uint64_t)fence_info;
> + wait_data.num_fences = num_fences;
> +
> + /* first pass only queries how many fences will be returned */
> + r = amdgpu_userq_wait(device, &wait_data);
> + igt_assert_eq(r, 0);
> +
> + num_fences = wait_data.num_fences;
> + fence_info = malloc(num_fences * sizeof(struct drm_amdgpu_userq_fence_info));
> + if (!fence_info)
> + goto err_free_queue;
> + memset(fence_info, 0, num_fences * sizeof(struct drm_amdgpu_userq_fence_info));
> + wait_data.out_fences = (uint64_t)fence_info;
> + r = amdgpu_userq_wait(device, &wait_data);
> + igt_assert_eq(r, 0);
> +
> + for (i = 0; i < num_fences; i++) {
> + igt_info("num_fences = %lu fence_info.va=0x%llx fence_info.value=%llu\n",
> + num_fences, (fence_info + i)->va, (fence_info + i)->value);
> +
> + gpu_addr = (fence_info + i)->va;
> + reference_val = (fence_info + i)->value;
> + ptr[0] = PACKET3(PACKET3_FENCE_WAIT_MULTI, 4);
> + ptr[1] = WAIT_MEM_ENGINE_SEL(1) | WAIT_MEM_WAIT_PREEMPTABLE(0) | WAIT_MEM_CACHE_POLICY(3) | WAIT_MEM_POLL_INTERVAL(2);
> + ptr[2] = 0xffffffff & (gpu_addr);
> + ptr[3] = (0xffffffff00000000 & (gpu_addr)) >> 32;
> + ptr[4] = 0xffffffff & (reference_val);
> + ptr[5] = (0xffffffff00000000 & (reference_val)) >> 32;
> + *wptr = 6;
> + doorbell_ptr[DOORBELL_INDEX] = 6;
> + }
> + /* Free the Usermode Queue */
> + r = amdgpu_free_userqueue(device, q_id);
> + igt_assert_eq(r, 0);
> +
> +err_free_queue:
> + r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
> + csa.va_handle,
> + csa.mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
> + gds.va_handle,
> + gds.mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
> + shadow.va_handle,
> + shadow.mc_addr, PAGE_SIZE * 4,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_cpu_unmap(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_free(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle,
> + rptr.mc_addr, PAGE_SIZE);
Please remove this line wherever applicable.
* //igt_assert_eq(r, 0);
> + //igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(wptr_bo.handle, wptr_bo.va_handle,
> + wptr_bo.mc_addr, PAGE_SIZE);
> + //igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
> + queue.mc_addr, USERMODE_QUEUE_SIZE);
> + //igt_assert_eq(r, 0);
> +
> + r = drmSyncobjDestroy(fd, syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjDestroy(fd, syncobj_handle1);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjDestroy(fd, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> + r = drmSyncobjDestroy(fd, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> + free(fence_info);
> + return (void *)(long)r;
> +}
> +
> +static void amdgpu_command_submission_umq_synchronize_test(amdgpu_device_handle device,
> + bool ce_avails)
> +{
> + int r;
> + static pthread_t signal_thread, wait_thread;
> + uint64_t gtt_flags = 0;
> + uint16_t point = 0;
> + uint16_t point2 = 0;
> + uint32_t timeline_syncobj_handle;
> + uint32_t timeline_syncobj_handle2;
> +
> +
> + int fd = amdgpu_device_get_fd(device);
> +
> + r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &shared_userq_bo.handle, &shared_userq_bo.ptr,
> + &shared_userq_bo.mc_addr, &shared_userq_bo.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = pthread_create(&signal_thread, NULL, userq_signal, device);
> + igt_assert_eq(r, 0);
> +
> + r = pthread_create(&wait_thread, NULL, userq_wait, device);
> + igt_assert_eq(r, 0);
> +
> + r = pthread_join(signal_thread, NULL);
> + igt_assert_eq(r, 0);
> +
> + r = pthread_join(wait_thread, NULL);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free_uq(device, shared_userq_bo.handle,
> + shared_userq_bo.va_handle,
> + shared_userq_bo.mc_addr,
> + PAGE_SIZE, timeline_syncobj_handle2,
> + ++point2);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> +}
> +
> +static void amdgpu_command_submission_umq_timeline_test(amdgpu_device_handle device,
> + bool ce_avails)
> +{
> + struct amdgpu_userq_bo queue, shadow, doorbell, wptr, rptr;
> + struct amdgpu_userq_bo gds, csa;
> + struct drm_amdgpu_userq_fence_info *fence_info = NULL;
> + uint64_t num_fences;
> + uint64_t gtt_flags = 0, *doorbell_ptr, *wptr_cpu;
> + struct drm_amdgpu_userq_mqd_gfx11 mqd = {0};
> + struct amdgpu_userq_bo dstptrs[WORKLOAD_COUNT];
> + uint32_t q_id, db_handle, *ptr;
> + uint32_t timeline_syncobj_handle;
> + uint64_t point = 0;
> + uint32_t timeline_syncobj_handle2;
> + uint64_t point2 = 0;
> + uint32_t syncarray[3];
> + uint64_t points[3];
> + uint32_t test_timeline_syncobj_handle;
> + uint32_t test_timeline_syncobj_handle2;
> + uint64_t signal_point, payload;
> + struct drm_amdgpu_userq_wait wait_data = {0};
> + int i, r, npkt = 0;
> + uint32_t bo_read_handles[1], bo_write_handles[1];
> + uint32_t read_handle, write_handle;
> + int fd = amdgpu_device_get_fd(device);
> +
> + r = create_sync_objects(fd, &timeline_syncobj_handle,
> + &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjCreate(fd, 0, &test_timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjCreate(fd, 0, &test_timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &queue.handle, &queue.ptr,
> + &queue.mc_addr, &queue.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &wptr.handle, &wptr.ptr,
> + &wptr.mc_addr, &wptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &rptr.handle, &rptr.ptr,
> + &rptr.mc_addr, &rptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &shadow.handle, &shadow.ptr,
> + &shadow.mc_addr, &shadow.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + 0,
> + &gds.handle, &gds.ptr,
> + &gds.mc_addr, &gds.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags,
> + 0,
> + &csa.handle, &csa.ptr,
> + &csa.mc_addr, &csa.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
> +
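> + /* wire the shadow and CSA buffers into the GFX11 MQD */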
> + mqd.shadow_va = shadow.mc_addr;
> + mqd.csa_va = csa.mc_addr;
> +
> + doorbell_ptr = (uint64_t *) doorbell.ptr;
> +
> + ptr = (uint32_t *)queue.ptr;
> + memset(ptr, 0, sizeof(*ptr));
> +
> + wptr_cpu = (uint64_t *)wptr.ptr;
> +
> + amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
> +
> + /* Create the Usermode Queue */
> + r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
> + db_handle, DOORBELL_INDEX,
> + queue.mc_addr, USERMODE_QUEUE_SIZE,
> + wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
> + igt_assert_eq(r, 0);
> + if (r)
> + goto err_free_queue;
> +
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> + }
> +
> + /* wait */
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = create_submit_workload(ptr, &npkt, 0x1111*(i+1),
> + wptr_cpu, doorbell_ptr, q_id,
> + &dstptrs[i]);
> + igt_assert_eq(r, 0);
> + }
> +
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + validation((uint32_t *)dstptrs[i].ptr);
> +
> + signal_point = 5;
> + r = amdgpu_cs_syncobj_timeline_signal(device, &test_timeline_syncobj_handle,
> + &signal_point, 1);
> + igt_assert_eq(r, 0);
> + r = amdgpu_cs_syncobj_query(device, &test_timeline_syncobj_handle,
> + &payload, 1);
> + igt_assert_eq(r, 0);
> + igt_assert_eq(payload, 5);
> +
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> + }
> +
> + /* wait */
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = create_submit_workload(ptr, &npkt, 0x1111*(i+1),
> + wptr_cpu, doorbell_ptr, q_id,
> + &dstptrs[i]);
> + igt_assert_eq(r, 0);
> + }
> +
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + validation((uint32_t *)dstptrs[i].ptr);
> +
> + signal_point = 10;
> + r = amdgpu_cs_syncobj_timeline_signal(device, &test_timeline_syncobj_handle,
> + &signal_point, 1);
> + igt_assert_eq(r, 0);
> + r = amdgpu_cs_syncobj_query(device, &test_timeline_syncobj_handle,
> + &payload, 1);
> + igt_assert_eq(r, 0);
> + igt_assert_eq(payload, 10);
> +
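> + /* wait on both timeline points signalled above (5 and 10) */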
> + syncarray[0] = test_timeline_syncobj_handle;
> + syncarray[1] = test_timeline_syncobj_handle;
> +
> + points[0] = 5;
> + points[1] = 10;
> +
> + num_fences = 0;
> +
> + /* Export the buffer object handles */
> + r = amdgpu_bo_export(queue.handle, amdgpu_bo_handle_type_kms, &read_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_export(shadow.handle, amdgpu_bo_handle_type_kms, &write_handle);
> + igt_assert_eq(r, 0);
> +
> + /* Assign the exported handles to the arrays */
> + bo_read_handles[0] = read_handle;
> + bo_write_handles[0] = write_handle;
> +
> + wait_data.syncobj_handles = (uint64_t)syncarray;
> + wait_data.num_syncobj_handles = 2;
> + wait_data.syncobj_timeline_handles = (uint64_t)syncarray;
> + wait_data.syncobj_timeline_points = (uint64_t)points;
> + wait_data.num_syncobj_timeline_handles = 2;
> + wait_data.bo_read_handles = (uint64_t)bo_read_handles;
> + wait_data.num_bo_read_handles = 1;
> + wait_data.bo_write_handles = (uint64_t)bo_write_handles;
> + wait_data.num_bo_write_handles = 1;
> + wait_data.out_fences = (uint64_t)fence_info;
> + wait_data.num_fences = num_fences;
> + r = amdgpu_userq_wait(device, &wait_data);
> + igt_assert_eq(r, 0);
> +
> + /* the first call reports the fence count; fetch the fences with a second call */
> + num_fences = wait_data.num_fences;
> + fence_info = malloc(num_fences * sizeof(struct drm_amdgpu_userq_fence_info));
> + igt_assert(fence_info);
> + wait_data.out_fences = (uint64_t)fence_info;
> + r = amdgpu_userq_wait(device, &wait_data);
> + igt_assert_eq(r, 0);
> +
> + for (i = 0; i < num_fences; i++)
> + igt_info("num_fences = %lu fence_info.va=0x%llx fence_info.value=%llu\n",
> + num_fences, (fence_info + i)->va, (fence_info + i)->value);
> +
> + /* Free the Usermode Queue */
> + r = amdgpu_free_userqueue(device, q_id);
> + igt_assert_eq(r, 0);
> +
> + /* Free workload*/
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + free_workload(device, &dstptrs[i], timeline_syncobj_handle2, ++point2,
> + 0, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> +err_free_queue:
> + r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
> + csa.va_handle,
> + csa.mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
> + gds.va_handle,
> + gds.mc_addr, PAGE_SIZE,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
> + shadow.va_handle,
> + shadow.mc_addr, PAGE_SIZE * 4,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_cpu_unmap(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_free(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle,
> + rptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle,
> + wptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
> + queue.mc_addr, USERMODE_QUEUE_SIZE);
> +
> + r = drmSyncobjDestroy(fd, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjDestroy(fd, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjDestroy(fd, test_timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + r = drmSyncobjDestroy(fd, test_timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +}
> +
> +/**
> + * AMDGPU_HW_IP_DMA
> + * @param device
> + */
> +static void amdgpu_command_submission_umq_sdma(amdgpu_device_handle device,
> + bool ce_avails)
> +{
> + int r, i = 0, j = 0;
> + uint64_t gtt_flags = 0;
> + uint16_t point = 0;
> + uint16_t point2 = 0;
> + uint32_t *ptr, *dstptr;
> + uint32_t q_id, db_handle;
> + uint32_t timeline_syncobj_handle;
> + uint32_t timeline_syncobj_handle2;
> + uint64_t *doorbell_ptr, *wptr_cpu;
> + const int sdma_write_length = WORKLOAD_COUNT;
> + struct drm_amdgpu_userq_mqd_sdma_gfx11 mqd = {0};
> + struct amdgpu_userq_bo queue, doorbell, rptr, wptr, dst;
> + int fd = amdgpu_device_get_fd(device);
> +
> + r = create_sync_objects(fd, &timeline_syncobj_handle,
> + &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &queue.handle, &queue.ptr,
> + &queue.mc_addr, &queue.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &wptr.handle, &wptr.ptr,
> + &wptr.mc_addr, &wptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &rptr.handle, &rptr.ptr,
> + &rptr.mc_addr, &rptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 10,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + gtt_flags | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
> + AMDGPU_VM_MTYPE_UC,
> + &dst.handle, &dst.ptr,
> + &dst.mc_addr, &dst.va_handle,
> + timeline_syncobj_handle, ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + alloc_doorbell(device, &doorbell, PAGE_SIZE * 2, AMDGPU_GEM_DOMAIN_DOORBELL);
> +
> + doorbell_ptr = (uint64_t *) doorbell.ptr;
> +
> + wptr_cpu = (uint64_t *) wptr.ptr;
> +
> + ptr = (uint32_t *) queue.ptr;
> + memset(ptr, 0, sizeof(*ptr));
> +
> + dstptr = (uint32_t *)dst.ptr;
> + memset(dstptr, 0, sizeof(*dstptr) * sdma_write_length);
> +
> + amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
> +
> + /* Create the Usermode Queue */
> + r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_DMA,
> + db_handle, DOORBELL_INDEX,
> + queue.mc_addr, USERMODE_QUEUE_SIZE,
> + wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
> + igt_assert_eq(r, 0);
> + if (r)
> + goto err_free_queue;
> +
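> + /* linear write packet: header, 64-bit destination address, (count - 1), payload */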
> + ptr[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 0, 0);
> + ptr[i++] = lower_32_bits(dst.mc_addr);
> + ptr[i++] = upper_32_bits(dst.mc_addr);
> + ptr[i++] = sdma_write_length - 1;
> + while (j++ < sdma_write_length)
> + ptr[i++] = 0xdeadbeaf;
> +
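> + /* publish the new write pointer (dword count << 2 = bytes) and ring the doorbell */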
> + *wptr_cpu = i << 2;
> +
> + doorbell_ptr[DOORBELL_INDEX] = i << 2;
> +
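> + /* poll for the SDMA write to land, ~10 ms worst case */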
> + i = 0;
> + while (dstptr[0] != 0xdeadbeaf) {
> + if (i++ > 100)
> + break;
> + usleep(100);
> + }
> +
> + for (int k = 0; k < sdma_write_length; k++)
> + igt_assert_eq(dstptr[k], 0xdeadbeaf);
> +
> + /* Free the Usermode Queue */
> + r = amdgpu_free_userqueue(device, q_id);
> + igt_assert_eq(r, 0);
> +
> +err_free_queue:
> + r = amdgpu_bo_unmap_and_free_uq(device, dst.handle,
> + dst.va_handle, dst.mc_addr,
> + PAGE_SIZE * 10,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_cpu_unmap(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_free(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle, rptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle, wptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
> + queue.mc_addr, USERMODE_QUEUE_SIZE);
> +
> + drmSyncobjDestroy(fd, timeline_syncobj_handle);
> + drmSyncobjDestroy(fd, timeline_syncobj_handle2);
> +}
> +
> +/**
> + * AMDGPU_HW_IP_COMPUTE
> + * @param device
> + */
> +static void amdgpu_command_submission_umq_compute(amdgpu_device_handle device,
> + bool ce_avails)
> +{
> + int r, i = 0, npkt = 0;
> + uint64_t gtt_flags = 0;
> + uint16_t point = 0;
> + uint16_t point2 = 0;
> + uint32_t *ptr;
> + uint32_t q_id, db_handle;
> + uint32_t timeline_syncobj_handle;
> + uint32_t timeline_syncobj_handle2;
> + uint64_t *doorbell_ptr, *wptr_cpu;
> + struct amdgpu_userq_bo dstptrs[WORKLOAD_COUNT];
> + struct drm_amdgpu_userq_mqd_compute_gfx11 mqd = {0};
> + struct amdgpu_userq_bo queue, doorbell, rptr, wptr, eop;
> + int fd = amdgpu_device_get_fd(device);
> +
> + r = create_sync_objects(fd, &timeline_syncobj_handle,
> + &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &queue.handle, &queue.ptr,
> + &queue.mc_addr, &queue.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &wptr.handle, &wptr.ptr,
> + &wptr.mc_addr, &wptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &rptr.handle, &rptr.ptr,
> + &rptr.mc_addr, &rptr.va_handle);
> + igt_assert_eq(r, 0);
> +
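> + /* EOP buffer consumed by the compute MQD */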
> + r = amdgpu_bo_alloc_and_map_uq(device, 256,
> + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags, AMDGPU_VM_MTYPE_UC,
> + &eop.handle, &eop.ptr,
> + &eop.mc_addr, &eop.va_handle,
> + timeline_syncobj_handle,
> + ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
> +
> + mqd.eop_va = eop.mc_addr;
> +
> + doorbell_ptr = (uint64_t *) doorbell.ptr;
> +
> + wptr_cpu = (uint64_t *) wptr.ptr;
> +
> + ptr = (uint32_t *) queue.ptr;
> + memset(ptr, 0, sizeof(*ptr));
> +
> + amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
> +
> + /* Create the Usermode Queue */
> + r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_COMPUTE,
> + db_handle, DOORBELL_INDEX,
> + queue.mc_addr, USERMODE_QUEUE_SIZE,
> + wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
> + igt_assert_eq(r, 0);
> + if (r)
> + goto err_free_queue;
> +
> + /* allocate workload */
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle,
> + ++point);
> + igt_assert_eq(r, 0);
> + }
> +
> + /* wait */
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + /* create workload pkt */
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = create_submit_workload(ptr, &npkt, 0x1111 * (i + 1),
> + wptr_cpu, doorbell_ptr, q_id,
> + &dstptrs[i]);
> + igt_assert_eq(r, 0);
> + }
> +
> + /* validation of workload pkt */
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + validation((uint32_t *) dstptrs[i].ptr);
> +
> + /* Free the Usermode Queue */
> + r = amdgpu_free_userqueue(device, q_id);
> + igt_assert_eq(r, 0);
> +
> + /* Free workload */
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + free_workload(device, &dstptrs[i], timeline_syncobj_handle2, ++point2,
> + 0, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> +err_free_queue:
> + r = amdgpu_bo_unmap_and_free_uq(device, eop.handle,
> + eop.va_handle, eop.mc_addr,
> + 256,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_cpu_unmap(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_free(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle, rptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle, wptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
> + queue.mc_addr, USERMODE_QUEUE_SIZE);
> +
> + drmSyncobjDestroy(fd, timeline_syncobj_handle);
> + drmSyncobjDestroy(fd, timeline_syncobj_handle2);
> +}
> +
> +/**
> + * AMDGPU_HW_IP_GFX
> + * @param device
> + */
> +static void amdgpu_command_submission_umq_gfx(amdgpu_device_handle device,
> + bool ce_avails)
> +{
> + int r, i = 0, npkt = 0;
> + uint64_t gtt_flags = 0;
> + uint16_t point = 0;
> + uint16_t point2 = 0;
> + uint32_t *ptr;
> + uint32_t q_id, db_handle;
> + uint32_t timeline_syncobj_handle;
> + uint32_t timeline_syncobj_handle2;
> + uint64_t *doorbell_ptr, *wptr_cpu;
> + struct amdgpu_userq_bo dstptrs[WORKLOAD_COUNT];
> + struct drm_amdgpu_userq_mqd_gfx11 mqd = {0};
> + struct amdgpu_userq_bo queue, shadow, doorbell, rptr, wptr, gds, csa;
> + int fd = amdgpu_device_get_fd(device);
> +
> + r = create_sync_objects(fd, &timeline_syncobj_handle,
> + &timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &queue.handle, &queue.ptr,
> + &queue.mc_addr, &queue.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &wptr.handle, &wptr.ptr,
> + &wptr.mc_addr, &wptr.va_handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_raw(device, 8,
> + ALIGNMENT,
> + AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags,
> + AMDGPU_VM_MTYPE_UC,
> + &rptr.handle, &rptr.ptr,
> + &rptr.mc_addr, &rptr.va_handle);
> + igt_assert_eq(r, 0);
> +
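> + /* per-queue shadow, GDS and CSA buffers; shadow/CSA feed the MQD below */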
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 18,
> + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags, AMDGPU_VM_MTYPE_UC,
> + &shadow.handle, &shadow.ptr,
> + &shadow.mc_addr,
> + &shadow.va_handle,
> + timeline_syncobj_handle,
> + ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4,
> + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags, AMDGPU_VM_MTYPE_UC,
> + &gds.handle, &gds.ptr,
> + &gds.mc_addr, &gds.va_handle,
> + timeline_syncobj_handle,
> + ++point);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 20,
> + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
> + gtt_flags, AMDGPU_VM_MTYPE_UC,
> + &csa.handle, &csa.ptr,
> + &csa.mc_addr, &csa.va_handle,
> + timeline_syncobj_handle,
> + ++point);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
> +
> + mqd.shadow_va = shadow.mc_addr;
> + mqd.csa_va = csa.mc_addr;
> +
> + doorbell_ptr = (uint64_t *) doorbell.ptr;
> +
> + wptr_cpu = (uint64_t *) wptr.ptr;
> +
> + ptr = (uint32_t *) queue.ptr;
> + memset(ptr, 0, sizeof(*ptr));
> +
> + amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
> +
> + /* Create the Usermode Queue */
> + r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
> + db_handle, DOORBELL_INDEX,
> + queue.mc_addr, USERMODE_QUEUE_SIZE,
> + wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
> + igt_assert_eq(r, 0);
> + if (r)
> + goto err_free_queue;
> +
> + /* allocate workload */
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle,
> + ++point);
> + igt_assert_eq(r, 0);
> + }
> +
> + /* wait */
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle);
> + igt_assert_eq(r, 0);
> +
> + /* create workload pkt */
> + for (i = 0; i < WORKLOAD_COUNT; i++) {
> + r = create_submit_workload(ptr, &npkt, 0x1111 * (i + 1),
> + wptr_cpu, doorbell_ptr, q_id,
> + &dstptrs[i]);
> + igt_assert_eq(r, 0);
> + }
> +
> + /* validation of workload pkt */
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + validation((uint32_t *) dstptrs[i].ptr);
> +
> + /* Free the Usermode Queue */
> + r = amdgpu_free_userqueue(device, q_id);
> + igt_assert_eq(r, 0);
> +
> + /* Free workload */
> + for (i = 0; i < WORKLOAD_COUNT; i++)
> + free_workload(device, &dstptrs[i], timeline_syncobj_handle2, ++point2,
> + 0, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> +err_free_queue:
> + r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
> + csa.va_handle, csa.mc_addr,
> + PAGE_SIZE * 20,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
> + gds.va_handle, gds.mc_addr, PAGE_SIZE * 4,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
> + shadow.va_handle, shadow.mc_addr,
> + PAGE_SIZE * 18,
> + timeline_syncobj_handle2, ++point2);
> + igt_assert_eq(r, 0);
> +
> + r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_cpu_unmap(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + r = amdgpu_bo_free(doorbell.handle);
> + igt_assert_eq(r, 0);
> +
> + amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle, rptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle, wptr.mc_addr, 8);
> +
> + amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
> + queue.mc_addr, USERMODE_QUEUE_SIZE);
> +
> + drmSyncobjDestroy(fd, timeline_syncobj_handle);
> + drmSyncobjDestroy(fd, timeline_syncobj_handle2);
> +}
> +
> +igt_main
> +{
> + amdgpu_device_handle device;
> + struct amdgpu_gpu_info gpu_info = {0};
> + struct drm_amdgpu_info_hw_ip info = {0};
> + int fd = -1;
> + int r;
> + bool arr_cap[AMD_IP_MAX] = {0};
> +
> + igt_fixture {
> + uint32_t major, minor;
> + int err;
> +
> + fd = drm_open_driver(DRIVER_AMDGPU);
> +
> + err = amdgpu_device_initialize(fd, &major, &minor, &device);
> + igt_require(err == 0);
> + r = amdgpu_query_gpu_info(device, &gpu_info);
> + igt_assert_eq(r, 0);
> + r = amdgpu_query_hw_ip_info(device, AMDGPU_HW_IP_GFX, 0, &info);
> + igt_assert_eq(r, 0);
> + r = setup_amdgpu_ip_blocks(major, minor, &gpu_info, device);
> + igt_assert_eq(r, 0);
> + asic_rings_readness(device, 1, arr_cap);
> + }
> +
> + igt_describe("Check-GFX-UMQ-for-every-available-ring-works-for-write-const-fill-and-copy-operation-using-more-than-one-IB-and-shared-IB");
> + igt_subtest_with_dynamic("umq-gfx-with-IP-GFX") {
> + if (arr_cap[AMD_IP_GFX]) {
> + igt_dynamic_f("umq-gfx")
> + amdgpu_command_submission_umq_gfx(device,
> + info.hw_ip_version_major < 11);
> + }
> + }
> +
> + igt_describe("Check-COMPUTE-UMQ-for-every-available-ring-works-for-write-const-fill-and-copy-operation-using-more-than-one-IB-and-shared-IB");
> + igt_subtest_with_dynamic("umq-gfx-with-IP-COMPUTE") {
> + if (arr_cap[AMD_IP_COMPUTE]) {
> + igt_dynamic_f("umq-compute")
> + amdgpu_command_submission_umq_compute(device,
> + info.hw_ip_version_major < 11);
> + }
> + }
> +
> + igt_describe("Check-SDMA-UMQ-for-every-available-ring-works-for-write-const-fill-and-copy-operation-using-more-than-one-IB-and-shared-IB");
> + igt_subtest_with_dynamic("umq-gfx-with-IP-SDMA") {
> + if (arr_cap[AMD_IP_DMA]) {
> + igt_dynamic_f("umq-sdma")
> + amdgpu_command_submission_umq_sdma(device,
> + info.hw_ip_version_major < 11);
> + }
> + }
> +
> + igt_describe("Check-amdgpu_command_submission_umq_timeline_test");
> + igt_subtest_with_dynamic("umq-Syncobj-timeline") {
> + if (arr_cap[AMD_IP_DMA]) {
> + igt_dynamic_f("umq_timeline")
> + amdgpu_command_submission_umq_timeline_test(device,
> + info.hw_ip_version_major < 11);
> + }
> + }
> +
> + igt_describe("Check-amdgpu_command_submission_umq_synchronize_test");
> + igt_subtest_with_dynamic("umq-Synchronize") {
> + if (arr_cap[AMD_IP_DMA]) {
> + igt_dynamic_f("umq_synchronize")
> + amdgpu_command_submission_umq_synchronize_test(device,
> + info.hw_ip_version_major < 11);
> + }
> + }
> +
> + igt_fixture {
> + amdgpu_device_deinitialize(device);
> + drm_close_driver(fd);
> + }
> +}
> diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
> index 7d40f788b..a15a3884c 100644
> --- a/tests/amdgpu/meson.build
> +++ b/tests/amdgpu/meson.build
> @@ -63,7 +63,13 @@ if libdrm_amdgpu.found()
> else
> warning('libdrm <= 2.4.104 found, amd_queue_reset test not applicable')
> endif
> - amdgpu_deps += libdrm_amdgpu
> + # Check for amdgpu_create_userqueue function
> + if cc.has_function('amdgpu_create_userqueue', dependencies: libdrm_amdgpu)
> + amdgpu_progs += [ 'amd_userq_basic' ]
> + else
> + warning('amdgpu_create_userqueue not found in libdrm_amdgpu, skipping amd userq test')
> + endif
> + amdgpu_deps += libdrm_amdgpu
> endif
>
> foreach prog : amdgpu_progs
* ✗ Xe.CI.Full: failure for test/amdgpu: add user queue test
2025-03-27 7:17 [PATCH i-g-t] test/amdgpu: add user queue test Jesse.zhang@amd.com
` (4 preceding siblings ...)
2025-03-27 19:00 ` vitaly prosyak
@ 2025-04-06 13:43 ` Patchwork
5 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2025-04-06 13:43 UTC (permalink / raw)
To: Jesse.zhang@amd.com; +Cc: igt-dev
== Series Details ==
Series: test/amdgpu: add user queue test
URL : https://patchwork.freedesktop.org/series/146842/
State : failure
== Summary ==
CI Bug Log - changes from XEIGT_8288_FULL -> XEIGTPW_12851_FULL
====================================================
Summary
-------
**FAILURE**
Serious unknown changes coming with XEIGTPW_12851_FULL absolutely need to be
verified manually.
If you think the reported changes have nothing to do with the changes
introduced in XEIGTPW_12851_FULL, please notify your bug team (I915-ci-infra@lists.freedesktop.org) to allow them
to document this new failure mode, which will reduce false positives in CI.
Participating hosts (4 -> 3)
------------------------------
Missing (1): shard-adlp
Possible new issues
-------------------
Here are the unknown changes that may have been introduced in XEIGTPW_12851_FULL:
### IGT changes ###
#### Possible regressions ####
* igt@xe_pm_residency@idle-residency-on-exec@gt1-engine-drm_xe_engine_class_video_decode:
- shard-bmg: NOTRUN -> [INCOMPLETE][1] +1 other test incomplete
[1]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-7/igt@xe_pm_residency@idle-residency-on-exec@gt1-engine-drm_xe_engine_class_video_decode.html
Known issues
------------
Here are the changes found in XEIGTPW_12851_FULL that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@kms_async_flips@async-flip-with-page-flip-events-atomic@pipe-d-hdmi-a-2-4-rc-ccs-cc:
- shard-dg2-set2: NOTRUN -> [SKIP][2] ([Intel XE#3767]) +15 other tests skip
[2]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@kms_async_flips@async-flip-with-page-flip-events-atomic@pipe-d-hdmi-a-2-4-rc-ccs-cc.html
* igt@kms_async_flips@test-cursor-atomic:
- shard-lnl: [PASS][3] -> [SKIP][4] ([Intel XE#664])
[3]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-6/igt@kms_async_flips@test-cursor-atomic.html
[4]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-2/igt@kms_async_flips@test-cursor-atomic.html
* igt@kms_big_fb@linear-32bpp-rotate-270:
- shard-bmg: NOTRUN -> [SKIP][5] ([Intel XE#2327]) +4 other tests skip
[5]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-8/igt@kms_big_fb@linear-32bpp-rotate-270.html
* igt@kms_big_fb@x-tiled-16bpp-rotate-270:
- shard-lnl: NOTRUN -> [SKIP][6] ([Intel XE#1407])
[6]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-2/igt@kms_big_fb@x-tiled-16bpp-rotate-270.html
* igt@kms_big_fb@x-tiled-8bpp-rotate-270:
- shard-dg2-set2: NOTRUN -> [SKIP][7] ([Intel XE#316]) +1 other test skip
[7]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-436/igt@kms_big_fb@x-tiled-8bpp-rotate-270.html
* igt@kms_big_fb@y-tiled-16bpp-rotate-0:
- shard-lnl: NOTRUN -> [SKIP][8] ([Intel XE#1124]) +4 other tests skip
[8]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-4/igt@kms_big_fb@y-tiled-16bpp-rotate-0.html
* igt@kms_big_fb@yf-tiled-16bpp-rotate-0:
- shard-dg2-set2: NOTRUN -> [SKIP][9] ([Intel XE#1124]) +6 other tests skip
[9]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-436/igt@kms_big_fb@yf-tiled-16bpp-rotate-0.html
* igt@kms_big_fb@yf-tiled-16bpp-rotate-270:
- shard-bmg: NOTRUN -> [SKIP][10] ([Intel XE#1124]) +12 other tests skip
[10]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-7/igt@kms_big_fb@yf-tiled-16bpp-rotate-270.html
* igt@kms_bw@connected-linear-tiling-2-displays-1920x1080p:
- shard-bmg: [PASS][11] -> [SKIP][12] ([Intel XE#2314] / [Intel XE#2894])
[11]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-bmg-7/igt@kms_bw@connected-linear-tiling-2-displays-1920x1080p.html
[12]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-4/igt@kms_bw@connected-linear-tiling-2-displays-1920x1080p.html
- shard-dg2-set2: NOTRUN -> [SKIP][13] ([Intel XE#2191]) +1 other test skip
[13]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_bw@connected-linear-tiling-2-displays-1920x1080p.html
* igt@kms_bw@connected-linear-tiling-2-displays-3840x2160p:
- shard-dg2-set2: [PASS][14] -> [SKIP][15] ([Intel XE#2191]) +1 other test skip
[14]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-463/igt@kms_bw@connected-linear-tiling-2-displays-3840x2160p.html
[15]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_bw@connected-linear-tiling-2-displays-3840x2160p.html
* igt@kms_bw@connected-linear-tiling-3-displays-2560x1440p:
- shard-bmg: NOTRUN -> [SKIP][16] ([Intel XE#2314] / [Intel XE#2894])
[16]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-6/igt@kms_bw@connected-linear-tiling-3-displays-2560x1440p.html
* igt@kms_bw@linear-tiling-2-displays-1920x1080p:
- shard-bmg: NOTRUN -> [SKIP][17] ([Intel XE#367]) +1 other test skip
[17]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-4/igt@kms_bw@linear-tiling-2-displays-1920x1080p.html
* igt@kms_bw@linear-tiling-2-displays-2160x1440p:
- shard-lnl: NOTRUN -> [SKIP][18] ([Intel XE#367])
[18]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-8/igt@kms_bw@linear-tiling-2-displays-2160x1440p.html
* igt@kms_bw@linear-tiling-3-displays-3840x2160p:
- shard-dg2-set2: NOTRUN -> [SKIP][19] ([Intel XE#367]) +1 other test skip
[19]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_bw@linear-tiling-3-displays-3840x2160p.html
* igt@kms_ccs@ccs-on-another-bo-4-tiled-mtl-rc-ccs@pipe-d-hdmi-a-2:
- shard-dg2-set2: NOTRUN -> [SKIP][20] ([Intel XE#787]) +150 other tests skip
[20]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@kms_ccs@ccs-on-another-bo-4-tiled-mtl-rc-ccs@pipe-d-hdmi-a-2.html
* igt@kms_ccs@crc-primary-basic-y-tiled-ccs:
- shard-bmg: NOTRUN -> [SKIP][21] ([Intel XE#2887]) +13 other tests skip
[21]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-4/igt@kms_ccs@crc-primary-basic-y-tiled-ccs.html
* igt@kms_ccs@crc-primary-suspend-4-tiled-dg2-mc-ccs:
- shard-bmg: NOTRUN -> [SKIP][22] ([Intel XE#3432]) +1 other test skip
[22]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-1/igt@kms_ccs@crc-primary-suspend-4-tiled-dg2-mc-ccs.html
- shard-dg2-set2: NOTRUN -> [INCOMPLETE][23] ([Intel XE#3862]) +1 other test incomplete
[23]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@kms_ccs@crc-primary-suspend-4-tiled-dg2-mc-ccs.html
* igt@kms_ccs@crc-primary-suspend-4-tiled-mtl-rc-ccs:
- shard-lnl: NOTRUN -> [SKIP][24] ([Intel XE#3432])
[24]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-5/igt@kms_ccs@crc-primary-suspend-4-tiled-mtl-rc-ccs.html
* igt@kms_ccs@crc-sprite-planes-basic-4-tiled-bmg-ccs@pipe-a-edp-1:
- shard-lnl: NOTRUN -> [SKIP][25] ([Intel XE#2669]) +3 other tests skip
[25]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-4/igt@kms_ccs@crc-sprite-planes-basic-4-tiled-bmg-ccs@pipe-a-edp-1.html
* igt@kms_ccs@crc-sprite-planes-basic-4-tiled-mtl-rc-ccs@pipe-d-dp-2:
- shard-dg2-set2: NOTRUN -> [SKIP][26] ([Intel XE#455] / [Intel XE#787]) +36 other tests skip
[26]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@kms_ccs@crc-sprite-planes-basic-4-tiled-mtl-rc-ccs@pipe-d-dp-2.html
* igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs@pipe-a-dp-4:
- shard-dg2-set2: [PASS][27] -> [DMESG-WARN][28] ([Intel XE#1727] / [Intel XE#3113]) +1 other test dmesg-warn
[27]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-434/igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs@pipe-a-dp-4.html
[28]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-463/igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs@pipe-a-dp-4.html
* igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs@pipe-b-hdmi-a-6:
- shard-dg2-set2: [PASS][29] -> [INCOMPLETE][30] ([Intel XE#3124] / [Intel XE#4345])
[29]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-434/igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs@pipe-b-hdmi-a-6.html
[30]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-463/igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs@pipe-b-hdmi-a-6.html
* igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs:
- shard-dg2-set2: [PASS][31] -> [INCOMPLETE][32] ([Intel XE#1727] / [Intel XE#2705] / [Intel XE#3113] / [Intel XE#4212] / [Intel XE#4522])
[31]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs.html
[32]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-466/igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs.html
* igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs-cc:
- shard-dg2-set2: [PASS][33] -> [INCOMPLETE][34] ([Intel XE#1727] / [Intel XE#3113] / [Intel XE#3124])
[33]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-434/igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs-cc.html
[34]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-434/igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs-cc.html
- shard-lnl: NOTRUN -> [SKIP][35] ([Intel XE#2887]) +3 other tests skip
[35]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-6/igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs-cc.html
* igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs-cc@pipe-b-dp-4:
- shard-dg2-set2: [PASS][36] -> [INCOMPLETE][37] ([Intel XE#3124])
[36]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-434/igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs-cc@pipe-b-dp-4.html
[37]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-434/igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs-cc@pipe-b-dp-4.html
* igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs@pipe-a-dp-4:
- shard-dg2-set2: NOTRUN -> [INCOMPLETE][38] ([Intel XE#1727] / [Intel XE#2705] / [Intel XE#3113] / [Intel XE#4212] / [Intel XE#4522])
[38]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-466/igt@kms_ccs@random-ccs-data-4-tiled-dg2-rc-ccs@pipe-a-dp-4.html
* igt@kms_ccs@random-ccs-data-4-tiled-lnl-ccs:
- shard-dg2-set2: NOTRUN -> [SKIP][39] ([Intel XE#2907]) +1 other test skip
[39]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_ccs@random-ccs-data-4-tiled-lnl-ccs.html
* igt@kms_ccs@random-ccs-data-4-tiled-lnl-ccs@pipe-c-dp-2:
- shard-bmg: NOTRUN -> [SKIP][40] ([Intel XE#2652] / [Intel XE#787]) +12 other tests skip
[40]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-7/igt@kms_ccs@random-ccs-data-4-tiled-lnl-ccs@pipe-c-dp-2.html
* igt@kms_chamelium_color@ctm-red-to-blue:
- shard-bmg: NOTRUN -> [SKIP][41] ([Intel XE#2325]) +1 other test skip
[41]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-2/igt@kms_chamelium_color@ctm-red-to-blue.html
* igt@kms_chamelium_color@degamma:
- shard-dg2-set2: NOTRUN -> [SKIP][42] ([Intel XE#306]) +1 other test skip
[42]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-434/igt@kms_chamelium_color@degamma.html
* igt@kms_chamelium_color@gamma:
- shard-lnl: NOTRUN -> [SKIP][43] ([Intel XE#306])
[43]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-5/igt@kms_chamelium_color@gamma.html
* igt@kms_chamelium_hpd@dp-hpd-enable-disable-mode:
- shard-lnl: NOTRUN -> [SKIP][44] ([Intel XE#373]) +2 other tests skip
[44]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-7/igt@kms_chamelium_hpd@dp-hpd-enable-disable-mode.html
* igt@kms_chamelium_hpd@dp-hpd-storm-disable:
- shard-bmg: NOTRUN -> [SKIP][45] ([Intel XE#2252]) +5 other tests skip
[45]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-1/igt@kms_chamelium_hpd@dp-hpd-storm-disable.html
* igt@kms_chamelium_hpd@vga-hpd-after-suspend:
- shard-dg2-set2: NOTRUN -> [SKIP][46] ([Intel XE#373])
[46]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@kms_chamelium_hpd@vga-hpd-after-suspend.html
* igt@kms_content_protection@dp-mst-lic-type-1:
- shard-lnl: NOTRUN -> [SKIP][47] ([Intel XE#307])
[47]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-2/igt@kms_content_protection@dp-mst-lic-type-1.html
* igt@kms_content_protection@mei-interface:
- shard-bmg: NOTRUN -> [SKIP][48] ([Intel XE#2341]) +1 other test skip
[48]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-8/igt@kms_content_protection@mei-interface.html
* igt@kms_content_protection@srm@pipe-a-dp-2:
- shard-dg2-set2: NOTRUN -> [FAIL][49] ([Intel XE#1178])
[49]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@kms_content_protection@srm@pipe-a-dp-2.html
* igt@kms_cursor_crc@cursor-onscreen-512x512:
- shard-dg2-set2: NOTRUN -> [SKIP][50] ([Intel XE#308])
[50]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_cursor_crc@cursor-onscreen-512x512.html
* igt@kms_cursor_crc@cursor-random-32x32:
- shard-bmg: NOTRUN -> [SKIP][51] ([Intel XE#2320]) +3 other tests skip
[51]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-6/igt@kms_cursor_crc@cursor-random-32x32.html
* igt@kms_cursor_crc@cursor-random-max-size:
- shard-lnl: NOTRUN -> [SKIP][52] ([Intel XE#1424]) +2 other tests skip
[52]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-5/igt@kms_cursor_crc@cursor-random-max-size.html
* igt@kms_cursor_crc@cursor-sliding-512x512:
- shard-bmg: NOTRUN -> [SKIP][53] ([Intel XE#2321]) +1 other test skip
[53]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-1/igt@kms_cursor_crc@cursor-sliding-512x512.html
* igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic:
- shard-dg2-set2: NOTRUN -> [SKIP][54] ([Intel XE#309])
[54]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic.html
* igt@kms_cursor_legacy@2x-long-cursor-vs-flip-legacy:
- shard-bmg: [PASS][55] -> [SKIP][56] ([Intel XE#2291]) +2 other tests skip
[55]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-bmg-3/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-legacy.html
[56]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-6/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-legacy.html
* igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
- shard-bmg: NOTRUN -> [SKIP][57] ([Intel XE#2286]) +1 other test skip
[57]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-1/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
* igt@kms_cursor_legacy@basic-busy-flip-before-cursor-varying-size:
- shard-dg2-set2: NOTRUN -> [SKIP][58] ([Intel XE#323])
[58]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-435/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-varying-size.html
* igt@kms_cursor_legacy@cursora-vs-flipb-atomic-transitions-varying-size:
- shard-bmg: NOTRUN -> [SKIP][59] ([Intel XE#2291]) +2 other tests skip
[59]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-6/igt@kms_cursor_legacy@cursora-vs-flipb-atomic-transitions-varying-size.html
* igt@kms_dither@fb-8bpc-vs-panel-6bpc@pipe-a-hdmi-a-2:
- shard-dg2-set2: NOTRUN -> [SKIP][60] ([Intel XE#4494])
[60]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@kms_dither@fb-8bpc-vs-panel-6bpc@pipe-a-hdmi-a-2.html
* igt@kms_dither@fb-8bpc-vs-panel-6bpc@pipe-a-hdmi-a-3:
- shard-bmg: NOTRUN -> [SKIP][61] ([Intel XE#1340])
[61]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-3/igt@kms_dither@fb-8bpc-vs-panel-6bpc@pipe-a-hdmi-a-3.html
* igt@kms_dp_linktrain_fallback@dp-fallback:
- shard-dg2-set2: NOTRUN -> [SKIP][62] ([Intel XE#4331])
[62]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_dp_linktrain_fallback@dp-fallback.html
* igt@kms_dsc@dsc-fractional-bpp-with-bpc:
- shard-bmg: NOTRUN -> [SKIP][63] ([Intel XE#2244])
[63]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-1/igt@kms_dsc@dsc-fractional-bpp-with-bpc.html
* igt@kms_dsc@dsc-with-bpc-formats:
- shard-dg2-set2: NOTRUN -> [SKIP][64] ([Intel XE#455]) +9 other tests skip
[64]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-435/igt@kms_dsc@dsc-with-bpc-formats.html
* igt@kms_fbc_dirty_rect@fbc-dirty-rectangle-dirtyfb-tests:
- shard-bmg: NOTRUN -> [SKIP][65] ([Intel XE#4422]) +2 other tests skip
[65]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-3/igt@kms_fbc_dirty_rect@fbc-dirty-rectangle-dirtyfb-tests.html
* igt@kms_feature_discovery@display-2x:
- shard-dg2-set2: [PASS][66] -> [SKIP][67] ([Intel XE#702])
[66]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-435/igt@kms_feature_discovery@display-2x.html
[67]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_feature_discovery@display-2x.html
* igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@bd-hdmi-a6-dp4:
- shard-dg2-set2: [PASS][68] -> [FAIL][69] ([Intel XE#301] / [Intel XE#3321])
[68]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-434/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@bd-hdmi-a6-dp4.html
[69]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-466/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@bd-hdmi-a6-dp4.html
* igt@kms_flip@2x-flip-vs-expired-vblank@ab-hdmi-a6-dp4:
- shard-dg2-set2: [PASS][70] -> [FAIL][71] ([Intel XE#301]) +2 other tests fail
[70]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-434/igt@kms_flip@2x-flip-vs-expired-vblank@ab-hdmi-a6-dp4.html
[71]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-463/igt@kms_flip@2x-flip-vs-expired-vblank@ab-hdmi-a6-dp4.html
* igt@kms_flip@2x-flip-vs-panning-vs-hang:
- shard-dg2-set2: [PASS][72] -> [SKIP][73] ([Intel XE#310]) +2 other tests skip
[72]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-433/igt@kms_flip@2x-flip-vs-panning-vs-hang.html
[73]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_flip@2x-flip-vs-panning-vs-hang.html
* igt@kms_flip@2x-flip-vs-rmfb-interruptible:
- shard-lnl: NOTRUN -> [SKIP][74] ([Intel XE#1421]) +6 other tests skip
[74]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-1/igt@kms_flip@2x-flip-vs-rmfb-interruptible.html
- shard-bmg: NOTRUN -> [SKIP][75] ([Intel XE#2316]) +1 other test skip
[75]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-4/igt@kms_flip@2x-flip-vs-rmfb-interruptible.html
* igt@kms_flip@2x-plain-flip-ts-check-interruptible:
- shard-bmg: [PASS][76] -> [SKIP][77] ([Intel XE#2316]) +8 other tests skip
[76]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-bmg-2/igt@kms_flip@2x-plain-flip-ts-check-interruptible.html
[77]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-6/igt@kms_flip@2x-plain-flip-ts-check-interruptible.html
* igt@kms_flip@flip-vs-expired-vblank-interruptible@a-hdmi-a6:
- shard-dg2-set2: NOTRUN -> [FAIL][78] ([Intel XE#301])
[78]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-434/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-hdmi-a6.html
* igt@kms_flip@flip-vs-suspend-interruptible@d-dp4:
- shard-dg2-set2: NOTRUN -> [INCOMPLETE][79] ([Intel XE#2049] / [Intel XE#2597])
[79]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-435/igt@kms_flip@flip-vs-suspend-interruptible@d-dp4.html
* igt@kms_flip@plain-flip-fb-recreate-interruptible@a-edp1:
- shard-lnl: [PASS][80] -> [FAIL][81] ([Intel XE#886]) +3 other tests fail
[80]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-8/igt@kms_flip@plain-flip-fb-recreate-interruptible@a-edp1.html
[81]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-7/igt@kms_flip@plain-flip-fb-recreate-interruptible@a-edp1.html
* igt@kms_flip@wf_vblank-ts-check-interruptible:
- shard-dg2-set2: [PASS][82] -> [FAIL][83] ([Intel XE#2882])
[82]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-435/igt@kms_flip@wf_vblank-ts-check-interruptible.html
[83]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-463/igt@kms_flip@wf_vblank-ts-check-interruptible.html
* igt@kms_flip@wf_vblank-ts-check-interruptible@a-hdmi-a6:
- shard-dg2-set2: [PASS][84] -> [FAIL][85] ([Intel XE#886])
[84]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-435/igt@kms_flip@wf_vblank-ts-check-interruptible@a-hdmi-a6.html
[85]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-463/igt@kms_flip@wf_vblank-ts-check-interruptible@a-hdmi-a6.html
* igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs-upscaling:
- shard-bmg: NOTRUN -> [SKIP][86] ([Intel XE#2293] / [Intel XE#2380]) +6 other tests skip
[86]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-1/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs-upscaling.html
- shard-lnl: NOTRUN -> [SKIP][87] ([Intel XE#1401] / [Intel XE#1745])
[87]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-8/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs-upscaling.html
* igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs-upscaling@pipe-a-default-mode:
- shard-lnl: NOTRUN -> [SKIP][88] ([Intel XE#1401])
[88]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-8/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs-upscaling@pipe-a-default-mode.html
* igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling@pipe-a-valid-mode:
- shard-bmg: NOTRUN -> [SKIP][89] ([Intel XE#2293]) +6 other tests skip
[89]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-4/igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling@pipe-a-valid-mode.html
* igt@kms_frontbuffer_tracking@fbc-2p-primscrn-cur-indfb-onoff:
- shard-dg2-set2: [PASS][90] -> [SKIP][91] ([Intel XE#656]) +7 other tests skip
[90]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-463/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-cur-indfb-onoff.html
[91]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-cur-indfb-onoff.html
* igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-move:
- shard-dg2-set2: NOTRUN -> [SKIP][92] ([Intel XE#656]) +2 other tests skip
[92]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-move.html
* igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-onoff:
- shard-bmg: NOTRUN -> [SKIP][93] ([Intel XE#4141]) +9 other tests skip
[93]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-2/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-onoff.html
* igt@kms_frontbuffer_tracking@fbcdrrs-1p-primscrn-indfb-msflip-blt:
- shard-lnl: NOTRUN -> [SKIP][94] ([Intel XE#651]) +3 other tests skip
[94]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-8/igt@kms_frontbuffer_tracking@fbcdrrs-1p-primscrn-indfb-msflip-blt.html
* igt@kms_frontbuffer_tracking@fbcdrrs-2p-scndscrn-cur-indfb-draw-mmap-wc:
- shard-bmg: NOTRUN -> [SKIP][95] ([Intel XE#2311]) +22 other tests skip
[95]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-7/igt@kms_frontbuffer_tracking@fbcdrrs-2p-scndscrn-cur-indfb-draw-mmap-wc.html
* igt@kms_frontbuffer_tracking@fbcdrrs-2p-scndscrn-pri-indfb-draw-mmap-wc:
- shard-dg2-set2: NOTRUN -> [SKIP][96] ([Intel XE#651]) +11 other tests skip
[96]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-434/igt@kms_frontbuffer_tracking@fbcdrrs-2p-scndscrn-pri-indfb-draw-mmap-wc.html
* igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-pri-indfb-draw-render:
- shard-bmg: NOTRUN -> [SKIP][97] ([Intel XE#2312]) +15 other tests skip
[97]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-4/igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-pri-indfb-draw-render.html
* igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-cur-indfb-draw-blt:
- shard-bmg: NOTRUN -> [SKIP][98] ([Intel XE#2313]) +21 other tests skip
[98]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-7/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-cur-indfb-draw-blt.html
* igt@kms_frontbuffer_tracking@plane-fbc-rte:
- shard-bmg: NOTRUN -> [SKIP][99] ([Intel XE#4439])
[99]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-2/igt@kms_frontbuffer_tracking@plane-fbc-rte.html
* igt@kms_frontbuffer_tracking@psr-2p-primscrn-pri-indfb-draw-blt:
- shard-dg2-set2: NOTRUN -> [SKIP][100] ([Intel XE#653]) +12 other tests skip
[100]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@kms_frontbuffer_tracking@psr-2p-primscrn-pri-indfb-draw-blt.html
* igt@kms_frontbuffer_tracking@psr-2p-scndscrn-pri-indfb-draw-blt:
- shard-lnl: NOTRUN -> [SKIP][101] ([Intel XE#656]) +9 other tests skip
[101]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-8/igt@kms_frontbuffer_tracking@psr-2p-scndscrn-pri-indfb-draw-blt.html
* igt@kms_plane_cursor@overlay:
- shard-dg2-set2: [PASS][102] -> [FAIL][103] ([Intel XE#616])
[102]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-434/igt@kms_plane_cursor@overlay.html
[103]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@kms_plane_cursor@overlay.html
* igt@kms_plane_cursor@overlay@pipe-a-hdmi-a-2-size-64:
- shard-dg2-set2: NOTRUN -> [FAIL][104] ([Intel XE#616])
[104]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@kms_plane_cursor@overlay@pipe-a-hdmi-a-2-size-64.html
* igt@kms_plane_multiple@2x-tiling-x:
- shard-bmg: NOTRUN -> [SKIP][105] ([Intel XE#4596])
[105]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-6/igt@kms_plane_multiple@2x-tiling-x.html
* igt@kms_plane_scaling@intel-max-src-size@pipe-a-dp-4:
- shard-dg2-set2: NOTRUN -> [DMESG-WARN][106] ([Intel XE#4212])
[106]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-435/igt@kms_plane_scaling@intel-max-src-size@pipe-a-dp-4.html
* igt@kms_plane_scaling@intel-max-src-size@pipe-a-hdmi-a-6:
- shard-dg2-set2: NOTRUN -> [ABORT][107] ([Intel XE#4540]) +1 other test abort
[107]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-435/igt@kms_plane_scaling@intel-max-src-size@pipe-a-hdmi-a-6.html
* igt@kms_plane_scaling@planes-unity-scaling-downscale-factor-0-25@pipe-b:
- shard-bmg: NOTRUN -> [SKIP][108] ([Intel XE#2763]) +14 other tests skip
[108]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-2/igt@kms_plane_scaling@planes-unity-scaling-downscale-factor-0-25@pipe-b.html
* igt@kms_pm_backlight@fade-with-suspend:
- shard-bmg: NOTRUN -> [SKIP][109] ([Intel XE#870])
[109]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-8/igt@kms_pm_backlight@fade-with-suspend.html
* igt@kms_pm_dc@dc3co-vpb-simulation:
- shard-bmg: NOTRUN -> [SKIP][110] ([Intel XE#2391])
[110]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-6/igt@kms_pm_dc@dc3co-vpb-simulation.html
* igt@kms_pm_dc@dc5-dpms:
- shard-lnl: [PASS][111] -> [FAIL][112] ([Intel XE#718])
[111]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-5/igt@kms_pm_dc@dc5-dpms.html
[112]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-3/igt@kms_pm_dc@dc5-dpms.html
* igt@kms_pm_dc@deep-pkgc:
- shard-bmg: NOTRUN -> [SKIP][113] ([Intel XE#2505])
[113]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-2/igt@kms_pm_dc@deep-pkgc.html
* igt@kms_pm_rpm@dpms-mode-unset-lpsp:
- shard-bmg: NOTRUN -> [SKIP][114] ([Intel XE#1439] / [Intel XE#836])
[114]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-1/igt@kms_pm_rpm@dpms-mode-unset-lpsp.html
* igt@kms_psr2_sf@fbc-pr-cursor-plane-move-continuous-sf:
- shard-bmg: NOTRUN -> [SKIP][115] ([Intel XE#1489]) +5 other tests skip
[115]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-2/igt@kms_psr2_sf@fbc-pr-cursor-plane-move-continuous-sf.html
* igt@kms_psr2_sf@fbc-pr-cursor-plane-update-sf:
- shard-lnl: NOTRUN -> [SKIP][116] ([Intel XE#2893]) +1 other test skip
[116]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-7/igt@kms_psr2_sf@fbc-pr-cursor-plane-update-sf.html
* igt@kms_psr2_sf@fbc-psr2-cursor-plane-move-continuous-exceed-sf@pipe-b-edp-1:
- shard-lnl: NOTRUN -> [FAIL][117] ([Intel XE#4570]) +2 other tests fail
[117]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-3/igt@kms_psr2_sf@fbc-psr2-cursor-plane-move-continuous-exceed-sf@pipe-b-edp-1.html
* igt@kms_psr2_sf@fbc-psr2-primary-plane-update-sf-dmg-area:
- shard-dg2-set2: NOTRUN -> [SKIP][118] ([Intel XE#1489]) +1 other test skip
[118]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-466/igt@kms_psr2_sf@fbc-psr2-primary-plane-update-sf-dmg-area.html
* igt@kms_psr@fbc-psr2-basic@edp-1:
- shard-lnl: NOTRUN -> [FAIL][119] ([Intel XE#4568]) +1 other test fail
[119]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-5/igt@kms_psr@fbc-psr2-basic@edp-1.html
* igt@kms_psr@fbc-psr2-cursor-plane-move:
- shard-bmg: NOTRUN -> [SKIP][120] ([Intel XE#2234] / [Intel XE#2850]) +12 other tests skip
[120]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-6/igt@kms_psr@fbc-psr2-cursor-plane-move.html
* igt@kms_psr@pr-primary-page-flip:
- shard-lnl: NOTRUN -> [SKIP][121] ([Intel XE#1406]) +1 other test skip
[121]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-6/igt@kms_psr@pr-primary-page-flip.html
* igt@kms_psr@pr-sprite-plane-move:
- shard-dg2-set2: NOTRUN -> [SKIP][122] ([Intel XE#2850] / [Intel XE#929]) +6 other tests skip
[122]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-435/igt@kms_psr@pr-sprite-plane-move.html
* igt@kms_psr_stress_test@flip-primary-invalidate-overlay:
- shard-bmg: NOTRUN -> [SKIP][123] ([Intel XE#2414])
[123]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-2/igt@kms_psr_stress_test@flip-primary-invalidate-overlay.html
- shard-dg2-set2: NOTRUN -> [SKIP][124] ([Intel XE#2939])
[124]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-463/igt@kms_psr_stress_test@flip-primary-invalidate-overlay.html
* igt@kms_rotation_crc@bad-pixel-format:
- shard-bmg: NOTRUN -> [SKIP][125] ([Intel XE#3414] / [Intel XE#3904])
[125]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-3/igt@kms_rotation_crc@bad-pixel-format.html
* igt@kms_setmode@basic-clone-single-crtc:
- shard-bmg: NOTRUN -> [SKIP][126] ([Intel XE#1435]) +2 other tests skip
[126]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-6/igt@kms_setmode@basic-clone-single-crtc.html
* igt@kms_tiled_display@basic-test-pattern-with-chamelium:
- shard-bmg: NOTRUN -> [SKIP][127] ([Intel XE#2509])
[127]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-1/igt@kms_tiled_display@basic-test-pattern-with-chamelium.html
* igt@kms_universal_plane@cursor-fb-leak@pipe-a-edp-1:
- shard-lnl: [PASS][128] -> [FAIL][129] ([Intel XE#771])
[128]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-5/igt@kms_universal_plane@cursor-fb-leak@pipe-a-edp-1.html
[129]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-5/igt@kms_universal_plane@cursor-fb-leak@pipe-a-edp-1.html
* igt@kms_vrr@cmrr@pipe-a-edp-1:
- shard-lnl: [PASS][130] -> [FAIL][131] ([Intel XE#4459]) +1 other test fail
[130]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-1/igt@kms_vrr@cmrr@pipe-a-edp-1.html
[131]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-6/igt@kms_vrr@cmrr@pipe-a-edp-1.html
* igt@kms_vrr@max-min:
- shard-bmg: NOTRUN -> [SKIP][132] ([Intel XE#1499])
[132]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-2/igt@kms_vrr@max-min.html
* igt@kms_vrr@seamless-rr-switch-vrr:
- shard-lnl: NOTRUN -> [SKIP][133] ([Intel XE#1499])
[133]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-6/igt@kms_vrr@seamless-rr-switch-vrr.html
* igt@kms_writeback@writeback-check-output-xrgb2101010:
- shard-bmg: NOTRUN -> [SKIP][134] ([Intel XE#756]) +1 other test skip
[134]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-6/igt@kms_writeback@writeback-check-output-xrgb2101010.html
- shard-dg2-set2: NOTRUN -> [SKIP][135] ([Intel XE#756])
[135]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@kms_writeback@writeback-check-output-xrgb2101010.html
* igt@xe_eudebug@basic-vm-access-parameters:
- shard-dg2-set2: NOTRUN -> [SKIP][136] ([Intel XE#2905]) +6 other tests skip
[136]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@xe_eudebug@basic-vm-access-parameters.html
* igt@xe_eudebug@basic-vm-access-parameters-userptr:
- shard-lnl: NOTRUN -> [SKIP][137] ([Intel XE#2905] / [Intel XE#3889])
[137]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-4/igt@xe_eudebug@basic-vm-access-parameters-userptr.html
* igt@xe_eudebug@discovery-race-sigint:
- shard-bmg: NOTRUN -> [SKIP][138] ([Intel XE#2905] / [Intel XE#4259])
[138]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-4/igt@xe_eudebug@discovery-race-sigint.html
* igt@xe_eudebug@multigpu-basic-client-many:
- shard-lnl: NOTRUN -> [SKIP][139] ([Intel XE#2905])
[139]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-4/igt@xe_eudebug@multigpu-basic-client-many.html
* igt@xe_eudebug@read-metadata:
- shard-bmg: NOTRUN -> [SKIP][140] ([Intel XE#2905]) +8 other tests skip
[140]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-4/igt@xe_eudebug@read-metadata.html
* igt@xe_eudebug_online@set-breakpoint-sigint-debugger:
- shard-bmg: NOTRUN -> [SKIP][141] ([Intel XE#4577])
[141]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-4/igt@xe_eudebug_online@set-breakpoint-sigint-debugger.html
- shard-lnl: NOTRUN -> [SKIP][142] ([Intel XE#4577])
[142]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-1/igt@xe_eudebug_online@set-breakpoint-sigint-debugger.html
* igt@xe_eudebug_sriov@deny-eudebug:
- shard-lnl: NOTRUN -> [SKIP][143] ([Intel XE#4518])
[143]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-4/igt@xe_eudebug_sriov@deny-eudebug.html
- shard-bmg: NOTRUN -> [SKIP][144] ([Intel XE#4518])
[144]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-2/igt@xe_eudebug_sriov@deny-eudebug.html
* igt@xe_eudebug_sriov@deny-sriov:
- shard-dg2-set2: NOTRUN -> [SKIP][145] ([Intel XE#4518])
[145]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@xe_eudebug_sriov@deny-sriov.html
* igt@xe_evict@evict-beng-large-external-cm:
- shard-lnl: NOTRUN -> [SKIP][146] ([Intel XE#688])
[146]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-3/igt@xe_evict@evict-beng-large-external-cm.html
* igt@xe_exec_basic@multigpu-many-execqueues-many-vm-bindexecqueue-userptr:
- shard-bmg: NOTRUN -> [SKIP][147] ([Intel XE#2322]) +8 other tests skip
[147]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-4/igt@xe_exec_basic@multigpu-many-execqueues-many-vm-bindexecqueue-userptr.html
* igt@xe_exec_basic@multigpu-many-execqueues-many-vm-null-defer-bind:
- shard-dg2-set2: [PASS][148] -> [SKIP][149] ([Intel XE#1392]) +3 other tests skip
[148]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-436/igt@xe_exec_basic@multigpu-many-execqueues-many-vm-null-defer-bind.html
[149]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@xe_exec_basic@multigpu-many-execqueues-many-vm-null-defer-bind.html
* igt@xe_exec_basic@multigpu-no-exec-basic-defer-bind:
- shard-lnl: NOTRUN -> [SKIP][150] ([Intel XE#1392]) +1 other test skip
[150]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-6/igt@xe_exec_basic@multigpu-no-exec-basic-defer-bind.html
* igt@xe_exec_fault_mode@many-execqueues-userptr-invalidate-imm:
- shard-dg2-set2: NOTRUN -> [SKIP][151] ([Intel XE#288]) +12 other tests skip
[151]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@xe_exec_fault_mode@many-execqueues-userptr-invalidate-imm.html
* igt@xe_exec_mix_modes@exec-spinner-interrupted-lr:
- shard-dg2-set2: NOTRUN -> [SKIP][152] ([Intel XE#2360])
[152]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-436/igt@xe_exec_mix_modes@exec-spinner-interrupted-lr.html
* igt@xe_exec_threads@threads-hang-userptr-invalidate:
- shard-dg2-set2: [PASS][153] -> [DMESG-WARN][154] ([Intel XE#3876])
[153]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-433/igt@xe_exec_threads@threads-hang-userptr-invalidate.html
[154]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-463/igt@xe_exec_threads@threads-hang-userptr-invalidate.html
* igt@xe_fault_injection@oa-add-config-fail-xe_oa_alloc_regs:
- shard-bmg: NOTRUN -> [FAIL][155] ([Intel XE#3903])
[155]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-4/igt@xe_fault_injection@oa-add-config-fail-xe_oa_alloc_regs.html
* igt@xe_live_ktest@xe_mocs@xe_live_mocs_kernel_kunit:
- shard-dg2-set2: [PASS][156] -> [FAIL][157] ([Intel XE#1999]) +2 other tests fail
[156]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-432/igt@xe_live_ktest@xe_mocs@xe_live_mocs_kernel_kunit.html
[157]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-436/igt@xe_live_ktest@xe_mocs@xe_live_mocs_kernel_kunit.html
* igt@xe_module_load@load:
- shard-lnl: ([PASS][158], [PASS][159], [PASS][160], [PASS][161], [PASS][162], [PASS][163], [PASS][164], [PASS][165], [PASS][166], [PASS][167], [PASS][168], [PASS][169], [PASS][170], [PASS][171], [PASS][172], [PASS][173], [PASS][174], [PASS][175], [PASS][176], [PASS][177], [PASS][178], [PASS][179], [PASS][180], [PASS][181], [PASS][182]) -> ([PASS][183], [PASS][184], [PASS][185], [PASS][186], [PASS][187], [PASS][188], [PASS][189], [PASS][190], [PASS][191], [PASS][192], [PASS][193], [PASS][194], [PASS][195], [SKIP][196], [PASS][197], [PASS][198], [PASS][199], [PASS][200], [PASS][201], [PASS][202], [PASS][203], [PASS][204], [PASS][205], [PASS][206], [PASS][207], [PASS][208]) ([Intel XE#378])
[158]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-5/igt@xe_module_load@load.html
[159]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-7/igt@xe_module_load@load.html
[160]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-6/igt@xe_module_load@load.html
[161]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-4/igt@xe_module_load@load.html
[162]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-4/igt@xe_module_load@load.html
[163]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-1/igt@xe_module_load@load.html
[164]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-5/igt@xe_module_load@load.html
[165]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-3/igt@xe_module_load@load.html
[166]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-8/igt@xe_module_load@load.html
[167]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-3/igt@xe_module_load@load.html
[168]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-1/igt@xe_module_load@load.html
[169]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-3/igt@xe_module_load@load.html
[170]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-6/igt@xe_module_load@load.html
[171]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-8/igt@xe_module_load@load.html
[172]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-2/igt@xe_module_load@load.html
[173]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-8/igt@xe_module_load@load.html
[174]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-7/igt@xe_module_load@load.html
[175]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-2/igt@xe_module_load@load.html
[176]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-4/igt@xe_module_load@load.html
[177]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-7/igt@xe_module_load@load.html
[178]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-3/igt@xe_module_load@load.html
[179]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-5/igt@xe_module_load@load.html
[180]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-6/igt@xe_module_load@load.html
[181]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-2/igt@xe_module_load@load.html
[182]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-1/igt@xe_module_load@load.html
[183]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-3/igt@xe_module_load@load.html
[184]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-1/igt@xe_module_load@load.html
[185]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-1/igt@xe_module_load@load.html
[186]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-1/igt@xe_module_load@load.html
[187]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-5/igt@xe_module_load@load.html
[188]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-5/igt@xe_module_load@load.html
[189]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-8/igt@xe_module_load@load.html
[190]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-8/igt@xe_module_load@load.html
[191]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-8/igt@xe_module_load@load.html
[192]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-4/igt@xe_module_load@load.html
[193]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-4/igt@xe_module_load@load.html
[194]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-4/igt@xe_module_load@load.html
[195]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-2/igt@xe_module_load@load.html
[196]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-2/igt@xe_module_load@load.html
[197]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-5/igt@xe_module_load@load.html
[198]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-2/igt@xe_module_load@load.html
[199]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-6/igt@xe_module_load@load.html
[200]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-2/igt@xe_module_load@load.html
[201]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-5/igt@xe_module_load@load.html
[202]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-3/igt@xe_module_load@load.html
[203]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-3/igt@xe_module_load@load.html
[204]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-6/igt@xe_module_load@load.html
[205]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-6/igt@xe_module_load@load.html
[206]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-7/igt@xe_module_load@load.html
[207]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-7/igt@xe_module_load@load.html
[208]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-7/igt@xe_module_load@load.html
* igt@xe_oa@create-destroy-userspace-config:
- shard-dg2-set2: NOTRUN -> [SKIP][209] ([Intel XE#2541] / [Intel XE#3573]) +2 other tests skip
[209]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-435/igt@xe_oa@create-destroy-userspace-config.html
* igt@xe_oa@syncs-syncobj-wait:
- shard-dg2-set2: NOTRUN -> [SKIP][210] ([Intel XE#2541] / [Intel XE#3573] / [Intel XE#4501])
[210]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@xe_oa@syncs-syncobj-wait.html
* igt@xe_peer2peer@read@read-gpua-vram01-gpub-system-p2p:
- shard-dg2-set2: NOTRUN -> [FAIL][211] ([Intel XE#1173])
[211]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@xe_peer2peer@read@read-gpua-vram01-gpub-system-p2p.html
* igt@xe_pm@d3cold-mocs:
- shard-dg2-set2: NOTRUN -> [SKIP][212] ([Intel XE#2284])
[212]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-434/igt@xe_pm@d3cold-mocs.html
* igt@xe_pm@s3-d3cold-basic-exec:
- shard-bmg: NOTRUN -> [SKIP][213] ([Intel XE#2284])
[213]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-8/igt@xe_pm@s3-d3cold-basic-exec.html
- shard-dg2-set2: NOTRUN -> [SKIP][214] ([Intel XE#2284] / [Intel XE#366])
[214]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-434/igt@xe_pm@s3-d3cold-basic-exec.html
* igt@xe_pm@s3-d3hot-basic-exec:
- shard-lnl: NOTRUN -> [SKIP][215] ([Intel XE#584])
[215]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-1/igt@xe_pm@s3-d3hot-basic-exec.html
* igt@xe_pm@s4-multiple-execs:
- shard-lnl: [PASS][216] -> [ABORT][217] ([Intel XE#1794])
[216]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-3/igt@xe_pm@s4-multiple-execs.html
[217]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-2/igt@xe_pm@s4-multiple-execs.html
* igt@xe_query@multigpu-query-invalid-extension:
- shard-bmg: NOTRUN -> [SKIP][218] ([Intel XE#944]) +3 other tests skip
[218]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-2/igt@xe_query@multigpu-query-invalid-extension.html
- shard-lnl: NOTRUN -> [SKIP][219] ([Intel XE#944]) +1 other test skip
[219]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-4/igt@xe_query@multigpu-query-invalid-extension.html
* igt@xe_query@multigpu-query-uc-fw-version-guc:
- shard-dg2-set2: NOTRUN -> [SKIP][220] ([Intel XE#944]) +1 other test skip
[220]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-466/igt@xe_query@multigpu-query-uc-fw-version-guc.html
* igt@xe_sriov_auto_provisioning@fair-allocation:
- shard-lnl: NOTRUN -> [SKIP][221] ([Intel XE#4130])
[221]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-7/igt@xe_sriov_auto_provisioning@fair-allocation.html
* igt@xe_sriov_flr@flr-vf1-clear:
- shard-bmg: NOTRUN -> [SKIP][222] ([Intel XE#3342])
[222]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-3/igt@xe_sriov_flr@flr-vf1-clear.html
* igt@xe_sriov_scheduling@equal-throughput:
- shard-bmg: NOTRUN -> [SKIP][223] ([Intel XE#4351])
[223]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-4/igt@xe_sriov_scheduling@equal-throughput.html
#### Possible fixes ####
* igt@kms_bw@connected-linear-tiling-2-displays-2160x1440p:
- shard-dg2-set2: [SKIP][224] ([Intel XE#2191]) -> [PASS][225]
[224]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_bw@connected-linear-tiling-2-displays-2160x1440p.html
[225]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-434/igt@kms_bw@connected-linear-tiling-2-displays-2160x1440p.html
* igt@kms_cursor_legacy@2x-cursor-vs-flip-legacy:
- shard-dg2-set2: [SKIP][226] ([Intel XE#309]) -> [PASS][227] +2 other tests pass
[226]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_cursor_legacy@2x-cursor-vs-flip-legacy.html
[227]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@kms_cursor_legacy@2x-cursor-vs-flip-legacy.html
* igt@kms_cursor_legacy@cursorb-vs-flipb-toggle:
- shard-bmg: [SKIP][228] ([Intel XE#2291]) -> [PASS][229] +4 other tests pass
[228]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-bmg-4/igt@kms_cursor_legacy@cursorb-vs-flipb-toggle.html
[229]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-2/igt@kms_cursor_legacy@cursorb-vs-flipb-toggle.html
* igt@kms_display_modes@extended-mode-basic:
- shard-bmg: [SKIP][230] ([Intel XE#4302]) -> [PASS][231]
[230]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-bmg-6/igt@kms_display_modes@extended-mode-basic.html
[231]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-2/igt@kms_display_modes@extended-mode-basic.html
* igt@kms_dither@fb-8bpc-vs-panel-6bpc:
- shard-dg2-set2: [SKIP][232] ([Intel XE#455]) -> [PASS][233] +2 other tests pass
[232]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_dither@fb-8bpc-vs-panel-6bpc.html
[233]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@kms_dither@fb-8bpc-vs-panel-6bpc.html
* igt@kms_flip@2x-flip-vs-expired-vblank@ac-dp2-hdmi-a3:
- shard-bmg: [FAIL][234] ([Intel XE#3321]) -> [PASS][235] +1 other test pass
[234]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-bmg-1/igt@kms_flip@2x-flip-vs-expired-vblank@ac-dp2-hdmi-a3.html
[235]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-8/igt@kms_flip@2x-flip-vs-expired-vblank@ac-dp2-hdmi-a3.html
* igt@kms_flip@2x-plain-flip-fb-recreate:
- shard-bmg: [SKIP][236] ([Intel XE#2316]) -> [PASS][237] +3 other tests pass
[236]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-bmg-4/igt@kms_flip@2x-plain-flip-fb-recreate.html
[237]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-7/igt@kms_flip@2x-plain-flip-fb-recreate.html
* igt@kms_flip@2x-plain-flip-ts-check:
- shard-dg2-set2: [SKIP][238] ([Intel XE#310]) -> [PASS][239]
[238]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_flip@2x-plain-flip-ts-check.html
[239]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-435/igt@kms_flip@2x-plain-flip-ts-check.html
* igt@kms_flip@flip-vs-expired-vblank@a-dp4:
- shard-dg2-set2: [FAIL][240] ([Intel XE#301] / [Intel XE#3321]) -> [PASS][241] +1 other test pass
[240]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-436/igt@kms_flip@flip-vs-expired-vblank@a-dp4.html
[241]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-463/igt@kms_flip@flip-vs-expired-vblank@a-dp4.html
* igt@kms_flip@flip-vs-expired-vblank@b-hdmi-a6:
- shard-dg2-set2: [FAIL][242] ([Intel XE#301]) -> [PASS][243] +4 other tests pass
[242]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-436/igt@kms_flip@flip-vs-expired-vblank@b-hdmi-a6.html
[243]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-463/igt@kms_flip@flip-vs-expired-vblank@b-hdmi-a6.html
* igt@kms_flip@flip-vs-suspend-interruptible@c-dp4:
- shard-dg2-set2: [INCOMPLETE][244] ([Intel XE#2049] / [Intel XE#2597]) -> [PASS][245] +1 other test pass
[244]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-436/igt@kms_flip@flip-vs-suspend-interruptible@c-dp4.html
[245]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-435/igt@kms_flip@flip-vs-suspend-interruptible@c-dp4.html
* igt@kms_frontbuffer_tracking@fbc-2p-rte:
- shard-dg2-set2: [SKIP][246] ([Intel XE#656]) -> [PASS][247] +2 other tests pass
[246]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_frontbuffer_tracking@fbc-2p-rte.html
[247]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@kms_frontbuffer_tracking@fbc-2p-rte.html
* igt@kms_plane_multiple@2x-tiling-x:
- shard-dg2-set2: [SKIP][248] ([Intel XE#4596]) -> [PASS][249]
[248]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_plane_multiple@2x-tiling-x.html
[249]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-436/igt@kms_plane_multiple@2x-tiling-x.html
* igt@kms_pm_rpm@modeset-non-lpsp:
- shard-dg2-set2: [SKIP][250] ([Intel XE#836]) -> [PASS][251] +1 other test pass
[250]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_pm_rpm@modeset-non-lpsp.html
[251]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-436/igt@kms_pm_rpm@modeset-non-lpsp.html
* igt@kms_setmode@basic@pipe-b-edp-1:
- shard-lnl: [FAIL][252] ([Intel XE#2883]) -> [PASS][253] +2 other tests pass
[252]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-3/igt@kms_setmode@basic@pipe-b-edp-1.html
[253]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-5/igt@kms_setmode@basic@pipe-b-edp-1.html
* igt@kms_universal_plane@cursor-fb-leak@pipe-c-edp-1:
- shard-lnl: [FAIL][254] ([Intel XE#771]) -> [PASS][255]
[254]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-5/igt@kms_universal_plane@cursor-fb-leak@pipe-c-edp-1.html
[255]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-5/igt@kms_universal_plane@cursor-fb-leak@pipe-c-edp-1.html
* igt@xe_exec_basic@multigpu-many-execqueues-many-vm-basic-defer-bind:
- shard-dg2-set2: [SKIP][256] ([Intel XE#1392]) -> [PASS][257] +5 other tests pass
[256]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-432/igt@xe_exec_basic@multigpu-many-execqueues-many-vm-basic-defer-bind.html
[257]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-436/igt@xe_exec_basic@multigpu-many-execqueues-many-vm-basic-defer-bind.html
* igt@xe_module_load@load:
- shard-dg2-set2: ([PASS][258], [PASS][259], [PASS][260], [PASS][261], [PASS][262], [PASS][263], [PASS][264], [PASS][265], [PASS][266], [PASS][267], [PASS][268], [PASS][269], [PASS][270], [PASS][271], [PASS][272], [SKIP][273], [PASS][274], [PASS][275], [PASS][276], [PASS][277], [PASS][278], [PASS][279], [PASS][280], [PASS][281], [PASS][282], [PASS][283]) ([Intel XE#378]) -> ([PASS][284], [PASS][285], [PASS][286], [PASS][287], [PASS][288], [PASS][289], [PASS][290], [PASS][291], [PASS][292], [PASS][293], [PASS][294], [PASS][295], [PASS][296], [PASS][297], [PASS][298], [PASS][299], [PASS][300], [PASS][301], [PASS][302], [PASS][303], [PASS][304], [PASS][305], [PASS][306], [PASS][307])
[258]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-466/igt@xe_module_load@load.html
[259]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-434/igt@xe_module_load@load.html
[260]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-463/igt@xe_module_load@load.html
[261]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-463/igt@xe_module_load@load.html
[262]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-463/igt@xe_module_load@load.html
[263]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-435/igt@xe_module_load@load.html
[264]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-466/igt@xe_module_load@load.html
[265]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-433/igt@xe_module_load@load.html
[266]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-435/igt@xe_module_load@load.html
[267]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-435/igt@xe_module_load@load.html
[268]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-433/igt@xe_module_load@load.html
[269]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@xe_module_load@load.html
[270]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-432/igt@xe_module_load@load.html
[271]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-436/igt@xe_module_load@load.html
[272]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-463/igt@xe_module_load@load.html
[273]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-433/igt@xe_module_load@load.html
[274]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@xe_module_load@load.html
[275]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-436/igt@xe_module_load@load.html
[276]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-434/igt@xe_module_load@load.html
[277]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-432/igt@xe_module_load@load.html
[278]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@xe_module_load@load.html
[279]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@xe_module_load@load.html
[280]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-432/igt@xe_module_load@load.html
[281]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-433/igt@xe_module_load@load.html
[282]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-434/igt@xe_module_load@load.html
[283]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-434/igt@xe_module_load@load.html
[284]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@xe_module_load@load.html
[285]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@xe_module_load@load.html
[286]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@xe_module_load@load.html
[287]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-435/igt@xe_module_load@load.html
[288]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-435/igt@xe_module_load@load.html
[289]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-434/igt@xe_module_load@load.html
[290]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-434/igt@xe_module_load@load.html
[291]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-435/igt@xe_module_load@load.html
[292]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-466/igt@xe_module_load@load.html
[293]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-466/igt@xe_module_load@load.html
[294]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-466/igt@xe_module_load@load.html
[295]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-463/igt@xe_module_load@load.html
[296]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-463/igt@xe_module_load@load.html
[297]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-463/igt@xe_module_load@load.html
[298]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-434/igt@xe_module_load@load.html
[299]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-436/igt@xe_module_load@load.html
[300]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-436/igt@xe_module_load@load.html
[301]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-436/igt@xe_module_load@load.html
[302]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@xe_module_load@load.html
[303]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@xe_module_load@load.html
[304]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@xe_module_load@load.html
[305]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@xe_module_load@load.html
[306]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@xe_module_load@load.html
[307]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@xe_module_load@load.html
* igt@xe_pm@s4-vm-bind-unbind-all:
- shard-lnl: [ABORT][308] ([Intel XE#1794]) -> [PASS][309]
[308]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-lnl-2/igt@xe_pm@s4-vm-bind-unbind-all.html
[309]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-lnl-3/igt@xe_pm@s4-vm-bind-unbind-all.html
#### Warnings ####
* igt@kms_ccs@bad-rotation-90-y-tiled-gen12-mc-ccs@pipe-d-hdmi-a-6:
- shard-dg2-set2: [SKIP][310] ([Intel XE#787]) -> [SKIP][311] ([Intel XE#455] / [Intel XE#787]) +5 other tests skip
[310]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-436/igt@kms_ccs@bad-rotation-90-y-tiled-gen12-mc-ccs@pipe-d-hdmi-a-6.html
[311]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_ccs@bad-rotation-90-y-tiled-gen12-mc-ccs@pipe-d-hdmi-a-6.html
* igt@kms_ccs@ccs-on-another-bo-y-tiled-ccs@pipe-d-hdmi-a-6:
- shard-dg2-set2: [SKIP][312] ([Intel XE#455] / [Intel XE#787]) -> [SKIP][313] ([Intel XE#787]) +9 other tests skip
[312]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_ccs@ccs-on-another-bo-y-tiled-ccs@pipe-d-hdmi-a-6.html
[313]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@kms_ccs@ccs-on-another-bo-y-tiled-ccs@pipe-d-hdmi-a-6.html
* igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs:
- shard-dg2-set2: [INCOMPLETE][314] ([Intel XE#2705] / [Intel XE#4212] / [Intel XE#4345]) -> [INCOMPLETE][315] ([Intel XE#1727] / [Intel XE#3113] / [Intel XE#3124] / [Intel XE#4345])
[314]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-434/igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs.html
[315]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-463/igt@kms_ccs@random-ccs-data-4-tiled-dg2-mc-ccs.html
* igt@kms_cdclk@mode-transition-all-outputs:
- shard-dg2-set2: [SKIP][316] ([Intel XE#4440]) -> [SKIP][317] ([Intel XE#4418])
[316]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_cdclk@mode-transition-all-outputs.html
[317]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@kms_cdclk@mode-transition-all-outputs.html
* igt@kms_content_protection@srm:
- shard-dg2-set2: [SKIP][318] ([Intel XE#455]) -> [FAIL][319] ([Intel XE#1178])
[318]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_content_protection@srm.html
[319]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-432/igt@kms_content_protection@srm.html
* igt@kms_frontbuffer_tracking@drrs-2p-primscrn-cur-indfb-onoff:
- shard-dg2-set2: [SKIP][320] ([Intel XE#656]) -> [SKIP][321] ([Intel XE#651]) +4 other tests skip
[320]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_frontbuffer_tracking@drrs-2p-primscrn-cur-indfb-onoff.html
[321]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-436/igt@kms_frontbuffer_tracking@drrs-2p-primscrn-cur-indfb-onoff.html
* igt@kms_frontbuffer_tracking@drrs-2p-primscrn-pri-indfb-draw-mmap-wc:
- shard-bmg: [SKIP][322] ([Intel XE#2311]) -> [SKIP][323] ([Intel XE#2312]) +13 other tests skip
[322]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-bmg-2/igt@kms_frontbuffer_tracking@drrs-2p-primscrn-pri-indfb-draw-mmap-wc.html
[323]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-6/igt@kms_frontbuffer_tracking@drrs-2p-primscrn-pri-indfb-draw-mmap-wc.html
* igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-indfb-pgflip-blt:
- shard-bmg: [SKIP][324] ([Intel XE#2312]) -> [SKIP][325] ([Intel XE#2311]) +14 other tests skip
[324]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-bmg-6/igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-indfb-pgflip-blt.html
[325]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-7/igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-indfb-pgflip-blt.html
* igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-pri-shrfb-draw-render:
- shard-dg2-set2: [SKIP][326] ([Intel XE#651]) -> [SKIP][327] ([Intel XE#656]) +7 other tests skip
[326]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-433/igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-pri-shrfb-draw-render.html
[327]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_frontbuffer_tracking@drrs-2p-scndscrn-pri-shrfb-draw-render.html
* igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-fullscreen:
- shard-bmg: [SKIP][328] ([Intel XE#4141]) -> [SKIP][329] ([Intel XE#2312]) +8 other tests skip
[328]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-bmg-2/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-fullscreen.html
[329]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-6/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-fullscreen.html
* igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-cur-indfb-draw-render:
- shard-bmg: [SKIP][330] ([Intel XE#2312]) -> [SKIP][331] ([Intel XE#4141]) +3 other tests skip
[330]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-bmg-4/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-cur-indfb-draw-render.html
[331]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-2/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-cur-indfb-draw-render.html
* igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-spr-indfb-draw-mmap-wc:
- shard-dg2-set2: [SKIP][332] ([Intel XE#656]) -> [SKIP][333] ([Intel XE#653]) +7 other tests skip
[332]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-spr-indfb-draw-mmap-wc.html
[333]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@kms_frontbuffer_tracking@fbcpsr-2p-primscrn-spr-indfb-draw-mmap-wc.html
* igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-spr-indfb-fullscreen:
- shard-dg2-set2: [SKIP][334] ([Intel XE#653]) -> [SKIP][335] ([Intel XE#656]) +8 other tests skip
[334]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-435/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-spr-indfb-fullscreen.html
[335]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-464/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-spr-indfb-fullscreen.html
* igt@kms_frontbuffer_tracking@psr-2p-scndscrn-pri-indfb-draw-mmap-wc:
- shard-bmg: [SKIP][336] ([Intel XE#2312]) -> [SKIP][337] ([Intel XE#2313]) +9 other tests skip
[336]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-bmg-6/igt@kms_frontbuffer_tracking@psr-2p-scndscrn-pri-indfb-draw-mmap-wc.html
[337]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-3/igt@kms_frontbuffer_tracking@psr-2p-scndscrn-pri-indfb-draw-mmap-wc.html
* igt@kms_frontbuffer_tracking@psr-2p-scndscrn-shrfb-plflip-blt:
- shard-bmg: [SKIP][338] ([Intel XE#2313]) -> [SKIP][339] ([Intel XE#2312]) +10 other tests skip
[338]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-bmg-1/igt@kms_frontbuffer_tracking@psr-2p-scndscrn-shrfb-plflip-blt.html
[339]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-bmg-6/igt@kms_frontbuffer_tracking@psr-2p-scndscrn-shrfb-plflip-blt.html
* igt@kms_plane_multiple@2x-tiling-y:
- shard-dg2-set2: [SKIP][340] ([Intel XE#4596]) -> [SKIP][341] ([Intel XE#455])
[340]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-464/igt@kms_plane_multiple@2x-tiling-y.html
[341]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@kms_plane_multiple@2x-tiling-y.html
* igt@kms_tiled_display@basic-test-pattern:
- shard-dg2-set2: [SKIP][342] ([Intel XE#362]) -> [FAIL][343] ([Intel XE#1729])
[342]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-463/igt@kms_tiled_display@basic-test-pattern.html
[343]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@kms_tiled_display@basic-test-pattern.html
* igt@xe_peer2peer@read:
- shard-dg2-set2: [SKIP][344] ([Intel XE#1061]) -> [FAIL][345] ([Intel XE#1173])
[344]: https://intel-gfx-ci.01.org/tree/intel-xe/IGT_8288/shard-dg2-432/igt@xe_peer2peer@read.html
[345]: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/shard-dg2-433/igt@xe_peer2peer@read.html
[Intel XE#1061]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1061
[Intel XE#1124]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1124
[Intel XE#1173]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1173
[Intel XE#1178]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1178
[Intel XE#1340]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1340
[Intel XE#1392]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1392
[Intel XE#1401]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1401
[Intel XE#1406]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1406
[Intel XE#1407]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1407
[Intel XE#1421]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1421
[Intel XE#1424]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1424
[Intel XE#1435]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1435
[Intel XE#1439]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1439
[Intel XE#1489]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1489
[Intel XE#1499]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1499
[Intel XE#1727]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1727
[Intel XE#1729]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1729
[Intel XE#1745]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1745
[Intel XE#1794]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1794
[Intel XE#1999]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1999
[Intel XE#2049]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2049
[Intel XE#2191]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2191
[Intel XE#2234]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2234
[Intel XE#2244]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2244
[Intel XE#2252]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2252
[Intel XE#2284]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2284
[Intel XE#2286]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2286
[Intel XE#2291]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2291
[Intel XE#2293]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2293
[Intel XE#2311]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2311
[Intel XE#2312]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2312
[Intel XE#2313]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2313
[Intel XE#2314]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2314
[Intel XE#2316]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2316
[Intel XE#2320]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2320
[Intel XE#2321]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2321
[Intel XE#2322]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2322
[Intel XE#2325]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2325
[Intel XE#2327]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2327
[Intel XE#2341]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2341
[Intel XE#2360]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2360
[Intel XE#2380]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2380
[Intel XE#2391]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2391
[Intel XE#2414]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2414
[Intel XE#2505]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2505
[Intel XE#2509]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2509
[Intel XE#2541]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2541
[Intel XE#2597]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2597
[Intel XE#2652]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2652
[Intel XE#2669]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2669
[Intel XE#2705]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2705
[Intel XE#2763]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2763
[Intel XE#2850]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2850
[Intel XE#288]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/288
[Intel XE#2882]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2882
[Intel XE#2883]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2883
[Intel XE#2887]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2887
[Intel XE#2893]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2893
[Intel XE#2894]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2894
[Intel XE#2905]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2905
[Intel XE#2907]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2907
[Intel XE#2939]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/2939
[Intel XE#301]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/301
[Intel XE#306]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/306
[Intel XE#307]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/307
[Intel XE#308]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/308
[Intel XE#309]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/309
[Intel XE#310]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/310
[Intel XE#3113]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3113
[Intel XE#3124]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3124
[Intel XE#316]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/316
[Intel XE#323]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/323
[Intel XE#3321]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3321
[Intel XE#3342]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3342
[Intel XE#3414]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3414
[Intel XE#3432]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3432
[Intel XE#3573]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3573
[Intel XE#362]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/362
[Intel XE#366]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/366
[Intel XE#367]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/367
[Intel XE#373]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/373
[Intel XE#3767]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3767
[Intel XE#378]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/378
[Intel XE#3862]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3862
[Intel XE#3876]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3876
[Intel XE#3889]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3889
[Intel XE#3903]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3903
[Intel XE#3904]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/3904
[Intel XE#4130]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4130
[Intel XE#4141]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4141
[Intel XE#4212]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4212
[Intel XE#4259]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4259
[Intel XE#4302]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4302
[Intel XE#4331]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4331
[Intel XE#4345]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4345
[Intel XE#4351]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4351
[Intel XE#4418]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4418
[Intel XE#4422]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4422
[Intel XE#4439]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4439
[Intel XE#4440]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4440
[Intel XE#4459]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4459
[Intel XE#4494]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4494
[Intel XE#4501]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4501
[Intel XE#4518]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4518
[Intel XE#4522]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4522
[Intel XE#4540]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4540
[Intel XE#455]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/455
[Intel XE#4568]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4568
[Intel XE#4570]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4570
[Intel XE#4577]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4577
[Intel XE#4596]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/4596
[Intel XE#584]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/584
[Intel XE#616]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/616
[Intel XE#651]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/651
[Intel XE#653]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/653
[Intel XE#656]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/656
[Intel XE#664]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/664
[Intel XE#688]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/688
[Intel XE#702]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/702
[Intel XE#718]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/718
[Intel XE#756]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/756
[Intel XE#771]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/771
[Intel XE#787]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/787
[Intel XE#836]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/836
[Intel XE#870]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/870
[Intel XE#886]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/886
[Intel XE#929]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/929
[Intel XE#944]: https://gitlab.freedesktop.org/drm/xe/kernel/issues/944
Build changes
-------------
* IGT: IGT_8288 -> IGTPW_12851
IGTPW_12851: 12851
IGT_8288: 8288
xe-2854-14c330bc015ded4a1f1dd1f5aeb8617077aaa7e8: 14c330bc015ded4a1f1dd1f5aeb8617077aaa7e8
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/intel-xe/IGTPW_12851/index.html
* [PATCH i-g-t] test/amdgpu: add user queue test
@ 2025-03-27 7:31 Jesse.zhang@amd.com
0 siblings, 0 replies; 8+ messages in thread
From: Jesse.zhang@amd.com @ 2025-03-27 7:31 UTC (permalink / raw)
To: igt-dev
Cc: Vitaly Prosyak, Alex Deucher, Christian Koenig, Prike Liang,
Srinivasan Shanmugam, Sunil Khatri, Jesse.zhang
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
This patch introduces a new test for AMDGPU user queues, which provide
functionality for userspace to manage GPU queues directly. The test covers:
1. Basic user queue operations for GFX, COMPUTE and SDMA IP blocks
2. Synchronization between user queues using syncobjs
3. Timeline-based synchronization
4. Multi-threaded signaling and waiting scenarios
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
Signed-off-by: Jesse.zhang <Jesse.zhang@amd.com>
---
include/drm-uapi/amdgpu_drm.h | 254 +++++
tests/amdgpu/amd_userq_basic.c | 1706 ++++++++++++++++++++++++++++++++
tests/amdgpu/meson.build | 8 +-
3 files changed, 1967 insertions(+), 1 deletion(-)
create mode 100644 tests/amdgpu/amd_userq_basic.c
diff --git a/include/drm-uapi/amdgpu_drm.h b/include/drm-uapi/amdgpu_drm.h
index efe5de6ce..d83216a59 100644
--- a/include/drm-uapi/amdgpu_drm.h
+++ b/include/drm-uapi/amdgpu_drm.h
@@ -54,6 +54,9 @@ extern "C" {
#define DRM_AMDGPU_VM 0x13
#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
#define DRM_AMDGPU_SCHED 0x15
+#define DRM_AMDGPU_USERQ 0x16
+#define DRM_AMDGPU_USERQ_SIGNAL 0x17
+#define DRM_AMDGPU_USERQ_WAIT 0x18
#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +74,9 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
+#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
+#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
/**
* DOC: memory domains
@@ -319,6 +325,241 @@ union drm_amdgpu_ctx {
union drm_amdgpu_ctx_out out;
};
+/* user queue IOCTL operations */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE 2
+
+/*
+ * This structure is a container to pass input configuration
+ * info for all supported userqueue related operations.
+ * For operation AMDGPU_USERQ_OP_CREATE: the user is expected
+ * to set all fields, except the parameter 'queue_id'.
+ * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected
+ * to be set is 'queue_id', everything else is ignored.
+ */
+struct drm_amdgpu_userq_in {
+ /** AMDGPU_USERQ_OP_* */
+ __u32 op;
+ /** Queue id passed for operation AMDGPU_USERQ_OP_FREE */
+ __u32 queue_id;
+ /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */
+ __u32 ip_type;
+ /**
+ * @doorbell_handle: the handle of doorbell GEM object
+ * associated to this userqueue client.
+ */
+ __u32 doorbell_handle;
+ /**
+ * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo.
+ * Kernel will generate absolute doorbell offset using doorbell_handle
+ * and doorbell_offset in the doorbell bo.
+ */
+ __u32 doorbell_offset;
+ __u32 _pad;
+ /**
+ * @queue_va: Virtual address of the GPU memory which holds the queue
+ * object. The queue holds the workload packets.
+ */
+ __u64 queue_va;
+ /**
+ * @queue_size: Size of the queue in bytes; this needs to be 256-byte
+ * aligned.
+ */
+ __u64 queue_size;
+ /**
+ * @rptr_va: Virtual address of the GPU memory which holds the ring RPTR.
+ * This object must be at least 8 bytes in size and aligned to an 8-byte boundary.
+ */
+ __u64 rptr_va;
+ /**
+ * @wptr_va: Virtual address of the GPU memory which holds the ring WPTR.
+ * This object must be at least 8 bytes in size and aligned to an 8-byte boundary.
+ *
+ * Queue, RPTR and WPTR can come from the same object, as long as the size
+ * and alignment-related requirements are met.
+ */
+ __u64 wptr_va;
+ /**
+ * @mqd: MQD (memory queue descriptor) is a set of parameters which allow
+ * the GPU to uniquely define and identify a usermode queue.
+ *
+ * MQD data can be of different sizes for different GPU IPs/engines and
+ * their respective versions/revisions, so this points to a __u64 *
+ * which holds the IP-specific MQD of this usermode queue.
+ */
+ __u64 mqd;
+ /**
+ * @mqd_size: Size of the MQD data in bytes; it must match the MQD structure
+ * size of the respective engine/revision defined in the UAPI, e.g. for
+ * gfx11 workloads, mqd_size = sizeof(drm_amdgpu_userq_mqd_gfx11).
+ */
+ __u64 mqd_size;
+};
+
+/* The structure to carry output of userqueue ops */
+struct drm_amdgpu_userq_out {
+ /**
+ * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique
+ * queue ID to represent the newly created userqueue in the system, otherwise
+ * it should be ignored.
+ */
+ __u32 queue_id;
+ __u32 _pad;
+};
+
+union drm_amdgpu_userq {
+ struct drm_amdgpu_userq_in in;
+ struct drm_amdgpu_userq_out out;
+};
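/*
 * Illustration (not from the patch): a minimal sketch of driving the union
 * above through DRM_IOCTL_AMDGPU_USERQ. The fd, doorbell handle and GPU
 * virtual addresses are placeholders the caller must have prepared; real
 * code would also loop on EINTR (e.g. via drmIoctl()).
 */
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

static int userq_create(int fd, uint32_t ip_type, void *mqd, size_t mqd_size,
			uint32_t doorbell_handle, uint32_t doorbell_offset,
			uint64_t queue_va, uint64_t queue_size,
			uint64_t rptr_va, uint64_t wptr_va, uint32_t *queue_id)
{
	union drm_amdgpu_userq args;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_USERQ_OP_CREATE;
	args.in.ip_type = ip_type;		/* e.g. AMDGPU_HW_IP_GFX */
	args.in.doorbell_handle = doorbell_handle;
	args.in.doorbell_offset = doorbell_offset;
	args.in.queue_va = queue_va;		/* ring buffer VA */
	args.in.queue_size = queue_size;	/* 256-byte aligned */
	args.in.rptr_va = rptr_va;		/* 8-byte aligned */
	args.in.wptr_va = wptr_va;		/* 8-byte aligned */
	args.in.mqd = (uintptr_t)mqd;		/* IP-specific MQD struct */
	args.in.mqd_size = mqd_size;

	if (ioctl(fd, DRM_IOCTL_AMDGPU_USERQ, &args))
		return -errno;
	*queue_id = args.out.queue_id;
	return 0;
}

static int userq_free(int fd, uint32_t queue_id)
{
	union drm_amdgpu_userq args;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_USERQ_OP_FREE;
	args.in.queue_id = queue_id;	/* everything else is ignored */
	return ioctl(fd, DRM_IOCTL_AMDGPU_USERQ, &args) ? -errno : 0;
}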
+
+/* GFX V11 IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_gfx11 {
+ /**
+ * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer.
+ * Use the AMDGPU_INFO IOCTL to find the exact size of the object.
+ */
+ __u64 shadow_va;
+ /**
+ * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
+ * Use the AMDGPU_INFO IOCTL to find the exact size of the object.
+ */
+ __u64 csa_va;
+};
+
+/* GFX V11 SDMA IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_sdma_gfx11 {
+ /**
+ * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
+ * This must come from a separate GPU object; use the AMDGPU_INFO IOCTL
+ * to get the size.
+ */
+ __u64 csa_va;
+};
+
+/* GFX V11 Compute IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_compute_gfx11 {
+ /**
+ * @eop_va: Virtual address of the GPU memory to hold the EOP buffer.
+ * This must come from a separate GPU object; use the AMDGPU_INFO IOCTL
+ * to get the size.
+ */
+ __u64 eop_va;
+};
+
+/* userq signal/wait ioctl */
+struct drm_amdgpu_userq_signal {
+ /**
+ * @queue_id: Queue handle used by the userq fence creation function
+ * to retrieve the WPTR.
+ */
+ __u32 queue_id;
+ __u32 pad;
+ /**
+ * @syncobj_handles: The list of syncobj handles submitted by the user queue
+ * job to be signaled.
+ */
+ __u64 syncobj_handles;
+ /**
+ * @num_syncobj_handles: A count that represents the number of syncobj handles in
+ * @syncobj_handles.
+ */
+ __u64 num_syncobj_handles;
+ /**
+ * @bo_read_handles: The list of BO handles that the submitted user queue job
+ * is using for read only. This will update BO fences in the kernel.
+ */
+ __u64 bo_read_handles;
+ /**
+ * @bo_write_handles: The list of BO handles that the submitted user queue job
+ * is using for write only. This will update BO fences in the kernel.
+ */
+ __u64 bo_write_handles;
+ /**
+ * @num_bo_read_handles: A count that represents the number of read BO handles in
+ * @bo_read_handles.
+ */
+ __u32 num_bo_read_handles;
+ /**
+ * @num_bo_write_handles: A count that represents the number of write BO handles in
+ * @bo_write_handles.
+ */
+ __u32 num_bo_write_handles;
+};
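/*
 * Illustration (not from the patch), same includes as the sketch above:
 * signal one binary syncobj from a user-queue job. `syncobj` is assumed to
 * come from DRM_IOCTL_SYNCOBJ_CREATE and `queue_id` from a prior
 * AMDGPU_USERQ_OP_CREATE; no BO fences are updated in this minimal case.
 */
static int userq_signal_one(int fd, uint32_t queue_id, uint32_t syncobj)
{
	struct drm_amdgpu_userq_signal args = {
		.queue_id = queue_id,
		.syncobj_handles = (uintptr_t)&syncobj,
		.num_syncobj_handles = 1,
	};

	return ioctl(fd, DRM_IOCTL_AMDGPU_USERQ_SIGNAL, &args) ? -errno : 0;
}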
+
+struct drm_amdgpu_userq_fence_info {
+ /**
+ * @va: A GPU address, allocated for each queue, which stores the
+ * read pointer (RPTR) value.
+ */
+ __u64 va;
+ /**
+ * @value: A 64-bit value representing the write pointer (WPTR) of the
+ * queue commands, which is compared with the RPTR value to signal the
+ * fences.
+ */
+ __u64 value;
+};
+
+struct drm_amdgpu_userq_wait {
+ /**
+ * @syncobj_handles: The list of syncobj handles submitted by the user queue
+ * job to get the va/value pairs.
+ */
+ __u64 syncobj_handles;
+ /**
+ * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by
+ * the user queue job to get the va/value pairs at given @syncobj_timeline_points.
+ */
+ __u64 syncobj_timeline_handles;
+ /**
+ * @syncobj_timeline_points: The list of timeline syncobj points submitted by the
+ * user queue job for the corresponding @syncobj_timeline_handles.
+ */
+ __u64 syncobj_timeline_points;
+ /**
+ * @bo_read_handles: The list of read BO handles submitted by the user queue
+ * job to get the va/value pairs.
+ */
+ __u64 bo_read_handles;
+ /**
+ * @bo_write_handles: The list of write BO handles submitted by the user queue
+ * job to get the va/value pairs.
+ */
+ __u64 bo_write_handles;
+ /**
+ * @num_syncobj_timeline_handles: A count that represents the number of timeline
+ * syncobj handles in @syncobj_timeline_handles.
+ */
+ __u16 num_syncobj_timeline_handles;
+ /**
+ * @num_fences: This field can be used both as input and output. As input it defines
+ * the maximum number of fences that can be returned and as output it will specify
+ * how many fences were actually returned from the ioctl.
+ */
+ __u16 num_fences;
+ /**
+ * @num_syncobj_handles: A count that represents the number of syncobj handles in
+ * @syncobj_handles.
+ */
+ __u32 num_syncobj_handles;
+ /**
+ * @num_bo_read_handles: A count that represents the number of read BO handles in
+ * @bo_read_handles.
+ */
+ __u32 num_bo_read_handles;
+ /**
+ * @num_bo_write_handles: A count that represents the number of write BO handles in
+ * @bo_write_handles.
+ */
+ __u32 num_bo_write_handles;
+ /**
+ * @out_fences: This field is a return value from the ioctl, containing the
+ * list of address/value pairs to wait on.
+ */
+ __u64 out_fences;
+};
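/*
 * Illustration (not from the patch): fetch the va/value pairs behind one
 * syncobj so the caller can poll them. num_fences is in/out -- the capacity
 * of `info` on entry, the number of pairs actually returned on exit.
 */
static int userq_wait_one(int fd, uint32_t syncobj,
			  struct drm_amdgpu_userq_fence_info *info,
			  uint16_t *num_fences)
{
	struct drm_amdgpu_userq_wait args = {
		.syncobj_handles = (uintptr_t)&syncobj,
		.num_syncobj_handles = 1,
		.num_fences = *num_fences,
		.out_fences = (uintptr_t)info,
	};
	int ret = ioctl(fd, DRM_IOCTL_AMDGPU_USERQ_WAIT, &args);

	*num_fences = args.num_fences;
	return ret ? -errno : 0;
}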
+
/* vm ioctl */
#define AMDGPU_VM_OP_RESERVE_VMID 1
#define AMDGPU_VM_OP_UNRESERVE_VMID 2
@@ -592,6 +833,19 @@ struct drm_amdgpu_gem_va {
__u64 offset_in_bo;
/** Specify mapping size. Must be correctly aligned. */
__u64 map_size;
+ /**
+ * vm_timeline_point is a sequence number used to add a new timeline point.
+ */
+ __u64 vm_timeline_point;
+ /**
+ * The VM page-table update fence is installed in the given vm_timeline_syncobj_out
+ * at vm_timeline_point.
+ */
+ __u32 vm_timeline_syncobj_out;
+ /** the number of syncobj handles in @input_fence_syncobj_handles */
+ __u32 num_syncobj_handles;
+ /** Array of syncobj handles to wait on as input fences */
+ __u64 input_fence_syncobj_handles;
};
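/*
 * Illustration (not from the patch): an AMDGPU_VA_OP_MAP that installs the
 * page-table update fence at `point` of a caller-created timeline syncobj.
 * The flags are placeholders; the pre-existing drm_amdgpu_gem_va fields are
 * filled as for any regular map.
 */
static int gem_va_map_timeline(int fd, uint32_t bo_handle, uint64_t gpu_va,
			       uint64_t size, uint32_t timeline_syncobj,
			       uint64_t point)
{
	struct drm_amdgpu_gem_va va = {
		.handle = bo_handle,
		.operation = AMDGPU_VA_OP_MAP,
		.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE,
		.va_address = gpu_va,
		.offset_in_bo = 0,
		.map_size = size,
		.vm_timeline_syncobj_out = timeline_syncobj,
		.vm_timeline_point = point,
	};

	return ioctl(fd, DRM_IOCTL_AMDGPU_GEM_VA, &va) ? -errno : 0;
}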
#define AMDGPU_HW_IP_GFX 0
diff --git a/tests/amdgpu/amd_userq_basic.c b/tests/amdgpu/amd_userq_basic.c
new file mode 100644
index 000000000..b010fed7a
--- /dev/null
+++ b/tests/amdgpu/amd_userq_basic.c
@@ -0,0 +1,1706 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ */
+ #include <pthread.h>
+ #include <time.h>
+ #include "lib/amdgpu/amd_memory.h"
+ #include "lib/amdgpu/amd_sdma.h"
+ #include "lib/amdgpu/amd_PM4.h"
+ #include "lib/amdgpu/amd_command_submission.h"
+ #include "lib/amdgpu/amd_compute.h"
+ #include "lib/amdgpu/amd_gfx.h"
+ #include "lib/amdgpu/amd_shaders.h"
+ #include "lib/amdgpu/amd_dispatch.h"
+ #include "include/drm-uapi/amdgpu_drm.h"
+ #include "lib/amdgpu/amd_cs_radv.h"
+
+ #define BUFFER_SIZE (8 * 1024)
+
+/* Flag to indicate secure buffer related workload, unused for now */
+ #define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0)
+/* Flag to indicate AQL workload, unused for now */
+ #define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+ #define PACKET_TYPE3 3
+ #define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
+ (((op) & 0xFF) << 8) | \
+ ((n) & 0x3FFF) << 16)
+
+ #define PACKET3_NOP 0x10
+ #define PACKET3_PROTECTED_FENCE_SIGNAL 0xd0
+ #define PACKET3_FENCE_WAIT_MULTI 0xd1
+ #define PACKET3_WRITE_DATA 0x37
+
+ #define PACKET3_WAIT_REG_MEM 0x3C
+ #define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
+ #define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
+ #define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
+ #define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
+
+ #define WR_CONFIRM (1 << 20)
+ #define WRITE_DATA_DST_SEL(x) ((x) << 8)
+ #define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
+ #define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
+ #define WAIT_MEM_ENGINE_SEL(x) ((x) << 0)
+ #define WAIT_MEM_WAIT_PREEMPTABLE(x) ((x) << 1)
+ #define WAIT_MEM_CACHE_POLICY(x) ((x) << 2)
+ #define WAIT_MEM_POLL_INTERVAL(x) ((x) << 16)
+
+ #define DOORBELL_INDEX 4
+ #define AMDGPU_USERQ_BO_WRITE 1
+
+ #define PACKET3_RELEASE_MEM 0x49
+ #define PACKET3_RELEASE_MEM_CACHE_POLICY(x) ((x) << 25)
+ #define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29)
+ #define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24)
+ #define CACHE_FLUSH_AND_INV_TS_EVENT 0x00000014
+
+ #define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0)
+ #define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8)
+ #define PACKET3_RELEASE_MEM_GCR_GLM_WB (1 << 12)
+ #define PACKET3_RELEASE_MEM_GCR_GLM_INV (1 << 13)
+ #define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14)
+ #define PACKET3_RELEASE_MEM_GCR_GL1_INV (1 << 15)
+ #define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16)
+ #define PACKET3_RELEASE_MEM_GCR_GL2_RANGE (1 << 17)
+ #define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19)
+ #define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20)
+ #define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21)
+ #define PACKET3_RELEASE_MEM_GCR_SEQ (1 << 22)
+
+//SDMA related
+ #define SDMA_OPCODE_COPY 1
+ #define SDMA_OPCODE_WRITE 2
+ #define SDMA_COPY_SUB_OPCODE_LINEAR 0
+ #define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \
+ (((sub_op) & 0xFF) << 8) | \
+ (((op) & 0xFF) << 0))
+ #define upper_32_bits(n) ((uint32_t)(((n) >> 16) >> 16))
+/* use the full low dword; the earlier 0xfffffffc mask dropped bits 0-1 */
+ #define lower_32_bits(n) ((uint32_t)((n) & 0xffffffff))
+
+/* AMDGPU_USERQ_OP_* come from include/drm-uapi/amdgpu_drm.h and the MQD
+ * flags are already defined above, so neither is redefined here.
+ */
+ #define WORKLOAD_COUNT 1
+ #define DEBUG_USERQUEUE 1
+
+ #ifndef PAGE_SIZE
+ #define PAGE_SIZE 4096
+ #endif
+ #define USERMODE_QUEUE_SIZE (PAGE_SIZE * 256)
+ #define ALIGNMENT 4096
+
+struct amdgpu_userq_bo {
+ amdgpu_bo_handle handle;
+ amdgpu_va_handle va_handle;
+ uint64_t mc_addr;
+ uint64_t size;
+ void *ptr;
+};
+
+static struct amdgpu_userq_bo shared_userq_bo;
+static int shared_syncobj_fd1;
+static int shared_syncobj_fd2;
+
+/* Handshake predicate between the signal and wait threads; guarded by @lock */
+static bool queue_signalled;
+
+static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+
+ #if DEBUG_USERQUEUE
+static void packet_dump(uint32_t *ptr, int start, int end)
+{
+ int i;
+
+ igt_info("\n============PACKET==============\n");
+ for (i = start; i < end; i++)
+ igt_info("pkt[%d] = 0x%x\n", i - start, ptr[i]);
+
+ igt_info("=================================\n");
+}
+ #endif
+
+static void validation(uint32_t *workload)
+{
+ int i = 0;
+
+ while (workload[0] != 0xdeadbeaf) {
+ if (i++ > 100)
+ break;
+ usleep(100);
+ }
+
+ igt_info("\n========OUTPUT==========\n");
+ for (i = 0; i < 5; i++)
+ igt_info("worklod[%d] = %x\n", i, workload[i]);
+
+ igt_info("===========================\n");
+}
+
+static void create_relmem_workload(uint32_t *ptr, int *npkt, int data,
+ uint64_t *wptr_cpu, uint64_t *doorbell_ptr,
+ uint32_t q_id, uint64_t addr)
+{
+ ptr[(*npkt)++] = (PACKET3(PACKET3_RELEASE_MEM, 6));
+ ptr[(*npkt)++] = 0x0030e514; /* CACHE_FLUSH_AND_INV_TS_EVENT, event index 5, GCR GL2 WB/INV */
+ ptr[(*npkt)++] = 0x23010000; /* DATA_SEL(1): write 32-bit data, INT_SEL(3) */
+ ptr[(*npkt)++] = lower_32_bits(addr);
+ ptr[(*npkt)++] = upper_32_bits(addr);
+ ptr[(*npkt)++] = 0xffffffff & data;
+ ptr[(*npkt)++] = 0;
+ ptr[(*npkt)++] = q_id;
+ *wptr_cpu = *npkt;
+ doorbell_ptr[DOORBELL_INDEX] = *npkt;
+}
+
+static int create_submit_workload(uint32_t *ptr, int *npkt, uint32_t data,
+ uint64_t *wptr_cpu, uint64_t *doorbell_ptr,
+ uint32_t q_id, struct amdgpu_userq_bo *dstptr)
+{
+ #if DEBUG_USERQUEUE
+ int start = *npkt;
+ #endif
+ ptr[(*npkt)++] = PACKET3(PACKET3_WRITE_DATA, 7);
+ ptr[(*npkt)++] =
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM | WRITE_DATA_CACHE_POLICY(3);
+
+ ptr[(*npkt)++] = 0xfffffffc & (dstptr->mc_addr);
+ ptr[(*npkt)++] = (0xffffffff00000000 & (dstptr->mc_addr)) >> 32;
+ ptr[(*npkt)++] = data;
+ ptr[(*npkt)++] = data;
+ ptr[(*npkt)++] = data;
+ ptr[(*npkt)++] = data;
+ ptr[(*npkt)++] = data;
+ create_relmem_workload(ptr, npkt, 0xdeadbeaf, wptr_cpu,
+ doorbell_ptr, q_id, dstptr->mc_addr);
+ #if DEBUG_USERQUEUE
+ packet_dump(ptr, start, *npkt);
+ #endif
+ return 0;
+}
+
+static void alloc_doorbell(amdgpu_device_handle device_handle, struct amdgpu_userq_bo *doorbell_bo,
+ unsigned int size, unsigned int domain)
+{
+ struct amdgpu_bo_alloc_request req = {0};
+ amdgpu_bo_handle buf_handle;
+ int r;
+
+ req.alloc_size = ALIGN(size, PAGE_SIZE);
+ req.preferred_heap = domain;
+
+ r = amdgpu_bo_alloc(device_handle, &req, &buf_handle);
+ igt_assert_eq(r, 0);
+
+ doorbell_bo->handle = buf_handle;
+ doorbell_bo->size = req.alloc_size;
+
+ r = amdgpu_bo_cpu_map(doorbell_bo->handle,
+ (void **)&doorbell_bo->ptr);
+ igt_assert_eq(r, 0);
+}
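+
+/*
+ * Usage sketch: submission on a user queue is CPU-only. After packets are
+ * written to the queue BO, the new write pointer is stored in the wptr BO
+ * and then written to the mapped doorbell page, e.g.:
+ *
+ *	*wptr_cpu = npkt;
+ *	doorbell_ptr[DOORBELL_INDEX] = npkt;
+ */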
+
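+/*
+ * Poll a timeline syncobj until its signalled payload catches up with the
+ * last submitted point: query the newest submitted point, wait on it in
+ * 100 ms slices, then compare against the signalled payload.
+ */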
+static int timeline_syncobj_wait(amdgpu_device_handle device_handle, uint32_t timeline_syncobj_handle)
+{
+ uint64_t point, signaled_point;
+ uint64_t timeout;
+ struct timespec tp;
+ uint32_t flags = DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED;
+ int r;
+
+ do {
+ r = amdgpu_cs_syncobj_query2(device_handle, &timeline_syncobj_handle,
+ (uint64_t *)&point, 1, flags);
+ if (r)
+ return r;
+
+ timeout = 0;
+ clock_gettime(CLOCK_MONOTONIC, &tp);
+ timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
+ timeout += 100000000; /* 100 ms */
+ r = amdgpu_cs_syncobj_timeline_wait(device_handle, &timeline_syncobj_handle,
+ (uint64_t *)&point, 1, timeout,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_cs_syncobj_query(device_handle, &timeline_syncobj_handle, &signaled_point, 1);
+ if (r)
+ return r;
+ } while (point != signaled_point);
+
+ return r;
+}
+
+static int
+amdgpu_bo_unmap_and_free_uq(amdgpu_device_handle dev, amdgpu_bo_handle bo,
+ amdgpu_va_handle va_handle, uint64_t mc_addr, uint64_t size,
+ uint32_t timeline_syncobj_handle, uint16_t point)
+{
+ amdgpu_bo_cpu_unmap(bo);
+ amdgpu_bo_va_op_raw2(dev, bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP, timeline_syncobj_handle, point, 0, 0);
+
+ amdgpu_va_range_free(va_handle);
+ amdgpu_bo_free(bo);
+
+ return 0;
+}
+
+static int amdgpu_bo_alloc_and_map_uq(amdgpu_device_handle dev,
+ uint64_t size,
+ uint64_t alignment,
+ uint64_t heap,
+ uint64_t alloc_flags,
+ uint64_t mapping_flags,
+ amdgpu_bo_handle *bo,
+ void **cpu,
+ uint64_t *mc_address,
+ amdgpu_va_handle *va_handle,
+ uint32_t timeline_syncobj_handle,
+ uint64_t point)
+{
+ struct amdgpu_bo_alloc_request request = {};
+ amdgpu_bo_handle buf_handle;
+ amdgpu_va_handle handle;
+ uint64_t vmc_addr;
+ int r;
+
+ request.alloc_size = size;
+ request.phys_alignment = alignment;
+ request.preferred_heap = heap;
+ request.flags = alloc_flags;
+
+ r = amdgpu_bo_alloc(dev, &request, &buf_handle);
+ if (r)
+ return r;
+
+ r = amdgpu_va_range_alloc(dev,
+ amdgpu_gpu_va_range_general,
+ size, alignment, 0, &vmc_addr,
+ &handle, 0);
+ if (r)
+ goto error_va_alloc;
+
+ r = amdgpu_bo_va_op_raw2(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
+ AMDGPU_VM_PAGE_READABLE |
+ AMDGPU_VM_PAGE_WRITEABLE |
+ AMDGPU_VM_PAGE_EXECUTABLE |
+ mapping_flags,
+ AMDGPU_VA_OP_MAP,
+ timeline_syncobj_handle,
+ point, 0, 0);
+ if (r) {
+ goto error_va_map;
+ }
+
+ r = amdgpu_bo_cpu_map(buf_handle, cpu);
+ if (r)
+ goto error_cpu_map;
+
+ *bo = buf_handle;
+ *mc_address = vmc_addr;
+ *va_handle = handle;
+
+ return 0;
+
+ error_cpu_map:
+ amdgpu_bo_cpu_unmap(buf_handle);
+ error_va_map:
+ amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
+ error_va_alloc:
+ amdgpu_bo_free(buf_handle);
+ return r;
+}
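+
+/*
+ * Usage sketch for the _uq variants: each map/unmap attaches its page
+ * table update fence to @timeline_syncobj_handle at @point, so callers
+ * bump the point per operation and block before touching the mapping:
+ *
+ *	r = amdgpu_bo_alloc_and_map_uq(dev, ..., syncobj, ++point);
+ *	r = timeline_syncobj_wait(dev, syncobj);
+ */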
+
+static void free_workload(amdgpu_device_handle device_handle, struct amdgpu_userq_bo *dstptr,
+ uint32_t timeline_syncobj_handle, uint64_t point,
+ uint64_t syncobj_handles_array, uint32_t num_syncobj_handles)
+{
+ int r;
+
+ r = amdgpu_bo_unmap_and_free_uq(device_handle, dstptr->handle, dstptr->va_handle,
+ dstptr->mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle, point);
+ igt_assert_eq(r, 0);
+}
+
+static int allocate_workload(amdgpu_device_handle device_handle, struct amdgpu_userq_bo *dstptr,
+ uint32_t timeline_syncobj_handle, uint64_t point)
+{
+
+ uint64_t gtt_flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ int r;
+
+ r = amdgpu_bo_alloc_and_map_uq(device_handle, PAGE_SIZE,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &dstptr->handle, &dstptr->ptr,
+ &dstptr->mc_addr, &dstptr->va_handle,
+ timeline_syncobj_handle, point);
+ if (r)
+ return r;
+
+ /* clear the destination page itself, not the pointer member */
+ memset(dstptr->ptr, 0x0, PAGE_SIZE);
+ return r;
+}
+
+static int create_sync_objects(int fd, uint32_t *timeline_syncobj_handle,
+ uint32_t *timeline_syncobj_handle2)
+{
+ int r;
+
+ r = drmSyncobjCreate(fd, 0, timeline_syncobj_handle);
+ if (r)
+ return r;
+
+ r = drmSyncobjCreate(fd, 0, timeline_syncobj_handle2);
+
+ return r;
+}
+
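+/*
+ * Producer thread: creates a GFX user queue, writes a pattern into the
+ * shared BO, attaches fences to two shared syncobjs with the
+ * USERQ_SIGNAL ioctl, rings the doorbell and then wakes the waiter.
+ */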
+static void *userq_signal(void *data)
+{
+ struct amdgpu_userq_bo queue, shadow, doorbell, wptr_bo, rptr;
+ uint32_t q_id, syncobj_handle, syncobj_handle1, db_handle;
+ uint64_t gtt_flags = 0, *doorbell_ptr, *wptr;
+ struct drm_amdgpu_userq_mqd_gfx11 mqd;
+ struct amdgpu_userq_bo gds, csa;
+ uint32_t syncarray[2];
+ uint32_t *ptr;
+ int r, i;
+ uint32_t timeline_syncobj_handle;
+ uint64_t point = 0;
+ uint32_t timeline_syncobj_handle2;
+ uint64_t point2 = 0;
+ struct drm_amdgpu_userq_signal signal_data;
+ uint32_t bo_read_handles[1], bo_write_handles[1];
+ uint32_t read_handle, write_handle;
+
+
+ amdgpu_device_handle device = (amdgpu_device_handle)data;
+
+ int fd = amdgpu_device_get_fd(device);
+
+ r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &queue.handle, &queue.ptr,
+ &queue.mc_addr, &queue.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &wptr_bo.handle, &wptr_bo.ptr,
+ &wptr_bo.mc_addr, &wptr_bo.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &rptr.handle, &rptr.ptr,
+ &rptr.mc_addr, &rptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &shadow.handle, &shadow.ptr,
+ &shadow.mc_addr, &shadow.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ 0,
+ &gds.handle, &gds.ptr,
+ &gds.mc_addr, &gds.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ 0,
+ &csa.handle, &csa.ptr,
+ &csa.mc_addr, &csa.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
+
+ memset(&mqd, 0, sizeof(mqd));
+ mqd.shadow_va = shadow.mc_addr;
+ mqd.csa_va = csa.mc_addr;
+
+ doorbell_ptr = (uint64_t *)doorbell.ptr;
+
+ ptr = (uint32_t *)queue.ptr;
+ memset(ptr, 0, USERMODE_QUEUE_SIZE);
+
+ wptr = (uint64_t *)wptr_bo.ptr;
+ memset(wptr, 0, sizeof(*wptr));
+
+ amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
+
+ /* Create the Usermode Queue */
+ r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
+ db_handle, DOORBELL_INDEX,
+ queue.mc_addr, USERMODE_QUEUE_SIZE,
+ wptr_bo.mc_addr, rptr.mc_addr, &mqd, &q_id);
+ igt_assert_eq(r, 0);
+ if (r)
+ goto err_free_queue;
+
+ r = drmSyncobjCreate(fd, 0, &syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjCreate(fd, 0, &syncobj_handle1);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjHandleToFD(fd, syncobj_handle, &shared_syncobj_fd2);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjHandleToFD(fd, syncobj_handle1, &shared_syncobj_fd1);
+ igt_assert_eq(r, 0);
+
+ syncarray[0] = syncobj_handle;
+ syncarray[1] = syncobj_handle1;
+
+ ptr[0] = PACKET3(PACKET3_WRITE_DATA, 7);
+ ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM | WRITE_DATA_CACHE_POLICY(3);
+ ptr[2] = 0xfffffffc & (shared_userq_bo.mc_addr);
+ ptr[3] = (0xffffffff00000000 & (shared_userq_bo.mc_addr)) >> 32;
+ ptr[4] = 0xdeadbeaf;
+ ptr[5] = 0xdeadbeaf;
+ ptr[6] = 0xdeadbeaf;
+ ptr[7] = 0xdeadbeaf;
+ ptr[8] = 0xdeadbeaf;
+
+ for (i = 9; i <= 60; i++)
+ ptr[i] = PACKET3(PACKET3_NOP, 0x3fff);
+
+ ptr[i++] = PACKET3(PACKET3_PROTECTED_FENCE_SIGNAL, 0);
+
+ *wptr = ++i;
+ r = amdgpu_bo_export(queue.handle, amdgpu_bo_handle_type_kms, &read_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_export(shadow.handle, amdgpu_bo_handle_type_kms, &write_handle);
+ igt_assert_eq(r, 0);
+ // Assign the exported handles to the arrays
+ bo_read_handles[0] = read_handle;
+ bo_write_handles[0] = write_handle;
+
+ memset(&signal_data, 0, sizeof(signal_data));
+ signal_data.queue_id = q_id;
+ signal_data.syncobj_handles = (uint64_t)syncarray;
+ signal_data.num_syncobj_handles = 2;
+ signal_data.bo_write_handles = (uint64_t)bo_write_handles;
+ signal_data.num_bo_write_handles = 1;
+ signal_data.bo_read_handles = (uint64_t)bo_read_handles;
+ signal_data.num_bo_read_handles = 1;
+
+ r = amdgpu_userq_signal(device, &signal_data);
+ igt_assert_eq(r, 0);
+
+ doorbell_ptr[DOORBELL_INDEX] = i;
+
+ /* Free the Usermode Queue */
+ r = amdgpu_free_userqueue(device, q_id);
+ igt_assert_eq(r, 0);
+ /* wake the waiter; set the handshake flag under the lock */
+ pthread_mutex_lock(&lock);
+ queue_signalled = true;
+ pthread_cond_signal(&cond);
+ pthread_mutex_unlock(&lock);
+
+err_free_queue:
+ r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
+ csa.va_handle,
+ csa.mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
+ gds.va_handle,
+ gds.mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
+ shadow.va_handle,
+ shadow.mc_addr, PAGE_SIZE * 4,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_cpu_unmap(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_free(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle,
+ rptr.mc_addr, PAGE_SIZE);
+
+ amdgpu_bo_unmap_and_free(wptr_bo.handle, wptr_bo.va_handle,
+ wptr_bo.mc_addr, PAGE_SIZE);
+
+ amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
+ queue.mc_addr, USERMODE_QUEUE_SIZE);
+
+ drmSyncobjDestroy(fd, timeline_syncobj_handle);
+ drmSyncobjDestroy(fd, timeline_syncobj_handle2);
+
+ return (void *)(long)r;
+}
+
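+/*
+ * Consumer thread: waits for the producer's handshake, imports the shared
+ * syncobjs, fetches the fence va/value pairs with the USERQ_WAIT ioctl and
+ * emits FENCE_WAIT_MULTI packets on its own queue to wait on them.
+ */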
+static void *userq_wait(void *data)
+{
+ struct amdgpu_userq_bo queue, shadow, doorbell, wptr_bo, rptr;
+ struct amdgpu_userq_bo gds, csa;
+ struct drm_amdgpu_userq_fence_info *fence_info = NULL;
+ uint32_t syncobj_handle, syncobj_handle1, db_handle;
+ uint64_t num_fences;
+ uint64_t gtt_flags = 0, *doorbell_ptr, *wptr;
+ struct drm_amdgpu_userq_mqd_gfx11 mqd;
+ uint64_t gpu_addr, reference_val;
+ uint32_t *ptr;
+ uint32_t q_id;
+ int i, r, fd;
+ uint32_t timeline_syncobj_handle;
+ uint64_t point = 0;
+ uint32_t timeline_syncobj_handle2;
+ uint64_t point2 = 0;
+ struct drm_amdgpu_userq_wait wait_data;
+ uint32_t bo_read_handles[1], bo_write_handles[1];
+ uint32_t read_handle, write_handle;
+ uint32_t syncarray[2];
+ uint64_t points[2];
+ amdgpu_device_handle device;
+
+ pthread_mutex_lock(&lock);
+ while (!queue_signalled)
+ pthread_cond_wait(&cond, &lock);
+ pthread_mutex_unlock(&lock);
+
+ device = (amdgpu_device_handle)data;
+ fd = amdgpu_device_get_fd(device);
+
+ r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &queue.handle, &queue.ptr,
+ &queue.mc_addr, &queue.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &wptr_bo.handle, &wptr_bo.ptr,
+ &wptr_bo.mc_addr, &wptr_bo.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, PAGE_SIZE,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &rptr.handle, &rptr.ptr,
+ &rptr.mc_addr, &rptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &shadow.handle, &shadow.ptr,
+ &shadow.mc_addr, &shadow.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ 0,
+ &gds.handle, &gds.ptr,
+ &gds.mc_addr, &gds.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ 0,
+ &csa.handle, &csa.ptr,
+ &csa.mc_addr, &csa.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
+
+ memset(&mqd, 0, sizeof(mqd));
+ mqd.shadow_va = shadow.mc_addr;
+ mqd.csa_va = csa.mc_addr;
+
+ doorbell_ptr = (uint64_t *)doorbell.ptr;
+
+ ptr = (uint32_t *)queue.ptr;
+ memset(ptr, 0, USERMODE_QUEUE_SIZE);
+
+ wptr = (uint64_t *)wptr_bo.ptr;
+ memset(wptr, 0, sizeof(*wptr));
+
+ amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
+
+ /* Create the Usermode Queue */
+ r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
+ db_handle, DOORBELL_INDEX,
+ queue.mc_addr, USERMODE_QUEUE_SIZE,
+ wptr_bo.mc_addr, rptr.mc_addr, &mqd, &q_id);
+ igt_assert_eq(r, 0);
+ if (r)
+ goto err_free_queue;
+
+ r = drmSyncobjFDToHandle(fd, shared_syncobj_fd1, &syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjFDToHandle(fd, shared_syncobj_fd2, &syncobj_handle1);
+ igt_assert_eq(r, 0);
+
+ syncarray[0] = syncobj_handle;
+ syncarray[1] = syncobj_handle1;
+
+ points[0] = 0;
+ points[1] = 0;
+ num_fences = 0;
+ r = amdgpu_bo_export(queue.handle, amdgpu_bo_handle_type_kms, &read_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_export(shadow.handle, amdgpu_bo_handle_type_kms, &write_handle);
+ igt_assert_eq(r, 0);
+
+ // Assign the exported handles to the arrays
+ bo_read_handles[0] = read_handle;
+ bo_write_handles[0] = write_handle;
+
+ memset(&wait_data, 0, sizeof(wait_data));
+ wait_data.syncobj_handles = (uint64_t)syncarray;
+ wait_data.num_syncobj_handles = 2;
+ wait_data.syncobj_timeline_handles = (uint64_t)syncarray;
+ wait_data.syncobj_timeline_points = (uint64_t)points;
+ wait_data.num_syncobj_timeline_handles = 2;
+ wait_data.bo_read_handles = (uint64_t)bo_read_handles;
+ wait_data.num_bo_read_handles = 1;
+ wait_data.bo_write_handles = (uint64_t)bo_write_handles;
+ wait_data.num_bo_write_handles = 1;
+ wait_data.out_fences = (uint64_t)fence_info;
+ wait_data.num_fences = num_fences;
+
+ /* first call with num_fences = 0 only queries the fence count */
+ r = amdgpu_userq_wait(device, &wait_data);
+ igt_assert_eq(r, 0);
+
+ num_fences = wait_data.num_fences;
+ fence_info = malloc(num_fences * sizeof(struct drm_amdgpu_userq_fence_info));
+ if (!fence_info)
+ goto err_free_queue;
+ memset(fence_info, 0, num_fences * sizeof(struct drm_amdgpu_userq_fence_info));
+ wait_data.out_fences = (uint64_t)fence_info;
+ r = amdgpu_userq_wait(device, &wait_data);
+ igt_assert_eq(r, 0);
+
+ for (i = 0; i < num_fences; i++) {
+ igt_info("num_fences = %lu fence_info.va=0x%llx fence_info.value=%llu\n",
+ num_fences, (fence_info + i)->va, (fence_info + i)->value);
+
+ gpu_addr = (fence_info + i)->va;
+ reference_val = (fence_info + i)->value;
+ ptr[0] = PACKET3(PACKET3_FENCE_WAIT_MULTI, 4);
+ ptr[1] = WAIT_MEM_ENGINE_SEL(1) | WAIT_MEM_WAIT_PREEMPTABLE(0) | WAIT_MEM_CACHE_POLICY(3) | WAIT_MEM_POLL_INTERVAL(2);
+ ptr[2] = 0xffffffff & (gpu_addr);
+ ptr[3] = (0xffffffff00000000 & (gpu_addr)) >> 32;
+ ptr[4] = 0xffffffff & (reference_val);
+ ptr[5] = (0xffffffff00000000 & (reference_val)) >> 32;
+ *wptr = 6;
+ doorbell_ptr[DOORBELL_INDEX] = 6;
+ }
+ /* Free the Usermode Queue */
+ r = amdgpu_free_userqueue(device, q_id);
+ igt_assert_eq(r, 0);
+
+err_free_queue:
+ r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
+ csa.va_handle,
+ csa.mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
+ gds.va_handle,
+ gds.mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
+ shadow.va_handle,
+ shadow.mc_addr, PAGE_SIZE * 4,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_cpu_unmap(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_free(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle,
+ rptr.mc_addr, PAGE_SIZE);
+
+ amdgpu_bo_unmap_and_free(wptr_bo.handle, wptr_bo.va_handle,
+ wptr_bo.mc_addr, PAGE_SIZE);
+
+ amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
+ queue.mc_addr, USERMODE_QUEUE_SIZE);
+
+ r = drmSyncobjDestroy(fd, syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjDestroy(fd, syncobj_handle1);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjDestroy(fd, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+ r = drmSyncobjDestroy(fd, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+ free(fence_info);
+ return (void *)(long)r;
+}
+
+static void amdgpu_command_submission_umq_synchronize_test(amdgpu_device_handle device,
+ bool ce_avails)
+{
+ int r;
+ static pthread_t signal_thread, wait_thread;
+ uint64_t gtt_flags = 0;
+ uint16_t point = 0;
+ uint16_t point2 = 0;
+ uint32_t timeline_syncobj_handle;
+ uint32_t timeline_syncobj_handle2;
+
+
+ int fd = amdgpu_device_get_fd(device);
+
+ r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &shared_userq_bo.handle, &shared_userq_bo.ptr,
+ &shared_userq_bo.mc_addr, &shared_userq_bo.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = pthread_create(&signal_thread, NULL, userq_signal, device);
+ igt_assert_eq(r, 0);
+
+ r = pthread_create(&wait_thread, NULL, userq_wait, device);
+ igt_assert_eq(r, 0);
+
+ r = pthread_join(signal_thread, NULL);
+ igt_assert_eq(r, 0);
+
+ r = pthread_join(wait_thread, NULL);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjCreate(fd, 0, &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, shared_userq_bo.handle,
+ shared_userq_bo.va_handle,
+ shared_userq_bo.mc_addr,
+ PAGE_SIZE, timeline_syncobj_handle2,
+ ++point2);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ drmSyncobjDestroy(fd, timeline_syncobj_handle);
+ drmSyncobjDestroy(fd, timeline_syncobj_handle2);
+}
+
+static void amdgpu_command_submission_umq_timeline_test(amdgpu_device_handle device,
+ bool ce_avails)
+{
+ struct amdgpu_userq_bo queue, shadow, doorbell, wptr, rptr;
+ struct amdgpu_userq_bo gds, csa;
+ struct drm_amdgpu_userq_fence_info *fence_info = NULL;
+ uint64_t num_fences;
+ uint64_t gtt_flags = 0, *doorbell_ptr, *wptr_cpu;
+ struct drm_amdgpu_userq_mqd_gfx11 mqd;
+ struct amdgpu_userq_bo dstptrs[WORKLOAD_COUNT];
+ uint32_t q_id, db_handle, *ptr;
+ uint32_t timeline_syncobj_handle;
+ uint64_t point = 0;
+ uint32_t timeline_syncobj_handle2;
+ uint64_t point2 = 0;
+ uint32_t syncarray[2];
+ uint64_t points[2];
+ uint32_t test_timeline_syncobj_handle;
+ uint32_t test_timeline_syncobj_handle2;
+ uint64_t signal_point, payload;
+ struct drm_amdgpu_userq_wait wait_data;
+ int i, r, npkt = 0;
+ uint32_t bo_read_handles[1], bo_write_handles[1];
+ uint32_t read_handle, write_handle;
+ int fd = amdgpu_device_get_fd(device);
+
+ r = create_sync_objects(fd, &timeline_syncobj_handle,
+ &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjCreate(fd, 0, &test_timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjCreate(fd, 0, &test_timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &queue.handle, &queue.ptr,
+ &queue.mc_addr, &queue.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &wptr.handle, &wptr.ptr,
+ &wptr.mc_addr, &wptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &rptr.handle, &rptr.ptr,
+ &rptr.mc_addr, &rptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &shadow.handle, &shadow.ptr,
+ &shadow.mc_addr, &shadow.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ 0,
+ &gds.handle, &gds.ptr,
+ &gds.mc_addr, &gds.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags,
+ 0,
+ &csa.handle, &csa.ptr,
+ &csa.mc_addr, &csa.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
+
+ memset(&mqd, 0, sizeof(mqd));
+ mqd.shadow_va = shadow.mc_addr;
+ mqd.csa_va = csa.mc_addr;
+
+ doorbell_ptr = (uint64_t *) doorbell.ptr;
+
+ ptr = (uint32_t *)queue.ptr;
+ memset(ptr, 0, USERMODE_QUEUE_SIZE);
+
+ wptr_cpu = (uint64_t *)wptr.ptr;
+
+ amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
+
+
+ /* Create the Usermode Queue */
+ r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
+ db_handle, DOORBELL_INDEX,
+ queue.mc_addr, USERMODE_QUEUE_SIZE,
+ wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
+ igt_assert_eq(r, 0);
+ if (r)
+ goto err_free_queue;
+
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+ }
+
+ /* wait */
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = create_submit_workload(ptr, &npkt, 0x1111*(i+1),
+ wptr_cpu, doorbell_ptr, q_id,
+ &dstptrs[i]);
+ igt_assert_eq(r, 0);
+ }
+
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ validation((uint32_t *)dstptrs[i].ptr);
+ signal_point = 5;
+ r = amdgpu_cs_syncobj_timeline_signal(device, &test_timeline_syncobj_handle,
+ &signal_point, 1);
+ igt_assert_eq(r, 0);
+ r = amdgpu_cs_syncobj_query(device, &test_timeline_syncobj_handle,
+ &payload, 1);
+ igt_assert_eq(r, 0);
+ igt_assert_eq(payload, 5);
+
+ /* release the first round of workload BOs before reusing dstptrs */
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ free_workload(device, &dstptrs[i], timeline_syncobj_handle, ++point,
+ 0, 0);
+
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+ }
+
+ /* wait */
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = create_submit_workload(ptr, &npkt, 0x1111*(i+1),
+ wptr_cpu, doorbell_ptr, q_id,
+ &dstptrs[i]);
+ igt_assert_eq(r, 0);
+ }
+
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ validation((uint32_t *)dstptrs[i].ptr);
+
+ signal_point = 10;
+ r = amdgpu_cs_syncobj_timeline_signal(device, &test_timeline_syncobj_handle,
+ &signal_point, 1);
+ igt_assert_eq(r, 0);
+ r = amdgpu_cs_syncobj_query(device, &test_timeline_syncobj_handle,
+ &payload, 1);
+ igt_assert_eq(r, 0);
+ igt_assert_eq(payload, 10);
+
+ syncarray[0] = test_timeline_syncobj_handle;
+ syncarray[1] = test_timeline_syncobj_handle;
+
+ points[0] = 5;
+ points[1] = 10;
+
+ num_fences = 0;
+
+ // Export the buffer object handles
+ r = amdgpu_bo_export(queue.handle, amdgpu_bo_handle_type_kms, &read_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_export(shadow.handle, amdgpu_bo_handle_type_kms, &write_handle);
+ igt_assert_eq(r, 0);
+
+ // Assign the exported handles to the arrays
+ bo_read_handles[0] = read_handle;
+ bo_write_handles[0] = write_handle;
+
+ memset(&wait_data, 0, sizeof(wait_data));
+ wait_data.syncobj_handles = (uint64_t)syncarray;
+ wait_data.num_syncobj_handles = 2;
+ wait_data.syncobj_timeline_handles = (uint64_t)syncarray;
+ wait_data.syncobj_timeline_points = (uint64_t)points;
+ wait_data.num_syncobj_timeline_handles = 2;
+ wait_data.bo_read_handles = (uint64_t)bo_read_handles;
+ wait_data.num_bo_read_handles = 1;
+ wait_data.bo_write_handles = (uint64_t)bo_write_handles;
+ wait_data.num_bo_write_handles = 1;
+ wait_data.out_fences = (uint64_t)fence_info;
+ wait_data.num_fences = num_fences;
+
+ /* first call queries how many fences will be returned */
+ r = amdgpu_userq_wait(device, &wait_data);
+ igt_assert_eq(r, 0);
+
+ num_fences = wait_data.num_fences;
+ fence_info = calloc(num_fences, sizeof(struct drm_amdgpu_userq_fence_info));
+ igt_assert(fence_info);
+ wait_data.out_fences = (uint64_t)fence_info;
+ r = amdgpu_userq_wait(device, &wait_data);
+ igt_assert_eq(r, 0);
+
+ for (i = 0; i < num_fences; i++)
+ igt_info("num_fences = %lu fence_info.va=0x%llx fence_info.value=%llu\n",
+ num_fences, (fence_info + i)->va, (fence_info + i)->value);
+
+ free(fence_info);
+
+ /* Free the Usermode Queue */
+ r = amdgpu_free_userqueue(device, q_id);
+ igt_assert_eq(r, 0);
+
+ /* Free workload */
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ free_workload(device, &dstptrs[i], timeline_syncobj_handle2, ++point2,
+ 0, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+err_free_queue:
+ r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
+ csa.va_handle,
+ csa.mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
+ gds.va_handle,
+ gds.mc_addr, PAGE_SIZE,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
+ shadow.va_handle,
+ shadow.mc_addr, PAGE_SIZE * 4,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_cpu_unmap(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_free(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle,
+ rptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle,
+ wptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
+ queue.mc_addr, USERMODE_QUEUE_SIZE);
+
+ r = drmSyncobjDestroy(fd, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjDestroy(fd, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjDestroy(fd, test_timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ r = drmSyncobjDestroy(fd, test_timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+}
+
+/**
+ * amdgpu_command_submission_umq_sdma - basic AMDGPU_HW_IP_DMA user queue test
+ * @device: amdgpu device handle
+ * @ce_avails: CE availability hint (currently unused)
+ */
+static void amdgpu_command_submission_umq_sdma(amdgpu_device_handle device,
+ bool ce_avails)
+{
+ int r, i = 0, j = 0;
+ uint64_t gtt_flags = 0;
+ uint16_t point = 0;
+ uint16_t point2 = 0;
+ uint32_t *ptr, *dstptr;
+ uint32_t q_id, db_handle;
+ uint32_t timeline_syncobj_handle;
+ uint32_t timeline_syncobj_handle2;
+ uint64_t *doorbell_ptr, *wptr_cpu;
+ const int sdma_write_length = WORKLOAD_COUNT;
+ struct drm_amdgpu_userq_mqd_sdma_gfx11 mqd;
+ struct amdgpu_userq_bo queue, doorbell, rptr, wptr, dst;
+ int fd = amdgpu_device_get_fd(device);
+
+ r = create_sync_objects(fd, &timeline_syncobj_handle,
+ &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &queue.handle, &queue.ptr,
+ &queue.mc_addr, &queue.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &wptr.handle, &wptr.ptr,
+ &wptr.mc_addr, &wptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &rptr.handle, &rptr.ptr,
+ &rptr.mc_addr, &rptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 10,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ gtt_flags | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ AMDGPU_VM_MTYPE_UC,
+ &dst.handle, &dst.ptr,
+ &dst.mc_addr, &dst.va_handle,
+ timeline_syncobj_handle, ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ alloc_doorbell(device, &doorbell, PAGE_SIZE * 2, AMDGPU_GEM_DOMAIN_DOORBELL);
+
+ memset(&mqd, 0, sizeof(mqd));
+
+ doorbell_ptr = (uint64_t *) doorbell.ptr;
+
+ wptr_cpu = (uint64_t *) wptr.ptr;
+
+ ptr = (uint32_t *) queue.ptr;
+ memset(ptr, 0, USERMODE_QUEUE_SIZE);
+
+ dstptr = (uint32_t *)dst.ptr;
+ memset(dstptr, 0, sizeof(*dstptr) * sdma_write_length);
+
+ amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
+
+ /* Create the Usermode Queue */
+ r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_DMA,
+ db_handle, DOORBELL_INDEX,
+ queue.mc_addr, USERMODE_QUEUE_SIZE,
+ wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
+ igt_assert_eq(r, 0);
+ if (r)
+ goto err_free_queue;
+
+ ptr[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 0, 0);
+ ptr[i++] = lower_32_bits(dst.mc_addr);
+ ptr[i++] = upper_32_bits(dst.mc_addr);
+ ptr[i++] = sdma_write_length - 1;
+ while (j++ < sdma_write_length)
+ ptr[i++] = 0xdeadbeaf;
+
+ *wptr_cpu = i << 2;
+
+ doorbell_ptr[DOORBELL_INDEX] = i << 2;
+
+ i = 0;
+ while (dstptr[0] != 0xdeadbeaf) {
+ if (i++ > 100)
+ break;
+ usleep(100);
+ }
+
+ for (int k = 0; k < sdma_write_length; k++) {
+ igt_assert_eq(dstptr[k], 0xdeadbeaf);
+ }
+
+ /* Free the Usermode Queue */
+ r = amdgpu_free_userqueue(device, q_id);
+ igt_assert_eq(r, 0);
+
+
+ err_free_queue:
+ r = amdgpu_bo_unmap_and_free_uq(device, dst.handle,
+ dst.va_handle, dst.mc_addr,
+ PAGE_SIZE * 10,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_cpu_unmap(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_free(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle, rptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle, wptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
+ queue.mc_addr, USERMODE_QUEUE_SIZE);
+
+ drmSyncobjDestroy(fd, timeline_syncobj_handle);
+ drmSyncobjDestroy(fd, timeline_syncobj_handle2);
+}
+
+/**
+ * amdgpu_command_submission_umq_compute - basic AMDGPU_HW_IP_COMPUTE user queue test
+ * @device: amdgpu device handle
+ * @ce_avails: CE availability hint (currently unused)
+ */
+static void amdgpu_command_submission_umq_compute(amdgpu_device_handle device,
+ bool ce_avails)
+{
+ int r, i = 0, npkt = 0;
+ uint64_t gtt_flags = 0;
+ uint16_t point = 0;
+ uint16_t point2 = 0;
+ uint32_t *ptr;
+ uint32_t q_id, db_handle;
+ uint32_t timeline_syncobj_handle;
+ uint32_t timeline_syncobj_handle2;
+ uint64_t *doorbell_ptr, *wptr_cpu;
+ struct amdgpu_userq_bo dstptrs[WORKLOAD_COUNT];
+ struct drm_amdgpu_userq_mqd_compute_gfx11 mqd;
+ struct amdgpu_userq_bo queue, doorbell, rptr, wptr, eop;
+ int fd = amdgpu_device_get_fd(device);
+
+
+ r = create_sync_objects(fd, &timeline_syncobj_handle,
+ &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &queue.handle, &queue.ptr,
+ &queue.mc_addr, &queue.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &wptr.handle, &wptr.ptr,
+ &wptr.mc_addr, &wptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &rptr.handle, &rptr.ptr,
+ &rptr.mc_addr, &rptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, 256,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags, AMDGPU_VM_MTYPE_UC,
+ &eop.handle, &eop.ptr,
+ &eop.mc_addr, &eop.va_handle,
+ timeline_syncobj_handle,
+ ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
+
+ memset(&mqd, 0, sizeof(mqd));
+ mqd.eop_va = eop.mc_addr;
+
+ doorbell_ptr = (uint64_t *) doorbell.ptr;
+
+ wptr_cpu = (uint64_t *) wptr.ptr;
+
+ ptr = (uint32_t *) queue.ptr;
+ memset(ptr, 0, USERMODE_QUEUE_SIZE);
+
+ amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
+
+ /* Create the Usermode Queue */
+ r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_COMPUTE,
+ db_handle, DOORBELL_INDEX,
+ queue.mc_addr, USERMODE_QUEUE_SIZE,
+ wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
+ igt_assert_eq(r, 0);
+ if (r)
+ goto err_free_queue;
+
+ /* allocate workload */
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle,
+ ++point);
+ igt_assert_eq(r, 0);
+ }
+
+ /* wait */
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ /* create workload pkt */
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = create_submit_workload(ptr, &npkt, 0x1111 * (i + 1),
+ wptr_cpu, doorbell_ptr, q_id,
+ &dstptrs[i]);
+ igt_assert_eq(r, 0);
+ }
+
+ /* validate the workload packets */
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ validation((uint32_t *) dstptrs[i].ptr);
+
+ /* Free the Usermode Queue */
+ r = amdgpu_free_userqueue(device, q_id);
+ igt_assert_eq(r, 0);
+
+ /* Free workload */
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ free_workload(device, &dstptrs[i], timeline_syncobj_handle2, ++point2,
+ 0, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+
+ err_free_queue:
+ r = amdgpu_bo_unmap_and_free_uq(device, eop.handle,
+ eop.va_handle, eop.mc_addr,
+ 256,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_cpu_unmap(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_free(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle, rptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle, wptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
+ queue.mc_addr, USERMODE_QUEUE_SIZE);
+
+ drmSyncobjDestroy(fd, timeline_syncobj_handle);
+ drmSyncobjDestroy(fd, timeline_syncobj_handle2);
+}
+
+/**
+ * amdgpu_command_submission_umq_gfx - basic AMDGPU_HW_IP_GFX user queue test
+ * @device: amdgpu device handle
+ * @ce_avails: CE availability hint (currently unused)
+ */
+static void amdgpu_command_submission_umq_gfx(amdgpu_device_handle device,
+ bool ce_avails)
+{
+ int r, i = 0, npkt = 0;
+ uint64_t gtt_flags = 0;
+ uint16_t point = 0;
+ uint16_t point2 = 0;
+ uint32_t *ptr;
+ uint32_t q_id, db_handle;
+ uint32_t timeline_syncobj_handle;
+ uint32_t timeline_syncobj_handle2;
+ uint64_t *doorbell_ptr, *wptr_cpu;
+ struct amdgpu_userq_bo dstptrs[WORKLOAD_COUNT];
+ struct drm_amdgpu_userq_mqd_gfx11 mqd;
+ struct amdgpu_userq_bo queue, shadow, doorbell, rptr, wptr, gds, csa;
+ int fd = amdgpu_device_get_fd(device);
+
+ r = create_sync_objects(fd, &timeline_syncobj_handle,
+ &timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, USERMODE_QUEUE_SIZE,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &queue.handle, &queue.ptr,
+ &queue.mc_addr, &queue.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &wptr.handle, &wptr.ptr,
+ &wptr.mc_addr, &wptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_raw(device, 8,
+ ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags,
+ AMDGPU_VM_MTYPE_UC,
+ &rptr.handle, &rptr.ptr,
+ &rptr.mc_addr, &rptr.va_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 18,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags, AMDGPU_VM_MTYPE_UC,
+ &shadow.handle, &shadow.ptr,
+ &shadow.mc_addr,
+ &shadow.va_handle,
+ timeline_syncobj_handle,
+ ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 4,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags, AMDGPU_VM_MTYPE_UC,
+ &gds.handle, &gds.ptr,
+ &gds.mc_addr, &gds.va_handle,
+ timeline_syncobj_handle,
+ ++point);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map_uq(device, PAGE_SIZE * 20,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ gtt_flags, AMDGPU_VM_MTYPE_UC,
+ &csa.handle, &csa.ptr,
+ &csa.mc_addr, &csa.va_handle,
+ timeline_syncobj_handle,
+ ++point);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ alloc_doorbell(device, &doorbell, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL);
+
+ memset(&mqd, 0, sizeof(mqd));
+ mqd.shadow_va = shadow.mc_addr;
+ mqd.csa_va = csa.mc_addr;
+
+ doorbell_ptr = (uint64_t *) doorbell.ptr;
+
+ wptr_cpu = (uint64_t *) wptr.ptr;
+
+ ptr = (uint32_t *) queue.ptr;
+ memset(ptr, 0, USERMODE_QUEUE_SIZE);
+
+ amdgpu_bo_export(doorbell.handle, amdgpu_bo_handle_type_kms, &db_handle);
+
+
+ /* Create the Usermode Queue */
+ r = amdgpu_create_userqueue(device, AMDGPU_HW_IP_GFX,
+ db_handle, DOORBELL_INDEX,
+ queue.mc_addr, USERMODE_QUEUE_SIZE,
+ wptr.mc_addr, rptr.mc_addr, &mqd, &q_id);
+ igt_assert_eq(r, 0);
+ if (r)
+ goto err_free_queue;
+
+ /* allocate workload */
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = allocate_workload(device, &dstptrs[i], timeline_syncobj_handle,
+ ++point);
+ igt_assert_eq(r, 0);
+ }
+
+ /* wait */
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle);
+ igt_assert_eq(r, 0);
+
+ /* create workload pkt */
+ for (i = 0; i < WORKLOAD_COUNT; i++) {
+ r = create_submit_workload(ptr, &npkt, 0x1111 * (i + 1),
+ wptr_cpu, doorbell_ptr, q_id,
+ &dstptrs[i]);
+ igt_assert_eq(r, 0);
+ }
+
+ /* validate the workload packets */
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ validation((uint32_t *) dstptrs[i].ptr);
+
+ /* Free the Usermode Queue */
+ r = amdgpu_free_userqueue(device, q_id);
+ igt_assert_eq(r, 0);
+
+ /* Free workload */
+ for (i = 0; i < WORKLOAD_COUNT; i++)
+ free_workload(device, &dstptrs[i], timeline_syncobj_handle2, ++point2,
+ 0, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+
+ err_free_queue:
+ /* sizes passed here must match the allocations above */
+ r = amdgpu_bo_unmap_and_free_uq(device, csa.handle,
+ csa.va_handle, csa.mc_addr,
+ PAGE_SIZE * 20,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, gds.handle,
+ gds.va_handle, gds.mc_addr, PAGE_SIZE * 4,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_unmap_and_free_uq(device, shadow.handle,
+ shadow.va_handle, shadow.mc_addr,
+ PAGE_SIZE * 18,
+ timeline_syncobj_handle2, ++point2);
+ igt_assert_eq(r, 0);
+
+ r = timeline_syncobj_wait(device, timeline_syncobj_handle2);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_cpu_unmap(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_free(doorbell.handle);
+ igt_assert_eq(r, 0);
+
+ amdgpu_bo_unmap_and_free(rptr.handle, rptr.va_handle, rptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(wptr.handle, wptr.va_handle, wptr.mc_addr, 8);
+
+ amdgpu_bo_unmap_and_free(queue.handle, queue.va_handle,
+ queue.mc_addr, USERMODE_QUEUE_SIZE);
+
+ drmSyncobjDestroy(fd, timeline_syncobj_handle);
+ drmSyncobjDestroy(fd, timeline_syncobj_handle2);
+}
+
+igt_main
+{
+ amdgpu_device_handle device;
+ struct amdgpu_gpu_info gpu_info = {0};
+ struct drm_amdgpu_info_hw_ip info = {0};
+ int fd = -1;
+ int r;
+ bool arr_cap[AMD_IP_MAX] = {0};
+
+ igt_fixture {
+ uint32_t major, minor;
+ int err;
+
+ fd = drm_open_driver(DRIVER_AMDGPU);
+
+ err = amdgpu_device_initialize(fd, &major, &minor, &device);
+ igt_require(err == 0);
+ r = amdgpu_query_gpu_info(device, &gpu_info);
+ igt_assert_eq(r, 0);
+ r = amdgpu_query_hw_ip_info(device, AMDGPU_HW_IP_GFX, 0, &info);
+ igt_assert_eq(r, 0);
+ r = setup_amdgpu_ip_blocks(major, minor, &gpu_info, device);
+ igt_assert_eq(r, 0);
+ asic_rings_readness(device, 1, arr_cap);
+ }
+
+ igt_describe("Check-GFX-UMQ-for-every-available-ring-works-for-write-const-fill-and-copy-operation-using-more-than-one-IB-and-shared-IB");
+ igt_subtest_with_dynamic("umq-gfx-with-IP-GFX") {
+ if (arr_cap[AMD_IP_GFX]) {
+ igt_dynamic_f("umq-gfx")
+ amdgpu_command_submission_umq_gfx(device,
+ info.
+ hw_ip_version_major
+ < 11);
+ }
+ }
+
+ igt_describe("Check-COMPUTE-UMQ-for-every-available-ring-works-for-write-const-fill-and-copy-operation-using-more-than-one-IB-and-shared-IB");
+ igt_subtest_with_dynamic("umq-gfx-with-IP-COMPUTE") {
+ if (arr_cap[AMD_IP_COMPUTE]) {
+ igt_dynamic_f("umq-compute")
+ amdgpu_command_submission_umq_compute(device,
+ info.
+ hw_ip_version_major
+ < 11);
+ }
+ }
+
+ igt_describe("Check-SDMA-UMQ-for-every-available-ring-works-for-write-const-fill-and-copy-operation-using-more-than-one-IB-and-shared-IB");
+ igt_subtest_with_dynamic("umq-gfx-with-IP-SDMA") {
+ if (arr_cap[AMD_IP_DMA]) {
+ igt_dynamic_f("umq-sdma")
+ amdgpu_command_submission_umq_sdma(device,
+ info.
+ hw_ip_version_major
+ < 11);
+ }
+ }
+
+ igt_describe("Check-amdgpu_command_submission_umq_timeline_test");
+ igt_subtest_with_dynamic("umq-Syncobj-timeline") {
+ if (arr_cap[AMD_IP_DMA]) {
+ igt_dynamic_f("umq_timeline")
+ amdgpu_command_submission_umq_timeline_test(device,
+ info.
+ hw_ip_version_major
+ < 11);
+ }
+ }
+
+ igt_describe("Check-amdgpu_command_submission_umq_synchronize_test");
+ igt_subtest_with_dynamic("umq-Synchronize") {
+ if (arr_cap[AMD_IP_DMA]) {
+ igt_dynamic_f("umq_synchronize")
+ amdgpu_command_submission_umq_synchronize_test(device,
+ info.
+ hw_ip_version_major
+ < 11);
+ }
+ }
+
+ igt_fixture {
+ amdgpu_device_deinitialize(device);
+ drm_close_driver(fd);
+ }
+}
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
index 7d40f788b..a15a3884c 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -63,7 +63,13 @@ if libdrm_amdgpu.found()
else
warning('libdrm <= 2.4.104 found, amd_queue_reset test not applicable')
endif
- amdgpu_deps += libdrm_amdgpu
+ # Check for amdgpu_create_userqueue function
+ if cc.has_function('amdgpu_create_userqueue', dependencies: libdrm_amdgpu)
+ amdgpu_progs += [ 'amd_userq_basic' ]
+ else
+ warning('amdgpu_create_userqueue not found in libdrm_amdgpu, skipping amd userq test')
+ endif
+ amdgpu_deps += libdrm_amdgpu
endif
foreach prog : amdgpu_progs
--
2.25.1