AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra
@ 2026-04-24  8:18 Jesse Zhang
  2026-04-24  8:18 ` [PATCH 02/10] drm/amdgpu/userq: route SDMA UMQ doorbells through the kernel pool Jesse Zhang
                   ` (8 more replies)
  0 siblings, 9 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24  8:18 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang

From: "Jesse.zhang" <Jesse.zhang@amd.com>

Add a per-device qword-slot pool covering the firmware-managed NBIO
SDMA decode window (BAR dwords [sdma_engine[0],
sdma_engine[0] + sdma_doorbell_range * num_instances)) — the only
range whose writes are routed to the SDMA back-end.  Kernel SDMA ring
slots are pre-masked at init.

The window is exposed to userspace as a custom drm_gem_object: no TTM
backing, custom .mmap callback that does io_remap_pfn_range from the
SDMA decode window's BAR address.  Per-fpriv GEM handles for that BO
can be minted on demand via amdgpu_sdma_userq_doorbell_create_handle()
so userspace mmap()s through the standard drm_gem_mmap path — no
file_operations override and no fixed mmap pgoff sentinel.

Slots are allocated/freed via amdgpu_sdma_userq_doorbell_alloc/free.
The init/fini and the AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl that uses
create_handle land in subsequent patches.

Suggested-by: Prike Liang <Prike.Liang@amd.com>
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 164 +++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  55 ++++++++
 2 files changed, 219 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 321310ba2c08..1c61761c0046 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -22,6 +22,8 @@
  */
 
 #include <linux/firmware.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_file.h>
 #include "amdgpu.h"
 #include "amdgpu_sdma.h"
 #include "amdgpu_ras.h"
@@ -200,6 +202,168 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
 	       sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
 }
 
+static int amdgpu_sdma_userq_db_obj_mmap(struct drm_gem_object *obj,
+					 struct vm_area_struct *vma)
+{
+	struct amdgpu_sdma_userq_db_obj *db = to_amdgpu_sdma_userq_db(obj);
+
+	if (vma->vm_end - vma->vm_start > round_up(db->size, PAGE_SIZE))
+		return -EINVAL;
+
+	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
+		     VM_DONTDUMP | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return io_remap_pfn_range(vma, vma->vm_start,
+				  db->phys_base >> PAGE_SHIFT,
+				  vma->vm_end - vma->vm_start,
+				  vma->vm_page_prot);
+}
+
+static void amdgpu_sdma_userq_db_obj_free(struct drm_gem_object *obj)
+{
+	struct amdgpu_sdma_userq_db_obj *db = to_amdgpu_sdma_userq_db(obj);
+
+	drm_gem_object_release(obj);
+	kfree(db);
+}
+
+static const struct drm_gem_object_funcs amdgpu_sdma_userq_db_obj_funcs = {
+	.free = amdgpu_sdma_userq_db_obj_free,
+	.mmap = amdgpu_sdma_userq_db_obj_mmap,
+};
+
+int amdgpu_sdma_userq_doorbell_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_sdma_userq_db_obj *db;
+	u32 base_dw, size_dw, nslots, ring_dw;
+	int i, r;
+
+	if (!adev->userq_funcs[AMDGPU_HW_IP_DMA])
+		return 0;
+
+	base_dw = adev->doorbell_index.sdma_engine[0] << 1;
+	size_dw = adev->doorbell_index.sdma_doorbell_range *
+		  adev->sdma.num_instances;
+	nslots  = size_dw / 2;	/* qword slots */
+	if (!nslots)
+		return 0;
+
+	db = kzalloc(sizeof(*db), GFP_KERNEL);
+	if (!db)
+		return -ENOMEM;
+
+	db->phys_base = adev->doorbell.base +
+			(resource_size_t)base_dw * sizeof(u32);
+	db->size      = size_dw * sizeof(u32);
+	db->base.funcs = &amdgpu_sdma_userq_db_obj_funcs;
+
+	drm_gem_private_object_init(adev_to_drm(adev), &db->base,
+				    round_up(db->size, PAGE_SIZE));
+	r = drm_gem_create_mmap_offset(&db->base);
+	if (r) {
+		drm_gem_object_put(&db->base);
+		return r;
+	}
+
+	mutex_init(&adev->sdma.userq_db_mutex);
+	adev->sdma.userq_db_bitmap = bitmap_zalloc(nslots, GFP_KERNEL);
+	if (!adev->sdma.userq_db_bitmap) {
+		drm_gem_object_put(&db->base);
+		return -ENOMEM;
+	}
+
+	adev->sdma.userq_db_obj    = db;
+	adev->sdma.userq_db_nslots = nslots;
+
+	/*
+	 * Mask out the qword slots used by the kernel SDMA rings
+	 * (sdma_engine[i] << 1 in absolute BAR dwords ⇒ qword slot
+	 * (sdma_engine[i] - sdma_engine[0]) within this window).
+	 */
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring_dw = adev->doorbell_index.sdma_engine[i] << 1;
+		if (ring_dw >= base_dw && ring_dw < base_dw + size_dw)
+			set_bit((ring_dw - base_dw) / 2,
+				adev->sdma.userq_db_bitmap);
+	}
+
+	dev_info(adev->dev,
+		 "SDMA UMQ doorbell pool: %u qword slots in BAR dword [%u, %u)\n",
+		 nslots, base_dw, base_dw + size_dw);
+	return 0;
+}
+
+void amdgpu_sdma_userq_doorbell_fini(struct amdgpu_device *adev)
+{
+	if (!adev->sdma.userq_db_obj)
+		return;
+	bitmap_free(adev->sdma.userq_db_bitmap);
+	adev->sdma.userq_db_bitmap = NULL;
+	adev->sdma.userq_db_nslots = 0;
+	drm_gem_object_put(&adev->sdma.userq_db_obj->base);
+	adev->sdma.userq_db_obj = NULL;
+}
+
+/*
+ * Allocate one qword doorbell slot.  On success, *out_slot receives the
+ * slot id (also the qword index inside the userspace mmap of the window
+ * BO) which the caller passes back to free.
+ */
+int amdgpu_sdma_userq_doorbell_alloc(struct amdgpu_device *adev, u32 *out_slot)
+{
+	u32 slot;
+
+	if (!adev->sdma.userq_db_obj || !adev->sdma.userq_db_nslots)
+		return -ENODEV;
+
+	mutex_lock(&adev->sdma.userq_db_mutex);
+	slot = find_first_zero_bit(adev->sdma.userq_db_bitmap,
+				   adev->sdma.userq_db_nslots);
+	if (slot >= adev->sdma.userq_db_nslots) {
+		mutex_unlock(&adev->sdma.userq_db_mutex);
+		return -ENOSPC;
+	}
+	set_bit(slot, adev->sdma.userq_db_bitmap);
+	mutex_unlock(&adev->sdma.userq_db_mutex);
+
+	*out_slot = slot;
+	return 0;
+}
+
+void amdgpu_sdma_userq_doorbell_free(struct amdgpu_device *adev, u32 slot)
+{
+	if (!adev->sdma.userq_db_obj)
+		return;
+	if (slot >= adev->sdma.userq_db_nslots)
+		return;
+	mutex_lock(&adev->sdma.userq_db_mutex);
+	clear_bit(slot, adev->sdma.userq_db_bitmap);
+	mutex_unlock(&adev->sdma.userq_db_mutex);
+}
+
+/*
+ * Mint a per-fpriv GEM handle for the per-device SDMA UMQ doorbell BO.
+ * Userspace then uses standard GEM_MMAP / mmap() on /dev/dri/cardN to
+ * obtain a CPU pointer to the routable doorbell window.
+ */
+int amdgpu_sdma_userq_doorbell_create_handle(struct amdgpu_device *adev,
+					     struct drm_file *filp,
+					     u32 *handle, u32 *size_bytes)
+{
+	int r;
+
+	if (!adev->sdma.userq_db_obj)
+		return -ENODEV;
+
+	r = drm_gem_handle_create(filp, &adev->sdma.userq_db_obj->base, handle);
+	if (r)
+		return r;
+
+	*size_bytes = adev->sdma.userq_db_obj->size;
+	return 0;
+}
+
 int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
 			       u32 instance, bool duplicate)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 2bf365609775..93a7eb9746d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -146,6 +146,20 @@ struct amdgpu_sdma {
 	bool			disable_uq;
 	void (*get_csa_info)(struct amdgpu_device *adev,
 			     struct amdgpu_sdma_csa_info *csa_info);
+
+	/*
+	 * SDMA usermode-queue doorbell pool.  The window covers
+	 * BAR dwords [sdma_engine[0], sdma_engine[0] +
+	 * sdma_doorbell_range * num_instances) — the only range that NBIO
+	 * routes to the SDMA back-end.  Each bit in the bitmap represents
+	 * one qword slot; kernel SDMA ring slots are pre-masked at init.
+	 * The window is exposed to userspace as a custom drm_gem_object
+	 * (userq_db_obj) that is mmap'd via standard GEM_MMAP.
+	 */
+	struct amdgpu_sdma_userq_db_obj *userq_db_obj;
+	struct mutex		userq_db_mutex;
+	unsigned long		*userq_db_bitmap;
+	u32			userq_db_nslots;	/* qword slots */
 };
 
 /*
@@ -185,6 +199,38 @@ struct amdgpu_buffer_funcs {
 				 uint32_t byte_count);
 };
 
+/*
+ * SDMA usermode-queue doorbell pool.
+ *
+ * The pool re-uses qword doorbell slots inside the firmware-managed NBIO
+ * SDMA decode window (BAR dwords [sdma_engine[0],
+ * sdma_engine[0] + sdma_doorbell_range * num_instances)) — that range is
+ * the only one whose writes are routed to the SDMA back-end.  The kernel
+ * SDMA ring slots are pre-marked so they keep working alongside any
+ * number of SDMA UMQs.
+ *
+ * The window is exposed to userspace via a per-device drm_gem_object that
+ * userspace mmap()s through the standard GEM_MMAP path; per-fpriv handles
+ * are minted on demand by the AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl.  No
+ * file_operations override and no fixed mmap pgoff sentinel.
+ *
+ * FIXME: KFD's SDMA queue doorbells (kgd_*_hqd_sdma_get_doorbell on chips
+ * with a non-stub implementation, e.g. gfx9.4.3) are computed
+ * from the same adev->doorbell_index.sdma_engine[] array and would
+ * overlap with this pool.  On gfx12 the kgd hook stubs to 0, so there is
+ * no immediate conflict.  A shared per-adev allocator that both
+ * KFD and amdgpu UMQ call into is the longer-term fix.
+ */
+
+struct amdgpu_sdma_userq_db_obj {
+	struct drm_gem_object	base;
+	resource_size_t		phys_base;	/* BAR phys addr of window start */
+	u32			size;		/* window size in bytes */
+};
+
+#define to_amdgpu_sdma_userq_db(_obj) \
+	container_of(_obj, struct amdgpu_sdma_userq_db_obj, base)
+
 int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id,
 			     bool caller_handles_kernel_queues);
 
@@ -205,6 +251,15 @@ int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
 				      struct amdgpu_iv_entry *entry);
 int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
 			       bool duplicate);
+struct drm_file;
+struct amdgpu_sdma_userq_db_obj;
+int amdgpu_sdma_userq_doorbell_init(struct amdgpu_device *adev);
+void amdgpu_sdma_userq_doorbell_fini(struct amdgpu_device *adev);
+int amdgpu_sdma_userq_doorbell_alloc(struct amdgpu_device *adev, u32 *out_slot);
+void amdgpu_sdma_userq_doorbell_free(struct amdgpu_device *adev, u32 slot);
+int amdgpu_sdma_userq_doorbell_create_handle(struct amdgpu_device *adev,
+					     struct drm_file *filp,
+					     u32 *handle, u32 *size_bytes);
 void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
         bool duplicate);
 int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 02/10] drm/amdgpu/userq: route SDMA UMQ doorbells through the kernel pool
  2026-04-24  8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
@ 2026-04-24  8:18 ` Jesse Zhang
  2026-04-24  8:18 ` [PATCH 03/10] drm/amdgpu/sdma7: register SDMA UMQ doorbell pool Jesse Zhang
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24  8:18 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang

From: "Jesse.zhang" <Jesse.zhang@amd.com>

User-allocated DOORBELL BOs land at BAR offsets outside the
firmware-managed NBIO SDMA decode window and cannot reach the SDMA
back-end.  For AMDGPU_HW_IP_DMA queues, ignore the user-supplied
doorbell index and allocate one from the per-device pool added in the
previous patch.  Track the assigned slot id on the queue so it can be
returned to the bitmap on destroy.

Add a new sdma_doorbell_offset_bytes field to drm_amdgpu_userq_out
that tells userspace where its kernel-allocated qword slot lives
inside the BO it will mmap (the BO handle comes from the
AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl added later in the series).

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 47 +++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h |  7 ++++
 include/uapi/drm/amdgpu_drm.h             |  8 ++++
 3 files changed, 62 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 8f48520cb822..cea0f9cb59d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -662,6 +662,10 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que
 	amdgpu_bo_unpin(queue->wptr_obj.obj);
 	amdgpu_bo_unreserve(queue->wptr_obj.obj);
 	amdgpu_bo_unref(&queue->wptr_obj.obj);
+
+	if (queue->sdma_userq_db_slot >= 0)
+		amdgpu_sdma_userq_doorbell_free(adev,
+					(u32)queue->sdma_userq_db_slot);
 	kfree(queue);
 
 	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -762,6 +766,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
 	queue->queue_type = args->in.ip_type;
 	queue->vm = &fpriv->vm;
 	queue->priority = priority;
+	queue->sdma_userq_db_slot = -1;
 
 	db_info.queue_type = queue->queue_type;
 	db_info.doorbell_handle = queue->doorbell_handle;
@@ -792,6 +797,38 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
 		goto clean_mapping;
 	}
 
+	/*
+	 * SDMA UMQ doorbell override:  user-allocated DOORBELL BOs land at
+	 * BAR offsets outside the firmware-managed NBIO SDMA decode window
+	 * and cannot reach the SDMA back-end.  Replace the user-supplied
+	 * doorbell index with one allocated from the per-device
+	 * sdma.userq_db_obj BO that sits inside the routable window.
+	 * Userspace fetches a GEM handle for that BO via
+	 * AMDGPU_INFO_SDMA_USERQ_DOORBELL and mmap()s it through the
+	 * standard GEM_MMAP path; sdma_doorbell_offset_bytes (returned in
+	 * args->out) tells userspace where inside that mapping its slot
+	 * lives.
+	 */
+	if (queue->queue_type == AMDGPU_HW_IP_DMA &&
+	    adev->sdma.userq_db_obj) {
+		u32 slot_id;
+
+		r = amdgpu_sdma_userq_doorbell_alloc(adev, &slot_id);
+		if (r) {
+			drm_file_err(uq_mgr->file,
+				     "SDMA UMQ doorbell pool exhausted (err=%d)\n",
+				     r);
+			goto clean_mapping;
+		}
+		/*
+		 * Slot id is a qword index inside the routable window;
+		 * convert to absolute BAR dword index.
+		 */
+		index = (u64)(adev->doorbell_index.sdma_engine[0] << 1) +
+			(u64)slot_id * 2;
+		queue->sdma_userq_db_slot = (int)slot_id;
+	}
+
 	queue->doorbell_index = index;
 	xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
 	r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
@@ -851,6 +888,16 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
 	amdgpu_userq_init_hang_detect_work(queue);
 
 	args->out.queue_id = qid;
+	if (queue->sdma_userq_db_slot >= 0) {
+		/*
+		 * Tell userspace where inside its mmap of the SDMA UMQ
+		 * doorbell BO (handle returned by
+		 * AMDGPU_INFO_SDMA_USERQ_DOORBELL) the assigned qword slot
+		 * lives.
+		 */
+		args->out.sdma_doorbell_offset_bytes =
+			(u64)queue->sdma_userq_db_slot * sizeof(u64);
+	}
 	atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
 	mutex_unlock(&uq_mgr->userq_mutex);
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 675fe6395ac8..cdfced627dec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -59,6 +59,13 @@ struct amdgpu_usermode_queue {
 	uint64_t		doorbell_handle;
 	uint64_t		doorbell_index;
 	uint64_t		flags;
+	/*
+	 * For SDMA UMQs whose doorbell came from the kernel-managed pool
+	 * (amdgpu_sdma_userq_doorbell_alloc), record the slot id so it can
+	 * be returned to the bitmap on queue destroy.  -1 means the queue
+	 * is using a user-supplied doorbell BO.
+	 */
+	int			sdma_userq_db_slot;
 	struct amdgpu_mqd_prop	*userq_prop;
 	struct amdgpu_userq_mgr *userq_mgr;
 	struct amdgpu_vm	*vm;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 9f3090db2f16..79e8bbda046b 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -421,6 +421,14 @@ struct drm_amdgpu_userq_out {
 	 */
 	__u32	queue_id;
 	__u32 _pad;
+	/**
+	 * For SDMA usermode queues whose doorbell was assigned by the
+	 * kernel from the per-device pool (see AMDGPU_INFO_SDMA_USERQ_DOORBELL),
+	 * this field carries the byte offset of the assigned slot inside
+	 * the routable doorbell window so userspace can write there.
+	 * 0 means the kernel did not override the user's doorbell.
+	 */
+	__u64	sdma_doorbell_offset_bytes;
 };
 
 union drm_amdgpu_userq {
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 03/10] drm/amdgpu/sdma7: register SDMA UMQ doorbell pool
  2026-04-24  8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
  2026-04-24  8:18 ` [PATCH 02/10] drm/amdgpu/userq: route SDMA UMQ doorbells through the kernel pool Jesse Zhang
@ 2026-04-24  8:18 ` Jesse Zhang
  2026-04-24  8:18 ` [PATCH 04/10] drm/amdgpu/sdma6: " Jesse Zhang
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24  8:18 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang

From: "Jesse.zhang" <Jesse.zhang@amd.com>

Call amdgpu_sdma_userq_doorbell_init() at sw_init time (gated on
userq_funcs[AMDGPU_HW_IP_DMA] being set so we only run on chips that
actually expose SDMA UMQs) and amdgpu_sdma_userq_doorbell_fini() at
sw_fini, so SDMA usermode queues get doorbells from the
firmware-managed NBIO routable window.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 85d98a0e1bff..5f6c51ba7ac1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1378,6 +1378,14 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
 		break;
 	}
 
+	/*
+	 * Init the SDMA usermode-queue doorbell pool inside the firmware-
+	 * managed NBIO S2A SDMA decode window so user SDMA UMQs get
+	 * doorbells that are actually routable to the SDMA back-end.
+	 */
+	if (adev->userq_funcs[AMDGPU_HW_IP_DMA])
+		amdgpu_sdma_userq_doorbell_init(adev);
+
 	return r;
 }
 
@@ -1389,6 +1397,7 @@ static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
 	for (i = 0; i < adev->sdma.num_instances; i++)
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+	amdgpu_sdma_userq_doorbell_fini(adev);
 	amdgpu_sdma_sysfs_reset_mask_fini(adev);
 	amdgpu_sdma_destroy_inst_ctx(adev, true);
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 04/10] drm/amdgpu/sdma6: register SDMA UMQ doorbell pool
  2026-04-24  8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
  2026-04-24  8:18 ` [PATCH 02/10] drm/amdgpu/userq: route SDMA UMQ doorbells through the kernel pool Jesse Zhang
  2026-04-24  8:18 ` [PATCH 03/10] drm/amdgpu/sdma7: register SDMA UMQ doorbell pool Jesse Zhang
@ 2026-04-24  8:18 ` Jesse Zhang
  2026-04-24  8:18 ` [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl Jesse Zhang
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24  8:18 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang

From: "Jesse.zhang" <Jesse.zhang@amd.com>

Call amdgpu_sdma_userq_doorbell_init() at sw_init time (gated on
userq_funcs[AMDGPU_HW_IP_DMA] being set so we only run on chips that
actually expose SDMA UMQs) and amdgpu_sdma_userq_doorbell_fini() at
sw_fini, so SDMA usermode queues get doorbells from the
firmware-managed NBIO routable window.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index de329b76a00c..02eeac3b2e11 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1426,6 +1426,14 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
 	if (r)
 		return r;
 
+	/*
+	 * Init the SDMA usermode-queue doorbell pool inside the firmware-
+	 * managed NBIO SDMA decode window so user SDMA UMQs get doorbells
+	 * that are actually routable to the SDMA back-end.
+	 */
+	if (adev->userq_funcs[AMDGPU_HW_IP_DMA])
+		amdgpu_sdma_userq_doorbell_init(adev);
+
 	return r;
 }
 
@@ -1437,6 +1445,7 @@ static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
 	for (i = 0; i < adev->sdma.num_instances; i++)
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+	amdgpu_sdma_userq_doorbell_fini(adev);
 	amdgpu_sdma_sysfs_reset_mask_fini(adev);
 	amdgpu_sdma_destroy_inst_ctx(adev, true);
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl
  2026-04-24  8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
                   ` (2 preceding siblings ...)
  2026-04-24  8:18 ` [PATCH 04/10] drm/amdgpu/sdma6: " Jesse Zhang
@ 2026-04-24  8:18 ` Jesse Zhang
  2026-04-24  8:29   ` Christian König
  2026-04-24  8:18 ` [PATCH 06/10] drm/amdgpu/mes: add NOTIFY_WORK_ON_UNMAPPED_QUEUE op + ADD_QUEUE fields Jesse Zhang
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24  8:18 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang

From: "Jesse.zhang" <Jesse.zhang@amd.com>

New AMDGPU_INFO query that returns a per-fpriv GEM handle for the
kernel-owned BO backing the SDMA UMQ doorbell window, plus its size.
Userspace mmap()s that handle through the standard
AMDGPU_GEM_OP_MMAP / mmap() flow to obtain a CPU pointer to the
routable doorbell BAR window; each created SDMA usermode queue's
qword-slot offset inside that mapping is reported in
drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 13 +++++++++++++
 include/uapi/drm/amdgpu_drm.h           | 16 ++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index d88e4994c8c1..dbcfbe418e42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1425,6 +1425,19 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 			return -EINVAL;
 		}
 	}
+	case AMDGPU_INFO_SDMA_USERQ_DOORBELL: {
+		struct drm_amdgpu_info_sdma_userq_doorbell db_info = {};
+		int r;
+
+		r = amdgpu_sdma_userq_doorbell_create_handle(adev, filp,
+							     &db_info.handle,
+							     &db_info.size_bytes);
+		if (r)
+			return r;
+		return copy_to_user(out, &db_info,
+				    min((size_t)size, sizeof(db_info)))
+			? -EFAULT : 0;
+	}
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->query);
 		return -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 79e8bbda046b..533be8ad8a7e 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -1280,6 +1280,22 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
 #define AMDGPU_INFO_GPUVM_FAULT			0x23
 /* query FW object size and alignment */
 #define AMDGPU_INFO_UQ_FW_AREAS			0x24
+/*
+ * SDMA usermode-queue doorbell window query.  Returns a per-fpriv GEM
+ * handle for a kernel-owned BO that backs the routable SDMA doorbell
+ * window, plus its byte size.  Userspace mmap()s the BO via the standard
+ * AMDGPU_GEM_OP_MMAP / mmap() flow to get a CPU pointer; each created
+ * SDMA usermode queue's slot offset inside that mapping is returned in
+ * drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
+ */
+#define AMDGPU_INFO_SDMA_USERQ_DOORBELL		0x25
+
+struct drm_amdgpu_info_sdma_userq_doorbell {
+	/* Per-fpriv GEM handle for the SDMA UMQ doorbell BO. */
+	__u32 handle;
+	/* Byte size of the BO (== mmappable window size). */
+	__u32 size_bytes;
+};
 
 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT	0
 #define AMDGPU_INFO_MMR_SE_INDEX_MASK	0xff
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 06/10] drm/amdgpu/mes: add NOTIFY_WORK_ON_UNMAPPED_QUEUE op + ADD_QUEUE fields
  2026-04-24  8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
                   ` (3 preceding siblings ...)
  2026-04-24  8:18 ` [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl Jesse Zhang
@ 2026-04-24  8:18 ` Jesse Zhang
  2026-04-24  8:18 ` [PATCH 07/10] drm/amdgpu/mes11: plumb unmap_flag_addr + NOTIFY_WORK_ON_UNMAPPED_QUEUE Jesse Zhang
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24  8:18 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang

From: "Jesse.zhang" <Jesse.zhang@amd.com>

Kernel-side abstraction work for the SDMA usermode-queue plumbing
that lands in subsequent per-engine patches:

- mes_add_queue_input gains is_user_mode_submission and
  unmap_flag_addr.  Without is_user_mode_submission MES treats SDMA
  queues as kernel-managed and uses the end-of-MQD slot for the unmap
  flag, so PROTECTED_FENCE at the tail of every SDMA IB looks like a
  "queue done" signal and MES gangs the queue out forever.

- mes_misc_opcode gains MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE
  with a notify_work.priority_level payload.  This wakes a ganged-out
  SDMA UMQ so subsequent IBs get re-mapped (SDMA has no
  CP_UNMAPPED_DOORBELL HW intercept).

Also surface the matching firmware bits in mes_v12_api_def.h:
is_user_mode_submission / enable_perf_profiling /
exclude_process_limit / is_video_blit_queue bitfields in
MESAPI__ADD_QUEUE, and the unmap_flag_addr packet field.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h       |  7 +++++++
 drivers/gpu/drm/amd/include/mes_v12_api_def.h | 12 +++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index cafc5caae822..705056de94b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -265,6 +265,8 @@ struct mes_add_queue_input {
 	uint32_t	exclusively_scheduled;
 	uint32_t	sh_mem_config_data;
 	uint32_t	vm_cntx_cntl;
+	uint32_t	is_user_mode_submission;
+	uint64_t	unmap_flag_addr;
 };
 
 struct mes_remove_queue_input {
@@ -343,6 +345,7 @@ enum mes_misc_opcode {
 	MES_MISC_OP_WRM_REG_WR_WAIT,
 	MES_MISC_OP_SET_SHADER_DEBUGGER,
 	MES_MISC_OP_CHANGE_CONFIG,
+	MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE,
 };
 
 struct mes_misc_op_input {
@@ -397,6 +400,10 @@ struct mes_misc_op_input {
 				uint32_t tdr_delay;
 			} tdr_config;
 		} change_config;
+
+		struct {
+			uint32_t priority_level;
+		} notify_work;
 	};
 };
 
diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h
index e541a43714a1..cd6e60184a06 100644
--- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h
@@ -381,7 +381,11 @@ union MESAPI__ADD_QUEUE {
 			uint32_t exclusively_scheduled : 1;
 			uint32_t is_long_running : 1;
 			uint32_t is_dwm_queue : 1;
-			uint32_t reserved	 : 15;
+			uint32_t is_video_blit_queue : 1;
+			uint32_t is_user_mode_submission : 1;
+			uint32_t enable_perf_profiling : 1;
+			uint32_t exclude_process_limit : 1;
+			uint32_t reserved	 : 11;
 		};
 		struct MES_API_STATUS	api_status;
 		uint64_t		tma_addr;
@@ -393,6 +397,12 @@ union MESAPI__ADD_QUEUE {
 		uint32_t		queue_id;
 		uint32_t		alignment_mode_setting;
 		uint32_t		full_sh_mem_config_data;
+		/*
+		 * MC addr where MES writes 1 when it unmaps the queue.  Used
+		 * by user-mode SDMA UMQs so the kernel/userspace can detect
+		 * the unmapped state and re-arm work via NOTIFY_WORK_ON_UNMAPPED_QUEUE.
+		 */
+		uint64_t		unmap_flag_addr;
 	};
 
 	uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 07/10] drm/amdgpu/mes11: plumb unmap_flag_addr + NOTIFY_WORK_ON_UNMAPPED_QUEUE
  2026-04-24  8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
                   ` (4 preceding siblings ...)
  2026-04-24  8:18 ` [PATCH 06/10] drm/amdgpu/mes: add NOTIFY_WORK_ON_UNMAPPED_QUEUE op + ADD_QUEUE fields Jesse Zhang
@ 2026-04-24  8:18 ` Jesse Zhang
  2026-04-24  8:18 ` [PATCH 08/10] drm/amdgpu/mes12: plumb is_user_mode_submission, unmap_flag_addr, NOTIFY Jesse Zhang
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24  8:18 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang

From: "Jesse.zhang" <Jesse.zhang@amd.com>

Pass the new mes_add_queue_input.unmap_flag_addr through to the
MESAPI__ADD_QUEUE packet, and route MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE
to the matching MESAPI_MISC opcode.

Note: the MES v11 firmware spec does not (yet) carry a per-queue
is_user_mode_submission bit, so SDMA UMQs on chips with MES v11 may
still see PROTECTED_FENCE-as-queue-done behaviour after the first IB
until firmware adds the bit.  The wakeup mechanism (NOTIFY) is wired
up so that path is ready when firmware lands.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index a926a330700e..575cc4a684b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -362,6 +362,16 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
 
 	mes_add_queue_pkt.exclusively_scheduled = input->exclusively_scheduled;
 
+	/*
+	 * unmap_flag_addr is plumbed through but only honoured by MES when
+	 * the global use_add_queue_unmap_flag_addr flag is set in
+	 * SET_HW_RESOURCES.  MES v11 firmware spec does not carry a
+	 * per-queue is_user_mode_submission bit, so SDMA UMQs on chips with
+	 * MES v11 may still see PROTECTED_FENCE-as-queue-done behaviour
+	 * until firmware adds the bit.
+	 */
+	mes_add_queue_pkt.unmap_flag_addr = input->unmap_flag_addr;
+
 	return mes_v11_0_submit_pkt_and_poll_completion(mes,
 			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
 			offsetof(union MESAPI__ADD_QUEUE, api_status));
@@ -660,6 +670,10 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
 		misc_pkt.change_config.option.bits.limit_single_process =
 				input->change_config.option.limit_single_process;
 		break;
+	case MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE:
+		misc_pkt.opcode = MESAPI_MISC__NOTIFY_WORK_ON_UNMAPPED_QUEUE;
+		misc_pkt.queue_sch_level = input->notify_work.priority_level;
+		break;
 
 	default:
 		drm_err(adev_to_drm(mes->adev), "unsupported misc op (%d)\n", input->op);
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 08/10] drm/amdgpu/mes12: plumb is_user_mode_submission, unmap_flag_addr, NOTIFY
  2026-04-24  8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
                   ` (5 preceding siblings ...)
  2026-04-24  8:18 ` [PATCH 07/10] drm/amdgpu/mes11: plumb unmap_flag_addr + NOTIFY_WORK_ON_UNMAPPED_QUEUE Jesse Zhang
@ 2026-04-24  8:18 ` Jesse Zhang
  2026-04-24  8:18 ` [PATCH 09/10] drm/amdgpu/mes_userqueue: mark SDMA UMQs as user-mode submission Jesse Zhang
  2026-04-24  8:18 ` [PATCH 10/10] drm/amdgpu/userq_fence: wake ganged-out SDMA UMQs via NOTIFY Jesse Zhang
  8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24  8:18 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang

From: "Jesse.zhang" <Jesse.zhang@amd.com>

Pass is_user_mode_submission and unmap_flag_addr from
mes_add_queue_input through to MESAPI__ADD_QUEUE in both mes_v12_0
and mes_v12_1 add_hw_queue paths, and route
MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE to the matching MESAPI_MISC
opcode.

The kernel-side caller that actually sets is_user_mode_submission for
SDMA UMQs lives in a later patch; this one is just the engine-level
plumbing.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 6 ++++++
 drivers/gpu/drm/amd/amdgpu/mes_v12_1.c | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 023c7345ea54..5acc505533f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -342,6 +342,8 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes,
 	mes_add_queue_pkt.trap_en = input->trap_en;
 	mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
 	mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
+	mes_add_queue_pkt.is_user_mode_submission = input->is_user_mode_submission;
+	mes_add_queue_pkt.unmap_flag_addr = input->unmap_flag_addr;
 
 	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
 	mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
@@ -697,6 +699,10 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
 		misc_pkt.change_config.option.bits.limit_single_process =
 				input->change_config.option.limit_single_process;
 		break;
+	case MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE:
+		misc_pkt.opcode = MESAPI_MISC__NOTIFY_WORK_ON_UNMAPPED_QUEUE;
+		misc_pkt.queue_sch_level = input->notify_work.priority_level;
+		break;
 
 	default:
 		DRM_ERROR("unsupported misc op (%d)\n", input->op);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
index 2d8a10d18939..235dbbf99ec8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
@@ -325,6 +325,8 @@ static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes,
 	mes_add_queue_pkt.trap_en = input->trap_en;
 	mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
 	mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
+	mes_add_queue_pkt.is_user_mode_submission = input->is_user_mode_submission;
+	mes_add_queue_pkt.unmap_flag_addr = input->unmap_flag_addr;
 
 	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
 	mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
@@ -598,6 +600,10 @@ static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
 		misc_pkt.change_config.option.bits.limit_single_process =
 			input->change_config.option.limit_single_process;
 		break;
+	case MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE:
+		misc_pkt.opcode = MESAPI_MISC__NOTIFY_WORK_ON_UNMAPPED_QUEUE;
+		misc_pkt.queue_sch_level = input->notify_work.priority_level;
+		break;
 	default:
 		DRM_ERROR("unsupported misc op (%d) \n", input->op);
 		return -EINVAL;
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 09/10] drm/amdgpu/mes_userqueue: mark SDMA UMQs as user-mode submission
  2026-04-24  8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
                   ` (6 preceding siblings ...)
  2026-04-24  8:18 ` [PATCH 08/10] drm/amdgpu/mes12: plumb is_user_mode_submission, unmap_flag_addr, NOTIFY Jesse Zhang
@ 2026-04-24  8:18 ` Jesse Zhang
  2026-04-24  8:18 ` [PATCH 10/10] drm/amdgpu/userq_fence: wake ganged-out SDMA UMQs via NOTIFY Jesse Zhang
  8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24  8:18 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="Y", Size: 2382 bytes --]

From: "Jesse.zhang" <Jesse.zhang@amd.com>

For AMDGPU_HW_IP_DMA queues, set mes_add_queue_input.is_user_mode_submission
and a stable unmap_flag_addr (a kernel-owned dword in the MQD
object's tail padding).  This tells MES to use the new wptr_mc /
unmap_flag scheme so the PROTECTED_FENCE at the tail of every SDMA
IB no longer terminates the queue.  Combined with the
NOTIFY_WORK_ON_UNMAPPED_QUEUE wakeup added in a follow-up patch, this
lets multi-IB submissions on a single SDMA UMQ work end-to-end.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index d12cd1b7790b..3dbcddb46b24 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -165,6 +165,28 @@ static int mes_userq_map(struct amdgpu_usermode_queue *queue)
 	queue_input.doorbell_offset = userq_props->doorbell_index;
 	queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo);
 	queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;
+	/*
+	 * SDMA UMQs need is_user_mode_submission so MES treats them as user
+	 * queues (using the new wptr_mc_addr / unmap_flag_addr scheme).
+	 * Without this MES uses end-of-MQD for unmap_flag, sees PROTECTED_FENCE
+	 * as a "queue done" signal, and gangs the queue out forever.  Combined
+	 * with NOTIFY_WORK_ON_UNMAPPED_QUEUE poke from amdgpu_userq_signal_ioctl
+	 * this lets multi-IB submissions work.  Use queue->mqd.gpu_addr +
+	 * mqd_size as a stable kernel-owned location for unmap_flag — userspace
+	 * never reads it; the kernel just needs SOMETHING valid to give MES.
+	 */
+	if (queue->queue_type == AMDGPU_HW_IP_DMA) {
+		queue_input.is_user_mode_submission = 1;
+		/*
+		 * Same offset MES would derive in legacy mode
+		 * (get_unmap_flag_addr_from_end_of_mqd in MES src 12).  Lives
+		 * inside the allocated MQD object's tail padding so it's a
+		 * valid MC address; the kernel never reads it back — its only
+		 * purpose is to keep MES happy.
+		 */
+		queue_input.unmap_flag_addr = queue->mqd.gpu_addr +
+			adev->mqds[queue->queue_type].mqd_size + sizeof(u32);
+	}
 
 	amdgpu_mes_lock(&adev->mes);
 	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 10/10] drm/amdgpu/userq_fence: wake ganged-out SDMA UMQs via NOTIFY
  2026-04-24  8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
                   ` (7 preceding siblings ...)
  2026-04-24  8:18 ` [PATCH 09/10] drm/amdgpu/mes_userqueue: mark SDMA UMQs as user-mode submission Jesse Zhang
@ 2026-04-24  8:18 ` Jesse Zhang
  8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24  8:18 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang

From: "Jesse.zhang" <Jesse.zhang@amd.com>

SDMA has no CP_UNMAPPED_DOORBELL HW intercept, so once MES gangs the
queue out (after the first IB idles it), per-queue doorbell rings from
userspace hit a mapped-out HW slot and are silently dropped: rptr
stops advancing and the FENCE IRQ never fires.

After the SDMA UMQ's first IB has actually completed
(fence_drv->cpu_addr is valid and *fence_drv->cpu_addr != 0), issue
MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE and ring the priority's
aggregated doorbell so MES re-evaluates scheduling and re-maps the
queue for the next IB.  The first submission is intentionally skipped
— the queue is still mapped from MAP_QUEUE then, and an extra notify
would race the initial scheduling.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index a58342c2ac44..6ef4cbd5d5da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -598,6 +598,39 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 	/* drop the reference acquired in fence creation function */
 	dma_fence_put(fence);
 
+	/*
+	 * SDMA UMQ wake: SDMA has no CP_UNMAPPED_DOORBELL HW intercept, so
+	 * once MES gangs the queue out (after the first IB's PROTECTED_FENCE
+	 * idles the queue), subsequent per-queue doorbell rings hit a
+	 * mapped-out HW slot and are silently ignored — rptr stops
+	 * advancing, FENCE IRQ never fires.  The MES MISC API
+	 * NOTIFY_WORK_ON_UNMAPPED_QUEUE flips MES's hasReadyQueues flag for
+	 * the queue's priority level, which makes MES re-evaluate
+	 * scheduling and re-map our SDMA UMQ for the next IB.
+	 *
+	 * Skip on the very first submission (fence_drv->cpu_addr == 0
+	 * means SDMA hasn't completed any IB yet, so MES still has the
+	 * queue mapped from MAP_QUEUE — calling NOTIFY here would race the
+	 * initial scheduling and starve the first IB).
+	 */
+	if (queue && queue->queue_type == AMDGPU_HW_IP_DMA &&
+	    adev->enable_mes && adev->mes.funcs->misc_op &&
+	    queue->fence_drv && queue->fence_drv->cpu_addr &&
+	    le64_to_cpu(*queue->fence_drv->cpu_addr) != 0) {
+		struct mes_misc_op_input op = { 0 };
+		u32 agg_db = adev->mes.aggregated_doorbells[
+				AMDGPU_MES_PRIORITY_LEVEL_NORMAL];
+
+		op.op = MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE;
+		op.notify_work.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+		amdgpu_mes_lock(&adev->mes);
+		(void)adev->mes.funcs->misc_op(&adev->mes, &op);
+		amdgpu_mes_unlock(&adev->mes);
+
+		if (agg_db)
+			WDOORBELL64(agg_db, queue->doorbell_index);
+	}
+
 exec_fini:
 	drm_exec_fini(&exec);
 put_gobj_write:
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl
  2026-04-24  8:18 ` [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl Jesse Zhang
@ 2026-04-24  8:29   ` Christian König
  2026-04-24 13:27     ` Alex Deucher
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2026-04-24  8:29 UTC (permalink / raw)
  To: Jesse Zhang, amd-gfx; +Cc: Alexander.Deucher

On 4/24/26 10:18, Jesse Zhang wrote:
> From: "Jesse.zhang" <Jesse.zhang@amd.com>
> 
> New AMDGPU_INFO query that returns a per-fpriv GEM handle for the
> kernel-owned BO backing the SDMA UMQ doorbell window, plus its size.
> Userspace mmap()s that handle through the standard
> AMDGPU_GEM_OP_MMAP / mmap() flow to obtain a CPU pointer to the
> routable doorbell BAR window; each created SDMA usermode queue's
> qword-slot offset inside that mapping is reported in
> drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.

We added the separate IOCTL for this purpose on the KFD/KGD unification branch. I think we should just cherry pick that over to amd-staging-drm-next.

@Alex what do you think?

Regards,
Christian.

> 
> Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 13 +++++++++++++
>  include/uapi/drm/amdgpu_drm.h           | 16 ++++++++++++++++
>  2 files changed, 29 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index d88e4994c8c1..dbcfbe418e42 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -1425,6 +1425,19 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>  			return -EINVAL;
>  		}
>  	}
> +	case AMDGPU_INFO_SDMA_USERQ_DOORBELL: {
> +		struct drm_amdgpu_info_sdma_userq_doorbell db_info = {};
> +		int r;
> +
> +		r = amdgpu_sdma_userq_doorbell_create_handle(adev, filp,
> +							     &db_info.handle,
> +							     &db_info.size_bytes);
> +		if (r)
> +			return r;
> +		return copy_to_user(out, &db_info,
> +				    min((size_t)size, sizeof(db_info)))
> +			? -EFAULT : 0;
> +	}
>  	default:
>  		DRM_DEBUG_KMS("Invalid request %d\n", info->query);
>  		return -EINVAL;
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 79e8bbda046b..533be8ad8a7e 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -1280,6 +1280,22 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
>  #define AMDGPU_INFO_GPUVM_FAULT			0x23
>  /* query FW object size and alignment */
>  #define AMDGPU_INFO_UQ_FW_AREAS			0x24
> +/*
> + * SDMA usermode-queue doorbell window query.  Returns a per-fpriv GEM
> + * handle for a kernel-owned BO that backs the routable SDMA doorbell
> + * window, plus its byte size.  Userspace mmap()s the BO via the standard
> + * AMDGPU_GEM_OP_MMAP / mmap() flow to get a CPU pointer; each created
> + * SDMA usermode queue's slot offset inside that mapping is returned in
> + * drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
> + */
> +#define AMDGPU_INFO_SDMA_USERQ_DOORBELL		0x25
> +
> +struct drm_amdgpu_info_sdma_userq_doorbell {
> +	/* Per-fpriv GEM handle for the SDMA UMQ doorbell BO. */
> +	__u32 handle;
> +	/* Byte size of the BO (== mmappable window size). */
> +	__u32 size_bytes;
> +};
>  
>  #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT	0
>  #define AMDGPU_INFO_MMR_SE_INDEX_MASK	0xff


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl
  2026-04-24  8:29   ` Christian König
@ 2026-04-24 13:27     ` Alex Deucher
  2026-04-24 13:32       ` Alex Deucher
  0 siblings, 1 reply; 14+ messages in thread
From: Alex Deucher @ 2026-04-24 13:27 UTC (permalink / raw)
  To: Christian König; +Cc: Jesse Zhang, amd-gfx, Alexander.Deucher

On Fri, Apr 24, 2026 at 4:39 AM Christian König
<christian.koenig@amd.com> wrote:
>
> On 4/24/26 10:18, Jesse Zhang wrote:
> > From: "Jesse.zhang" <Jesse.zhang@amd.com>
> >
> > New AMDGPU_INFO query that returns a per-fpriv GEM handle for the
> > kernel-owned BO backing the SDMA UMQ doorbell window, plus its size.
> > Userspace mmap()s that handle through the standard
> > AMDGPU_GEM_OP_MMAP / mmap() flow to obtain a CPU pointer to the
> > routable doorbell BAR window; each created SDMA usermode queue's
> > qword-slot offset inside that mapping is reported in
> > drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
>
> We added the separate IOCTL for this purpose on the KFD/KGD unification branch. I think we should just cherry pick that over to amd-staging-drm-next.
>
> @Alex what do you think?

Yes, we already have a patch for this and another fix for compute
queues in the unification branch.  We were just waiting on mesa to
make use of it.

Alex

>
> Regards,
> Christian.
>
> >
> > Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 13 +++++++++++++
> >  include/uapi/drm/amdgpu_drm.h           | 16 ++++++++++++++++
> >  2 files changed, 29 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > index d88e4994c8c1..dbcfbe418e42 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > @@ -1425,6 +1425,19 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> >                       return -EINVAL;
> >               }
> >       }
> > +     case AMDGPU_INFO_SDMA_USERQ_DOORBELL: {
> > +             struct drm_amdgpu_info_sdma_userq_doorbell db_info = {};
> > +             int r;
> > +
> > +             r = amdgpu_sdma_userq_doorbell_create_handle(adev, filp,
> > +                                                          &db_info.handle,
> > +                                                          &db_info.size_bytes);
> > +             if (r)
> > +                     return r;
> > +             return copy_to_user(out, &db_info,
> > +                                 min((size_t)size, sizeof(db_info)))
> > +                     ? -EFAULT : 0;
> > +     }
> >       default:
> >               DRM_DEBUG_KMS("Invalid request %d\n", info->query);
> >               return -EINVAL;
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index 79e8bbda046b..533be8ad8a7e 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -1280,6 +1280,22 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
> >  #define AMDGPU_INFO_GPUVM_FAULT                      0x23
> >  /* query FW object size and alignment */
> >  #define AMDGPU_INFO_UQ_FW_AREAS                      0x24
> > +/*
> > + * SDMA usermode-queue doorbell window query.  Returns a per-fpriv GEM
> > + * handle for a kernel-owned BO that backs the routable SDMA doorbell
> > + * window, plus its byte size.  Userspace mmap()s the BO via the standard
> > + * AMDGPU_GEM_OP_MMAP / mmap() flow to get a CPU pointer; each created
> > + * SDMA usermode queue's slot offset inside that mapping is returned in
> > + * drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
> > + */
> > +#define AMDGPU_INFO_SDMA_USERQ_DOORBELL              0x25
> > +
> > +struct drm_amdgpu_info_sdma_userq_doorbell {
> > +     /* Per-fpriv GEM handle for the SDMA UMQ doorbell BO. */
> > +     __u32 handle;
> > +     /* Byte size of the BO (== mmappable window size). */
> > +     __u32 size_bytes;
> > +};
> >
> >  #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT       0
> >  #define AMDGPU_INFO_MMR_SE_INDEX_MASK        0xff
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl
  2026-04-24 13:27     ` Alex Deucher
@ 2026-04-24 13:32       ` Alex Deucher
  2026-04-27  8:33         ` Zhang, Jesse(Jie)
  0 siblings, 1 reply; 14+ messages in thread
From: Alex Deucher @ 2026-04-24 13:32 UTC (permalink / raw)
  To: Christian König; +Cc: Jesse Zhang, amd-gfx, Alexander.Deucher

On Fri, Apr 24, 2026 at 9:27 AM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Fri, Apr 24, 2026 at 4:39 AM Christian König
> <christian.koenig@amd.com> wrote:
> >
> > On 4/24/26 10:18, Jesse Zhang wrote:
> > > From: "Jesse.zhang" <Jesse.zhang@amd.com>
> > >
> > > New AMDGPU_INFO query that returns a per-fpriv GEM handle for the
> > > kernel-owned BO backing the SDMA UMQ doorbell window, plus its size.
> > > Userspace mmap()s that handle through the standard
> > > AMDGPU_GEM_OP_MMAP / mmap() flow to obtain a CPU pointer to the
> > > routable doorbell BAR window; each created SDMA usermode queue's
> > > qword-slot offset inside that mapping is reported in
> > > drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
> >
> > We added the separate IOCTL for this purpose on the KFD/KGD unification branch. I think we should just cherry pick that over to amd-staging-drm-next.
> >
> > @Alex what do you think?
>
> Yes, we already have a patch for this and another fix for compute
> queues in the unification branch.  We were just waiting on mesa to
> make use of it.

It makes sense to land those patches, but the doorbell offset stuff is
part of David's VCN user queue patch set.  See:
https://lists.freedesktop.org/archives/amd-gfx/2026-February/138619.html

Alex

>
> Alex
>
> >
> > Regards,
> > Christian.
> >
> > >
> > > Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> > > ---
> > >  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 13 +++++++++++++
> > >  include/uapi/drm/amdgpu_drm.h           | 16 ++++++++++++++++
> > >  2 files changed, 29 insertions(+)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > index d88e4994c8c1..dbcfbe418e42 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > @@ -1425,6 +1425,19 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> > >                       return -EINVAL;
> > >               }
> > >       }
> > > +     case AMDGPU_INFO_SDMA_USERQ_DOORBELL: {
> > > +             struct drm_amdgpu_info_sdma_userq_doorbell db_info = {};
> > > +             int r;
> > > +
> > > +             r = amdgpu_sdma_userq_doorbell_create_handle(adev, filp,
> > > +                                                          &db_info.handle,
> > > +                                                          &db_info.size_bytes);
> > > +             if (r)
> > > +                     return r;
> > > +             return copy_to_user(out, &db_info,
> > > +                                 min((size_t)size, sizeof(db_info)))
> > > +                     ? -EFAULT : 0;
> > > +     }
> > >       default:
> > >               DRM_DEBUG_KMS("Invalid request %d\n", info->query);
> > >               return -EINVAL;
> > > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > > index 79e8bbda046b..533be8ad8a7e 100644
> > > --- a/include/uapi/drm/amdgpu_drm.h
> > > +++ b/include/uapi/drm/amdgpu_drm.h
> > > @@ -1280,6 +1280,22 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
> > >  #define AMDGPU_INFO_GPUVM_FAULT                      0x23
> > >  /* query FW object size and alignment */
> > >  #define AMDGPU_INFO_UQ_FW_AREAS                      0x24
> > > +/*
> > > + * SDMA usermode-queue doorbell window query.  Returns a per-fpriv GEM
> > > + * handle for a kernel-owned BO that backs the routable SDMA doorbell
> > > + * window, plus its byte size.  Userspace mmap()s the BO via the standard
> > > + * AMDGPU_GEM_OP_MMAP / mmap() flow to get a CPU pointer; each created
> > > + * SDMA usermode queue's slot offset inside that mapping is returned in
> > > + * drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
> > > + */
> > > +#define AMDGPU_INFO_SDMA_USERQ_DOORBELL              0x25
> > > +
> > > +struct drm_amdgpu_info_sdma_userq_doorbell {
> > > +     /* Per-fpriv GEM handle for the SDMA UMQ doorbell BO. */
> > > +     __u32 handle;
> > > +     /* Byte size of the BO (== mmappable window size). */
> > > +     __u32 size_bytes;
> > > +};
> > >
> > >  #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT       0
> > >  #define AMDGPU_INFO_MMR_SE_INDEX_MASK        0xff
> >

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl
  2026-04-24 13:32       ` Alex Deucher
@ 2026-04-27  8:33         ` Zhang, Jesse(Jie)
  0 siblings, 0 replies; 14+ messages in thread
From: Zhang, Jesse(Jie) @ 2026-04-27  8:33 UTC (permalink / raw)
  To: Alex Deucher, Koenig, Christian
  Cc: amd-gfx@lists.freedesktop.org, Deucher,  Alexander

AMD General

> -----Original Message-----
> From: Alex Deucher <alexdeucher@gmail.com>
> Sent: Friday, April 24, 2026 9:33 PM
> To: Koenig, Christian <Christian.Koenig@amd.com>
> Cc: Zhang, Jesse(Jie) <Jesse.Zhang@amd.com>; amd-gfx@lists.freedesktop.org;
> Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: Re: [PATCH 05/10] drm/amdgpu: add
> AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl
>
> On Fri, Apr 24, 2026 at 9:27 AM Alex Deucher <alexdeucher@gmail.com> wrote:
> >
> > On Fri, Apr 24, 2026 at 4:39 AM Christian König
> > <christian.koenig@amd.com> wrote:
> > >
> > > On 4/24/26 10:18, Jesse Zhang wrote:
> > > > From: "Jesse.zhang" <Jesse.zhang@amd.com>
> > > >
> > > > New AMDGPU_INFO query that returns a per-fpriv GEM handle for the
> > > > kernel-owned BO backing the SDMA UMQ doorbell window, plus its size.
> > > > Userspace mmap()s that handle through the standard
> > > > AMDGPU_GEM_OP_MMAP / mmap() flow to obtain a CPU pointer to the
> > > > routable doorbell BAR window; each created SDMA usermode queue's
> > > > qword-slot offset inside that mapping is reported in
> > > > drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
> > >
> > > We added the separate IOCTL for this purpose on the KFD/KGD unification
> branch. I think we should just cherry pick that over to amd-staging-drm-next.
> > >
> > > @Alex what do you think?
> >
> > Yes, we already have a patch for this and another fix for compute
> > queues in the unification branch.  We were just waiting on mesa to
> > make use of it.
>
> It makes sense to land those patches, but the doorbell offset stuff is part of David's
> VCN user queue patch set.  See:
> https://lists.freedesktop.org/archives/amd-gfx/2026-February/138619.html

Thanks for the reminder, Alex. I will update the patch.

Thanks
Jesse.
>
> Alex
>
> >
> > Alex
> >
> > >
> > > Regards,
> > > Christian.
> > >
> > > >
> > > > Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> > > > ---
> > > >  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 13 +++++++++++++
> > > >  include/uapi/drm/amdgpu_drm.h           | 16 ++++++++++++++++
> > > >  2 files changed, 29 insertions(+)
> > > >
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > > index d88e4994c8c1..dbcfbe418e42 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > > @@ -1425,6 +1425,19 @@ int amdgpu_info_ioctl(struct drm_device *dev,
> void *data, struct drm_file *filp)
> > > >                       return -EINVAL;
> > > >               }
> > > >       }
> > > > +     case AMDGPU_INFO_SDMA_USERQ_DOORBELL: {
> > > > +             struct drm_amdgpu_info_sdma_userq_doorbell db_info = {};
> > > > +             int r;
> > > > +
> > > > +             r = amdgpu_sdma_userq_doorbell_create_handle(adev, filp,
> > > > +                                                          &db_info.handle,
> > > > +                                                          &db_info.size_bytes);
> > > > +             if (r)
> > > > +                     return r;
> > > > +             return copy_to_user(out, &db_info,
> > > > +                                 min((size_t)size, sizeof(db_info)))
> > > > +                     ? -EFAULT : 0;
> > > > +     }
> > > >       default:
> > > >               DRM_DEBUG_KMS("Invalid request %d\n", info->query);
> > > >               return -EINVAL;
> > > > diff --git a/include/uapi/drm/amdgpu_drm.h
> > > > b/include/uapi/drm/amdgpu_drm.h index 79e8bbda046b..533be8ad8a7e
> > > > 100644
> > > > --- a/include/uapi/drm/amdgpu_drm.h
> > > > +++ b/include/uapi/drm/amdgpu_drm.h
> > > > @@ -1280,6 +1280,22 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
> > > >  #define AMDGPU_INFO_GPUVM_FAULT                      0x23
> > > >  /* query FW object size and alignment */
> > > >  #define AMDGPU_INFO_UQ_FW_AREAS                      0x24
> > > > +/*
> > > > + * SDMA usermode-queue doorbell window query.  Returns a
> > > > +per-fpriv GEM
> > > > + * handle for a kernel-owned BO that backs the routable SDMA
> > > > +doorbell
> > > > + * window, plus its byte size.  Userspace mmap()s the BO via the
> > > > +standard
> > > > + * AMDGPU_GEM_OP_MMAP / mmap() flow to get a CPU pointer; each
> > > > +created
> > > > + * SDMA usermode queue's slot offset inside that mapping is
> > > > +returned in
> > > > + * drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
> > > > + */
> > > > +#define AMDGPU_INFO_SDMA_USERQ_DOORBELL              0x25
> > > > +
> > > > +struct drm_amdgpu_info_sdma_userq_doorbell {
> > > > +     /* Per-fpriv GEM handle for the SDMA UMQ doorbell BO. */
> > > > +     __u32 handle;
> > > > +     /* Byte size of the BO (== mmappable window size). */
> > > > +     __u32 size_bytes;
> > > > +};
> > > >
> > > >  #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT       0
> > > >  #define AMDGPU_INFO_MMR_SE_INDEX_MASK        0xff
> > >

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2026-04-27  8:33 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-24  8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
2026-04-24  8:18 ` [PATCH 02/10] drm/amdgpu/userq: route SDMA UMQ doorbells through the kernel pool Jesse Zhang
2026-04-24  8:18 ` [PATCH 03/10] drm/amdgpu/sdma7: register SDMA UMQ doorbell pool Jesse Zhang
2026-04-24  8:18 ` [PATCH 04/10] drm/amdgpu/sdma6: " Jesse Zhang
2026-04-24  8:18 ` [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl Jesse Zhang
2026-04-24  8:29   ` Christian König
2026-04-24 13:27     ` Alex Deucher
2026-04-24 13:32       ` Alex Deucher
2026-04-27  8:33         ` Zhang, Jesse(Jie)
2026-04-24  8:18 ` [PATCH 06/10] drm/amdgpu/mes: add NOTIFY_WORK_ON_UNMAPPED_QUEUE op + ADD_QUEUE fields Jesse Zhang
2026-04-24  8:18 ` [PATCH 07/10] drm/amdgpu/mes11: plumb unmap_flag_addr + NOTIFY_WORK_ON_UNMAPPED_QUEUE Jesse Zhang
2026-04-24  8:18 ` [PATCH 08/10] drm/amdgpu/mes12: plumb is_user_mode_submission, unmap_flag_addr, NOTIFY Jesse Zhang
2026-04-24  8:18 ` [PATCH 09/10] drm/amdgpu/mes_userqueue: mark SDMA UMQs as user-mode submission Jesse Zhang
2026-04-24  8:18 ` [PATCH 10/10] drm/amdgpu/userq_fence: wake ganged-out SDMA UMQs via NOTIFY Jesse Zhang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox