AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 01/11] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra
@ 2026-04-27  8:34 Jesse Zhang
  2026-04-27  8:34 ` [PATCH v2 02/11] drm/amdgpu/userq: route SDMA UMQ doorbells through the kernel pool Jesse Zhang
                   ` (10 more replies)
  0 siblings, 11 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-27  8:34 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse Zhang

Add a per-device qword-slot pool covering the firmware-managed NBIO
SDMA decode window (BAR dwords [sdma_engine[0],
sdma_engine[0] + sdma_doorbell_range * num_instances)) — the only
range whose writes are routed to the SDMA back-end.  Kernel SDMA ring
slots are pre-masked at init.

The window is exposed to userspace as a custom drm_gem_object: no TTM
backing, custom .mmap callback that does io_remap_pfn_range from the
SDMA decode window's BAR address.  Per-fpriv GEM handles for that BO
can be minted on demand via amdgpu_sdma_userq_doorbell_create_handle()
so userspace mmap()s through the standard drm_gem_mmap path — no
file_operations override and no fixed mmap pgoff sentinel.

Slots are allocated/freed via amdgpu_sdma_userq_doorbell_alloc/free.
The init/fini and the AMDGPU_INFO_USERQ_DOORBELL ioctl that uses
create_handle land in subsequent patches.

Suggested-by: Prike Liang <Prike.Liang@amd.com>
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 164 +++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  55 ++++++++
 2 files changed, 219 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 321310ba2c08..1c61761c0046 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -22,6 +22,8 @@
  */
 
 #include <linux/firmware.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_file.h>
 #include "amdgpu.h"
 #include "amdgpu_sdma.h"
 #include "amdgpu_ras.h"
@@ -200,6 +202,168 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
 	       sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
 }
 
+static int amdgpu_sdma_userq_db_obj_mmap(struct drm_gem_object *obj,
+					 struct vm_area_struct *vma)
+{
+	struct amdgpu_sdma_userq_db_obj *db = to_amdgpu_sdma_userq_db(obj);
+
+	/*
+	 * db->phys_base >> PAGE_SHIFT silently rounds down to the
+	 * containing page; if the SDMA decode window does not start on a
+	 * page boundary that would expose the doorbells that precede it.
+	 * Refuse the mapping instead of leaking adjacent doorbell space.
+	 */
+	if (!PAGE_ALIGNED(db->phys_base))
+		return -EINVAL;
+
+	/*
+	 * Cap the mapping at the page-rounded window size.  NOTE(review):
+	 * when db->size is not a page multiple the trailing page slack
+	 * still maps dwords past the window — confirm the range ends on a
+	 * page boundary on all supported ASICs.
+	 */
+	if (vma->vm_end - vma->vm_start > round_up(db->size, PAGE_SIZE))
+		return -EINVAL;
+
+	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
+		     VM_DONTDUMP | VM_PFNMAP);
+	/* Doorbells are MMIO: must be mapped uncached. */
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return io_remap_pfn_range(vma, vma->vm_start,
+				  db->phys_base >> PAGE_SHIFT,
+				  vma->vm_end - vma->vm_start,
+				  vma->vm_page_prot);
+}
+
+/* Called when the last reference to the doorbell-window GEM object is
+ * dropped: release GEM core state (mmap offset etc.), then free the
+ * embedding amdgpu_sdma_userq_db_obj allocated in doorbell_init. */
+static void amdgpu_sdma_userq_db_obj_free(struct drm_gem_object *obj)
+{
+	struct amdgpu_sdma_userq_db_obj *db = to_amdgpu_sdma_userq_db(obj);
+
+	drm_gem_object_release(obj);
+	kfree(db);
+}
+
+/* Bare GEM object (no TTM backing): .mmap remaps the doorbell BAR window
+ * directly, .free tears down the wrapper struct. */
+static const struct drm_gem_object_funcs amdgpu_sdma_userq_db_obj_funcs = {
+	.free = amdgpu_sdma_userq_db_obj_free,
+	.mmap = amdgpu_sdma_userq_db_obj_mmap,
+};
+
+/*
+ * Set up the per-device SDMA UMQ doorbell pool: one qword-slot bitmap
+ * covering the SDMA decode window plus a private GEM object that exposes
+ * the window to userspace.  Returns 0 when the device has no SDMA UMQ
+ * support (nothing to do) or on success; negative errno otherwise.
+ */
+int amdgpu_sdma_userq_doorbell_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_sdma_userq_db_obj *db;
+	u32 base_dw, size_dw, nslots, ring_dw;
+	int i, r;
+
+	if (!adev->userq_funcs[AMDGPU_HW_IP_DMA])
+		return 0;
+
+	/* sdma_engine[] indices are qwords; << 1 converts to BAR dwords. */
+	base_dw = adev->doorbell_index.sdma_engine[0] << 1;
+	size_dw = adev->doorbell_index.sdma_doorbell_range *
+		  adev->sdma.num_instances;
+	nslots  = size_dw / 2;	/* qword slots */
+	if (!nslots)
+		return 0;
+
+	db = kzalloc(sizeof(*db), GFP_KERNEL);
+	if (!db)
+		return -ENOMEM;
+
+	db->phys_base = adev->doorbell.base +
+			(resource_size_t)base_dw * sizeof(u32);
+	db->size      = size_dw * sizeof(u32);
+	/* funcs must be set before the first drm_gem_object_put below. */
+	db->base.funcs = &amdgpu_sdma_userq_db_obj_funcs;
+
+	drm_gem_private_object_init(adev_to_drm(adev), &db->base,
+				    round_up(db->size, PAGE_SIZE));
+	r = drm_gem_create_mmap_offset(&db->base);
+	if (r) {
+		drm_gem_object_put(&db->base);
+		return r;
+	}
+
+	mutex_init(&adev->sdma.userq_db_mutex);
+	adev->sdma.userq_db_bitmap = bitmap_zalloc(nslots, GFP_KERNEL);
+	if (!adev->sdma.userq_db_bitmap) {
+		/* Unwind the mutex_init() above on the error path. */
+		mutex_destroy(&adev->sdma.userq_db_mutex);
+		drm_gem_object_put(&db->base);
+		return -ENOMEM;
+	}
+
+	adev->sdma.userq_db_obj    = db;
+	adev->sdma.userq_db_nslots = nslots;
+
+	/*
+	 * Mask out the qword slots used by the kernel SDMA rings
+	 * (sdma_engine[i] << 1 in absolute BAR dwords => qword slot
+	 * (sdma_engine[i] - sdma_engine[0]) within this window).
+	 */
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring_dw = adev->doorbell_index.sdma_engine[i] << 1;
+		if (ring_dw >= base_dw && ring_dw < base_dw + size_dw)
+			set_bit((ring_dw - base_dw) / 2,
+				adev->sdma.userq_db_bitmap);
+	}
+
+	dev_info(adev->dev,
+		 "SDMA UMQ doorbell pool: %u qword slots in BAR dword [%u, %u)\n",
+		 nslots, base_dw, base_dw + size_dw);
+	return 0;
+}
+
+/*
+ * Tear down the SDMA UMQ doorbell pool.  Safe to call when init was
+ * skipped or failed: init only publishes userq_db_obj on full success,
+ * so a NULL obj means there is nothing to undo.
+ */
+void amdgpu_sdma_userq_doorbell_fini(struct amdgpu_device *adev)
+{
+	if (!adev->sdma.userq_db_obj)
+		return;
+	bitmap_free(adev->sdma.userq_db_bitmap);
+	adev->sdma.userq_db_bitmap = NULL;
+	adev->sdma.userq_db_nslots = 0;
+	/* Drop the init-time reference; the object is only freed once any
+	 * userspace handles minted via create_handle are gone as well. */
+	drm_gem_object_put(&adev->sdma.userq_db_obj->base);
+	adev->sdma.userq_db_obj = NULL;
+	/* Pairs with mutex_init() in amdgpu_sdma_userq_doorbell_init(). */
+	mutex_destroy(&adev->sdma.userq_db_mutex);
+}
+
+/*
+ * Allocate one qword doorbell slot.  On success, *out_slot receives the
+ * slot id (also the qword index inside the userspace mmap of the window
+ * BO) which the caller passes back to free.
+ */
+int amdgpu_sdma_userq_doorbell_alloc(struct amdgpu_device *adev, u32 *out_slot)
+{
+	u32 slot;
+
+	/* Pool was never initialized (no SDMA UMQ support) or is empty. */
+	if (!adev->sdma.userq_db_obj || !adev->sdma.userq_db_nslots)
+		return -ENODEV;
+
+	mutex_lock(&adev->sdma.userq_db_mutex);
+	slot = find_first_zero_bit(adev->sdma.userq_db_bitmap,
+				   adev->sdma.userq_db_nslots);
+	if (slot >= adev->sdma.userq_db_nslots) {
+		/* Every slot taken: kernel rings + already-allocated UMQs. */
+		mutex_unlock(&adev->sdma.userq_db_mutex);
+		return -ENOSPC;
+	}
+	set_bit(slot, adev->sdma.userq_db_bitmap);
+	mutex_unlock(&adev->sdma.userq_db_mutex);
+
+	*out_slot = slot;
+	return 0;
+}
+
+/* Return a slot handed out by amdgpu_sdma_userq_doorbell_alloc().
+ * Tolerates calls after fini (obj NULL) and out-of-range slot ids.
+ * NOTE(review): a bogus in-range id would clear a kernel-ring mask bit —
+ * callers must only pass ids they got from _alloc(). */
+void amdgpu_sdma_userq_doorbell_free(struct amdgpu_device *adev, u32 slot)
+{
+	if (!adev->sdma.userq_db_obj)
+		return;
+	if (slot >= adev->sdma.userq_db_nslots)
+		return;
+	mutex_lock(&adev->sdma.userq_db_mutex);
+	clear_bit(slot, adev->sdma.userq_db_bitmap);
+	mutex_unlock(&adev->sdma.userq_db_mutex);
+}
+
+/*
+ * Mint a per-fpriv GEM handle for the per-device SDMA UMQ doorbell BO.
+ * Userspace then uses standard GEM_MMAP / mmap() on /dev/dri/cardN to
+ * obtain a CPU pointer to the routable doorbell window.
+ */
+int amdgpu_sdma_userq_doorbell_create_handle(struct amdgpu_device *adev,
+					     struct drm_file *filp,
+					     u32 *handle, u32 *size_bytes)
+{
+	int r;
+
+	if (!adev->sdma.userq_db_obj)
+		return -ENODEV;
+
+	/* The handle takes its own GEM reference, so the window BO stays
+	 * alive until both the handle and the init-time ref are dropped. */
+	r = drm_gem_handle_create(filp, &adev->sdma.userq_db_obj->base, handle);
+	if (r)
+		return r;
+
+	/* Exact window size in bytes; the mmap itself is page-rounded. */
+	*size_bytes = adev->sdma.userq_db_obj->size;
+	return 0;
+}
+
 int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
 			       u32 instance, bool duplicate)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 2bf365609775..93a7eb9746d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -146,6 +146,20 @@ struct amdgpu_sdma {
 	bool			disable_uq;
 	void (*get_csa_info)(struct amdgpu_device *adev,
 			     struct amdgpu_sdma_csa_info *csa_info);
+
+	/*
+	 * SDMA usermode-queue doorbell pool.  The window covers
+	 * BAR dwords [sdma_engine[0], sdma_engine[0] +
+	 * sdma_doorbell_range * num_instances) — the only range that NBIO
+	 * routes to the SDMA back-end.  Each bit in the bitmap represents
+	 * one qword slot; kernel SDMA ring slots are pre-masked at init.
+	 * The window is exposed to userspace as a custom drm_gem_object
+	 * (userq_db_obj) that is mmap'd via standard GEM_MMAP.
+	 */
+	struct amdgpu_sdma_userq_db_obj *userq_db_obj;
+	struct mutex		userq_db_mutex;
+	unsigned long		*userq_db_bitmap;
+	u32			userq_db_nslots;	/* qword slots */
 };
 
 /*
@@ -185,6 +199,38 @@ struct amdgpu_buffer_funcs {
 				 uint32_t byte_count);
 };
 
+/*
+ * SDMA usermode-queue doorbell pool.
+ *
+ * The pool re-uses qword doorbell slots inside the firmware-managed NBIO
+ * SDMA decode window (BAR dwords [sdma_engine[0],
+ * sdma_engine[0] + sdma_doorbell_range * num_instances)) — that range is
+ * the only one whose writes are routed to the SDMA back-end.  The kernel
+ * SDMA ring slots are pre-marked so they keep working alongside any
+ * number of SDMA UMQs.
+ *
+ * The window is exposed to userspace via a per-device drm_gem_object that
+ * userspace mmap()s through the standard GEM_MMAP path; per-fpriv handles
+ * are minted on demand by the AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl.  No
+ * file_operations override and no fixed mmap pgoff sentinel.
+ *
+ * FIXME: KFD's SDMA queue doorbells (kgd_*_hqd_sdma_get_doorbell on chips
+ * with a non-stub implementation, e.g. gfx9.4.3) are computed
+ * from the same adev->doorbell_index.sdma_engine[] array and would
+ * overlap with this pool.  On gfx12 the kgd hook stubs to 0, so there is
+ * no immediate conflict.  A shared per-adev allocator that both
+ * KFD and amdgpu UMQ call into is the longer-term fix.
+ */
+
+/* GEM wrapper for the per-device SDMA UMQ doorbell window.  Freed from
+ * the object's .free callback when the last reference (init-time ref
+ * plus any userspace handles) is dropped. */
+struct amdgpu_sdma_userq_db_obj {
+	struct drm_gem_object	base;
+	resource_size_t		phys_base;	/* BAR phys addr of window start */
+	u32			size;		/* window size in bytes */
+};
+
+/* Recover the wrapper from the embedded drm_gem_object. */
+#define to_amdgpu_sdma_userq_db(_obj) \
+	container_of(_obj, struct amdgpu_sdma_userq_db_obj, base)
+
 int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id,
 			     bool caller_handles_kernel_queues);
 
@@ -205,6 +251,15 @@ int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
 				      struct amdgpu_iv_entry *entry);
 int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
 			       bool duplicate);
+struct drm_file;
+struct amdgpu_sdma_userq_db_obj;
+int amdgpu_sdma_userq_doorbell_init(struct amdgpu_device *adev);
+void amdgpu_sdma_userq_doorbell_fini(struct amdgpu_device *adev);
+int amdgpu_sdma_userq_doorbell_alloc(struct amdgpu_device *adev, u32 *out_slot);
+void amdgpu_sdma_userq_doorbell_free(struct amdgpu_device *adev, u32 slot);
+int amdgpu_sdma_userq_doorbell_create_handle(struct amdgpu_device *adev,
+					     struct drm_file *filp,
+					     u32 *handle, u32 *size_bytes);
 void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
         bool duplicate);
 int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2026-04-28  9:39 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-27  8:34 [PATCH v2 01/11] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
2026-04-27  8:34 ` [PATCH v2 02/11] drm/amdgpu/userq: route SDMA UMQ doorbells through the kernel pool Jesse Zhang
2026-04-27  8:34 ` [PATCH v2 03/11] drm/amdgpu/gem: only enforce amdgpu_bo access checks on amdgpu_bo objects Jesse Zhang
2026-04-27  8:39   ` Christian König
2026-04-27  8:34 ` [PATCH v2 04/11] drm/amdgpu/sdma7: register SDMA UMQ doorbell pool Jesse Zhang
2026-04-27  8:34 ` [PATCH v2 05/11] drm/amdgpu/sdma6: " Jesse Zhang
2026-04-27  8:34 ` [PATCH v2 06/11] drm/amdgpu: add AMDGPU_INFO_USERQ_DOORBELL ioctl Jesse Zhang
2026-04-27  8:34 ` [PATCH v2 07/11] drm/amdgpu/mes: add NOTIFY_WORK_ON_UNMAPPED_QUEUE op + ADD_QUEUE fields Jesse Zhang
2026-04-27  8:34 ` [PATCH v2 08/11] drm/amdgpu/mes11: plumb unmap_flag_addr + NOTIFY_WORK_ON_UNMAPPED_QUEUE Jesse Zhang
2026-04-27  8:34 ` [PATCH v2 09/11] drm/amdgpu/mes12: plumb is_user_mode_submission, unmap_flag_addr, NOTIFY Jesse Zhang
2026-04-27  8:34 ` [PATCH v2 10/11] drm/amdgpu/mes_userqueue: mark SDMA UMQs as user-mode submission Jesse Zhang
2026-04-27  8:34 ` [PATCH v2 11/11] drm/amdgpu/userq_fence: wake gangs-out SDMA UMQs via NOTIFY Jesse Zhang
2026-04-27  8:42 ` [PATCH v2 01/11] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Christian König
2026-04-28  9:39   ` Zhang, Jesse(Jie)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox