From: Jesse Zhang <Jesse.Zhang@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: <Alexander.Deucher@amd.com>,
Christian Koenig <christian.koenig@amd.com>,
Jesse.zhang <Jesse.zhang@amd.com>,
Jesse Zhang <Jesse.Zhang@amd.com>
Subject: [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra
Date: Fri, 24 Apr 2026 16:18:46 +0800 [thread overview]
Message-ID: <20260424081955.873090-1-Jesse.Zhang@amd.com> (raw)
From: "Jesse Zhang" <Jesse.Zhang@amd.com>
Add a per-device qword-slot pool covering the firmware-managed NBIO
SDMA decode window (BAR dwords [sdma_engine[0],
sdma_engine[0] + sdma_doorbell_range * num_instances)) — the only
range whose writes are routed to the SDMA back-end. Kernel SDMA ring
slots are pre-masked at init.
The window is exposed to userspace as a custom drm_gem_object: no TTM
backing, custom .mmap callback that does io_remap_pfn_range from the
SDMA decode window's BAR address. Per-fpriv GEM handles for that BO
can be minted on demand via amdgpu_sdma_userq_doorbell_create_handle()
so userspace mmap()s through the standard drm_gem_mmap path — no
file_operations override and no fixed mmap pgoff sentinel.
Slots are allocated/freed via amdgpu_sdma_userq_doorbell_alloc/free.
The init/fini and the AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl that uses
create_handle land in subsequent patches.
Suggested-by: Prike Liang <Prike.Liang@amd.com>
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 164 +++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 55 ++++++++
2 files changed, 219 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 321310ba2c08..1c61761c0046 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -22,6 +22,8 @@
*/
#include <linux/firmware.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_file.h>
#include "amdgpu.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
@@ -200,6 +202,168 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
}
+/*
+ * GEM .mmap for the SDMA UMQ doorbell window BO: no TTM backing, the VMA is
+ * backed directly by the doorbell BAR via io_remap_pfn_range().
+ *
+ * drm_gem_mmap() calls us with the fake mmap offset still in vm_pgoff, so a
+ * partial mmap at a nonzero offset inside the BO must be honored: the delta
+ * past the vma node start selects the starting page within the window.
+ * (The original code always mapped from phys_base page 0, returning the
+ * wrong doorbell pages for offset > 0.)
+ */
+static int amdgpu_sdma_userq_db_obj_mmap(struct drm_gem_object *obj,
+					 struct vm_area_struct *vma)
+{
+	struct amdgpu_sdma_userq_db_obj *db = to_amdgpu_sdma_userq_db(obj);
+	unsigned long win_pages = round_up(db->size, PAGE_SIZE) >> PAGE_SHIFT;
+	unsigned long delta;
+
+	/* Page offset of this mapping inside the BO (fake offset removed). */
+	delta = vma->vm_pgoff - drm_vma_node_start(&obj->vma_node);
+	if (delta >= win_pages || vma_pages(vma) > win_pages - delta)
+		return -EINVAL;
+
+	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
+		     VM_DONTDUMP | VM_PFNMAP);
+	/* Doorbells are MMIO: must be mapped uncached. */
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return io_remap_pfn_range(vma, vma->vm_start,
+				  (db->phys_base >> PAGE_SHIFT) + delta,
+				  vma->vm_end - vma->vm_start,
+				  vma->vm_page_prot);
+}
+
+/*
+ * GEM .free for the doorbell window BO. There is no TTM/backing storage to
+ * tear down: release the GEM core state (drops the mmap offset node) and
+ * free the containing amdgpu_sdma_userq_db_obj allocation.
+ */
+static void amdgpu_sdma_userq_db_obj_free(struct drm_gem_object *obj)
+{
+ struct amdgpu_sdma_userq_db_obj *db = to_amdgpu_sdma_userq_db(obj);
+
+ drm_gem_object_release(obj);
+ kfree(db);
+}
+
+/* Custom GEM object ops: free + mmap only; no vmap/pin, nothing to export. */
+static const struct drm_gem_object_funcs amdgpu_sdma_userq_db_obj_funcs = {
+ .free = amdgpu_sdma_userq_db_obj_free,
+ .mmap = amdgpu_sdma_userq_db_obj_mmap,
+};
+
+/**
+ * amdgpu_sdma_userq_doorbell_init - set up the SDMA UMQ doorbell slot pool
+ * @adev: amdgpu device
+ *
+ * Builds a per-device qword-slot pool over the NBIO SDMA decode window
+ * (BAR dwords [sdma_engine[0], sdma_engine[0] + sdma_doorbell_range *
+ * num_instances)) and wraps the window in a mmap-able private GEM object.
+ * Kernel SDMA ring doorbell slots inside the window are pre-masked in the
+ * allocation bitmap so userspace can never claim them.
+ *
+ * Returns 0 on success (including the no-op cases: no SDMA userq support or
+ * an empty window), negative errno on failure.
+ */
+int amdgpu_sdma_userq_doorbell_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_sdma_userq_db_obj *db;
+	u32 base_dw, size_dw, nslots, ring_dw;
+	int i, r;
+
+	/* Nothing to do on ASICs without SDMA usermode-queue support. */
+	if (!adev->userq_funcs[AMDGPU_HW_IP_DMA])
+		return 0;
+
+	/* doorbell_index entries are qword indices; << 1 converts to dwords. */
+	base_dw = adev->doorbell_index.sdma_engine[0] << 1;
+	size_dw = adev->doorbell_index.sdma_doorbell_range *
+		  adev->sdma.num_instances;
+	nslots = size_dw / 2;	/* one pool bit per qword slot */
+	if (!nslots)
+		return 0;
+
+	db = kzalloc(sizeof(*db), GFP_KERNEL);
+	if (!db)
+		return -ENOMEM;
+
+	db->phys_base = adev->doorbell.base +
+			(resource_size_t)base_dw * sizeof(u32);
+	db->size = size_dw * sizeof(u32);
+	db->base.funcs = &amdgpu_sdma_userq_db_obj_funcs;
+
+	/* Private object: no shmem backing, BO size is the page-rounded window. */
+	drm_gem_private_object_init(adev_to_drm(adev), &db->base,
+				    round_up(db->size, PAGE_SIZE));
+	r = drm_gem_create_mmap_offset(&db->base);
+	if (r)
+		goto err_put;
+
+	mutex_init(&adev->sdma.userq_db_mutex);
+	adev->sdma.userq_db_bitmap = bitmap_zalloc(nslots, GFP_KERNEL);
+	if (!adev->sdma.userq_db_bitmap) {
+		r = -ENOMEM;
+		goto err_mutex;
+	}
+
+	adev->sdma.userq_db_obj = db;
+	adev->sdma.userq_db_nslots = nslots;
+
+	/*
+	 * Mask out the qword slots used by the kernel SDMA rings
+	 * (sdma_engine[i] << 1 in absolute BAR dwords => qword slot
+	 * (sdma_engine[i] - sdma_engine[0]) within this window).
+	 */
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring_dw = adev->doorbell_index.sdma_engine[i] << 1;
+		if (ring_dw >= base_dw && ring_dw < base_dw + size_dw)
+			set_bit((ring_dw - base_dw) / 2,
+				adev->sdma.userq_db_bitmap);
+	}
+
+	dev_info(adev->dev,
+		 "SDMA UMQ doorbell pool: %u qword slots in BAR dword [%u, %u)\n",
+		 nslots, base_dw, base_dw + size_dw);
+	return 0;
+
+err_mutex:
+	mutex_destroy(&adev->sdma.userq_db_mutex);
+err_put:
+	/* Final put invokes amdgpu_sdma_userq_db_obj_free() -> kfree(db). */
+	drm_gem_object_put(&db->base);
+	return r;
+}
+
+/**
+ * amdgpu_sdma_userq_doorbell_fini - tear down the SDMA UMQ doorbell pool
+ * @adev: amdgpu device
+ *
+ * Counterpart of amdgpu_sdma_userq_doorbell_init(); safe to call when init
+ * was skipped or failed (userq_db_obj is then NULL). Drops the device's
+ * reference on the window BO — the object itself is freed by its GEM .free
+ * callback once the last reference (including userspace handles) is gone.
+ */
+void amdgpu_sdma_userq_doorbell_fini(struct amdgpu_device *adev)
+{
+	if (!adev->sdma.userq_db_obj)
+		return;
+
+	bitmap_free(adev->sdma.userq_db_bitmap);
+	adev->sdma.userq_db_bitmap = NULL;
+	adev->sdma.userq_db_nslots = 0;
+	drm_gem_object_put(&adev->sdma.userq_db_obj->base);
+	adev->sdma.userq_db_obj = NULL;
+	/* Pairs with the mutex_init() in init; init only succeeds past it. */
+	mutex_destroy(&adev->sdma.userq_db_mutex);
+}
+
+/*
+ * Allocate one qword doorbell slot. On success, *out_slot receives the
+ * slot id (also the qword index inside the userspace mmap of the window
+ * BO) which the caller passes back to free.
+ */
+int amdgpu_sdma_userq_doorbell_alloc(struct amdgpu_device *adev, u32 *out_slot)
+{
+	u32 nslots = adev->sdma.userq_db_nslots;
+	int r = 0;
+	u32 bit;
+
+	if (!adev->sdma.userq_db_obj || !nslots)
+		return -ENODEV;
+
+	mutex_lock(&adev->sdma.userq_db_mutex);
+	bit = find_first_zero_bit(adev->sdma.userq_db_bitmap, nslots);
+	if (bit < nslots) {
+		set_bit(bit, adev->sdma.userq_db_bitmap);
+		*out_slot = bit;
+	} else {
+		r = -ENOSPC;	/* every routable slot is in use */
+	}
+	mutex_unlock(&adev->sdma.userq_db_mutex);
+
+	return r;
+}
+
+/*
+ * Return a qword doorbell slot to the pool. Out-of-range slot ids and
+ * double frees indicate a bug in the caller's queue teardown and are
+ * reported rather than silently ignored (a silent double free could later
+ * hand the same slot to two queues).
+ */
+void amdgpu_sdma_userq_doorbell_free(struct amdgpu_device *adev, u32 slot)
+{
+	if (!adev->sdma.userq_db_obj)
+		return;
+	if (slot >= adev->sdma.userq_db_nslots) {
+		dev_warn(adev->dev,
+			 "SDMA UMQ doorbell: freeing bogus slot %u\n", slot);
+		return;
+	}
+	mutex_lock(&adev->sdma.userq_db_mutex);
+	if (!test_and_clear_bit(slot, adev->sdma.userq_db_bitmap))
+		dev_warn(adev->dev,
+			 "SDMA UMQ doorbell: double free of slot %u\n", slot);
+	mutex_unlock(&adev->sdma.userq_db_mutex);
+}
+
+/*
+ * Mint a per-fpriv GEM handle for the per-device SDMA UMQ doorbell BO.
+ * Userspace then uses standard GEM_MMAP / mmap() on /dev/dri/cardN to
+ * obtain a CPU pointer to the routable doorbell window.
+ */
+int amdgpu_sdma_userq_doorbell_create_handle(struct amdgpu_device *adev,
+					     struct drm_file *filp,
+					     u32 *handle, u32 *size_bytes)
+{
+	struct amdgpu_sdma_userq_db_obj *db = adev->sdma.userq_db_obj;
+	int r;
+
+	if (!db)
+		return -ENODEV;
+
+	/* The handle holds its own reference on the window BO. */
+	r = drm_gem_handle_create(filp, &db->base, handle);
+	if (!r)
+		*size_bytes = db->size;
+
+	return r;
+}
+
int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
u32 instance, bool duplicate)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 2bf365609775..93a7eb9746d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -146,6 +146,20 @@ struct amdgpu_sdma {
bool disable_uq;
void (*get_csa_info)(struct amdgpu_device *adev,
struct amdgpu_sdma_csa_info *csa_info);
+
+ /*
+ * SDMA usermode-queue doorbell pool. The window covers
+ * BAR dwords [sdma_engine[0], sdma_engine[0] +
+ * sdma_doorbell_range * num_instances) — the only range that NBIO
+ * routes to the SDMA back-end. Each bit in the bitmap represents
+ * one qword slot; kernel SDMA ring slots are pre-masked at init.
+ * The window is exposed to userspace as a custom drm_gem_object
+ * (userq_db_obj) that is mmap'd via standard GEM_MMAP.
+ */
+ struct amdgpu_sdma_userq_db_obj *userq_db_obj;
+ struct mutex userq_db_mutex;
+ unsigned long *userq_db_bitmap;
+ u32 userq_db_nslots; /* qword slots */
};
/*
@@ -185,6 +199,38 @@ struct amdgpu_buffer_funcs {
uint32_t byte_count);
};
+/*
+ * SDMA usermode-queue doorbell pool.
+ *
+ * The pool re-uses qword doorbell slots inside the firmware-managed NBIO
+ * SDMA decode window (BAR dwords [sdma_engine[0],
+ * sdma_engine[0] + sdma_doorbell_range * num_instances)) — that range is
+ * the only one whose writes are routed to the SDMA back-end. The kernel
+ * SDMA ring slots are pre-marked so they keep working alongside any
+ * number of SDMA UMQs.
+ *
+ * The window is exposed to userspace via a per-device drm_gem_object that
+ * userspace mmap()s through the standard GEM_MMAP path; per-fpriv handles
+ * are minted on demand by the AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl. No
+ * file_operations override and no fixed mmap pgoff sentinel.
+ *
+ * FIXME: KFD's SDMA queue doorbells (kgd_*_hqd_sdma_get_doorbell on chips
+ * with a non-stub implementation, e.g. gfx9.4.3) are computed
+ * from the same adev->doorbell_index.sdma_engine[] array and would
+ * overlap with this pool. On gfx12 the kgd hook stubs to 0, so there is
+ * no immediate conflict. A shared per-adev allocator that both
+ * KFD and amdgpu UMQ call into is the longer-term fix.
+ */
+
+/* Per-device GEM wrapper for the SDMA UMQ doorbell BAR window. */
+struct amdgpu_sdma_userq_db_obj {
+ struct drm_gem_object base;
+ resource_size_t phys_base; /* BAR phys addr of window start */
+ u32 size; /* window size in bytes */
+};
+
+/* Upcast a drm_gem_object known to use amdgpu_sdma_userq_db_obj_funcs. */
+#define to_amdgpu_sdma_userq_db(_obj) \
+ container_of(_obj, struct amdgpu_sdma_userq_db_obj, base)
+
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id,
bool caller_handles_kernel_queues);
@@ -205,6 +251,15 @@ int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
bool duplicate);
+struct drm_file;
+struct amdgpu_sdma_userq_db_obj;
+int amdgpu_sdma_userq_doorbell_init(struct amdgpu_device *adev);
+void amdgpu_sdma_userq_doorbell_fini(struct amdgpu_device *adev);
+int amdgpu_sdma_userq_doorbell_alloc(struct amdgpu_device *adev, u32 *out_slot);
+void amdgpu_sdma_userq_doorbell_free(struct amdgpu_device *adev, u32 slot);
+int amdgpu_sdma_userq_doorbell_create_handle(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ u32 *handle, u32 *size_bytes);
void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
bool duplicate);
int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
--
2.49.0
next reply other threads:[~2026-04-24 8:20 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-24 8:18 Jesse Zhang [this message]
2026-04-24 8:18 ` [PATCH 02/10] drm/amdgpu/userq: route SDMA UMQ doorbells through the kernel pool Jesse Zhang
2026-04-24 8:18 ` [PATCH 03/10] drm/amdgpu/sdma7: register SDMA UMQ doorbell pool Jesse Zhang
2026-04-24 8:18 ` [PATCH 04/10] drm/amdgpu/sdma6: " Jesse Zhang
2026-04-24 8:18 ` [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl Jesse Zhang
2026-04-24 8:29 ` Christian König
2026-04-24 13:27 ` Alex Deucher
2026-04-24 13:32 ` Alex Deucher
2026-04-27 8:33 ` Zhang, Jesse(Jie)
2026-04-24 8:18 ` [PATCH 06/10] drm/amdgpu/mes: add NOTIFY_WORK_ON_UNMAPPED_QUEUE op + ADD_QUEUE fields Jesse Zhang
2026-04-24 8:18 ` [PATCH 07/10] drm/amdgpu/mes11: plumb unmap_flag_addr + NOTIFY_WORK_ON_UNMAPPED_QUEUE Jesse Zhang
2026-04-24 8:18 ` [PATCH 08/10] drm/amdgpu/mes12: plumb is_user_mode_submission, unmap_flag_addr, NOTIFY Jesse Zhang
2026-04-24 8:18 ` [PATCH 09/10] drm/amdgpu/mes_userqueue: mark SDMA UMQs as user-mode submission Jesse Zhang
2026-04-24 8:18 ` [PATCH 10/10] drm/amdgpu/userq_fence: wake gangs-out SDMA UMQs via NOTIFY Jesse Zhang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260424081955.873090-1-Jesse.Zhang@amd.com \
--to=jesse.zhang@amd.com \
--cc=Alexander.Deucher@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=christian.koenig@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox