* [PATCH 02/10] drm/amdgpu/userq: route SDMA UMQ doorbells through the kernel pool
2026-04-24 8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
@ 2026-04-24 8:18 ` Jesse Zhang
2026-04-24 8:18 ` [PATCH 03/10] drm/amdgpu/sdma7: register SDMA UMQ doorbell pool Jesse Zhang
` (7 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24 8:18 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang
From: "Jesse.zhang" <Jesse.zhang@amd.com>
User-allocated DOORBELL BOs land at BAR offsets outside the
firmware-managed NBIO SDMA decode window and cannot reach the SDMA
back-end. For AMDGPU_HW_IP_DMA queues, ignore the user-supplied
doorbell index and allocate one from the per-device pool added in the
previous patch. Track the assigned slot id on the queue so it can be
returned to the bitmap on destroy.
Add a new sdma_doorbell_offset_bytes field to drm_amdgpu_userq_out
that tells userspace where its kernel-allocated qword slot lives
inside the BO it will mmap (the BO handle comes from the
AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl added later in the series).
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 47 +++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 7 ++++
include/uapi/drm/amdgpu_drm.h | 8 ++++
3 files changed, 62 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 8f48520cb822..cea0f9cb59d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -662,6 +662,10 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que
amdgpu_bo_unpin(queue->wptr_obj.obj);
amdgpu_bo_unreserve(queue->wptr_obj.obj);
amdgpu_bo_unref(&queue->wptr_obj.obj);
+
+ if (queue->sdma_userq_db_slot >= 0)
+ amdgpu_sdma_userq_doorbell_free(adev,
+ (u32)queue->sdma_userq_db_slot);
kfree(queue);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -762,6 +766,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
queue->queue_type = args->in.ip_type;
queue->vm = &fpriv->vm;
queue->priority = priority;
+ queue->sdma_userq_db_slot = -1;
db_info.queue_type = queue->queue_type;
db_info.doorbell_handle = queue->doorbell_handle;
@@ -792,6 +797,38 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
goto clean_mapping;
}
+ /*
+ * SDMA UMQ doorbell override: user-allocated DOORBELL BOs land at
+ * BAR offsets outside the firmware-managed NBIO SDMA decode window
+ * and cannot reach the SDMA back-end. Replace the user-supplied
+ * doorbell index with one allocated from the per-device
+ * sdma.userq_db_obj BO that sits inside the routable window.
+ * Userspace fetches a GEM handle for that BO via
+ * AMDGPU_INFO_SDMA_USERQ_DOORBELL and mmap()s it through the
+ * standard GEM_MMAP path; sdma_doorbell_offset_bytes (returned in
+ * args->out) tells userspace where inside that mapping its slot
+ * lives.
+ */
+ if (queue->queue_type == AMDGPU_HW_IP_DMA &&
+ adev->sdma.userq_db_obj) {
+ u32 slot_id;
+
+ r = amdgpu_sdma_userq_doorbell_alloc(adev, &slot_id);
+ if (r) {
+ drm_file_err(uq_mgr->file,
+ "SDMA UMQ doorbell pool exhausted (err=%d)\n",
+ r);
+ goto clean_mapping;
+ }
+ /*
+ * Slot id is a qword index inside the routable window;
+ * convert to absolute BAR dword index.
+ */
+ index = (u64)(adev->doorbell_index.sdma_engine[0] << 1) +
+ (u64)slot_id * 2;
+ queue->sdma_userq_db_slot = (int)slot_id;
+ }
+
queue->doorbell_index = index;
xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
@@ -851,6 +888,16 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
amdgpu_userq_init_hang_detect_work(queue);
args->out.queue_id = qid;
+ if (queue->sdma_userq_db_slot >= 0) {
+ /*
+ * Tell userspace where inside its mmap of the SDMA UMQ
+ * doorbell BO (handle returned by
+ * AMDGPU_INFO_SDMA_USERQ_DOORBELL) the assigned qword slot
+ * lives.
+ */
+ args->out.sdma_doorbell_offset_bytes =
+ (u64)queue->sdma_userq_db_slot * sizeof(u64);
+ }
atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
mutex_unlock(&uq_mgr->userq_mutex);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 675fe6395ac8..cdfced627dec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -59,6 +59,13 @@ struct amdgpu_usermode_queue {
uint64_t doorbell_handle;
uint64_t doorbell_index;
uint64_t flags;
+ /*
+ * For SDMA UMQs whose doorbell came from the kernel-managed pool
+ * (amdgpu_sdma_userq_doorbell_alloc), record the slot id so it can
+ * be returned to the bitmap on queue destroy. -1 means the queue
+ * is using a user-supplied doorbell BO.
+ */
+ int sdma_userq_db_slot;
struct amdgpu_mqd_prop *userq_prop;
struct amdgpu_userq_mgr *userq_mgr;
struct amdgpu_vm *vm;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 9f3090db2f16..79e8bbda046b 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -421,6 +421,14 @@ struct drm_amdgpu_userq_out {
*/
__u32 queue_id;
__u32 _pad;
+ /**
+ * For SDMA usermode queues whose doorbell was assigned by the
+ * kernel from the per-device pool (see AMDGPU_INFO_SDMA_USERQ_DOORBELL),
+ * this field carries the byte offset of the assigned slot inside
+ * the routable doorbell window so userspace can write there.
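+ * The kernel only writes this field when it overrides the user's
+ * doorbell. NOTE(review): pool slot 0 also maps to offset 0, so a
+ * value of 0 cannot by itself mean "not overridden" - confirm.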
+ */
+ __u64 sdma_doorbell_offset_bytes;
};
union drm_amdgpu_userq {
--
2.49.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 03/10] drm/amdgpu/sdma7: register SDMA UMQ doorbell pool
2026-04-24 8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
2026-04-24 8:18 ` [PATCH 02/10] drm/amdgpu/userq: route SDMA UMQ doorbells through the kernel pool Jesse Zhang
@ 2026-04-24 8:18 ` Jesse Zhang
2026-04-24 8:18 ` [PATCH 04/10] drm/amdgpu/sdma6: " Jesse Zhang
` (6 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24 8:18 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang
From: "Jesse.zhang" <Jesse.zhang@amd.com>
Call amdgpu_sdma_userq_doorbell_init() at sw_init time (gated on
userq_funcs[AMDGPU_HW_IP_DMA] being set so we only run on chips that
actually expose SDMA UMQs) and amdgpu_sdma_userq_doorbell_fini() at
sw_fini, so SDMA usermode queues get doorbells from the
firmware-managed NBIO routable window.
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 85d98a0e1bff..5f6c51ba7ac1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1378,6 +1378,14 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
break;
}
+ /*
+ * Init the SDMA usermode-queue doorbell pool inside the firmware-
+ * managed NBIO S2A SDMA decode window so user SDMA UMQs get
+ * doorbells that are actually routable to the SDMA back-end.
+ */
+ if (adev->userq_funcs[AMDGPU_HW_IP_DMA])
+ amdgpu_sdma_userq_doorbell_init(adev);
+
return r;
}
@@ -1389,6 +1397,7 @@ static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ amdgpu_sdma_userq_doorbell_fini(adev);
amdgpu_sdma_sysfs_reset_mask_fini(adev);
amdgpu_sdma_destroy_inst_ctx(adev, true);
--
2.49.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 04/10] drm/amdgpu/sdma6: register SDMA UMQ doorbell pool
2026-04-24 8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
2026-04-24 8:18 ` [PATCH 02/10] drm/amdgpu/userq: route SDMA UMQ doorbells through the kernel pool Jesse Zhang
2026-04-24 8:18 ` [PATCH 03/10] drm/amdgpu/sdma7: register SDMA UMQ doorbell pool Jesse Zhang
@ 2026-04-24 8:18 ` Jesse Zhang
2026-04-24 8:18 ` [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl Jesse Zhang
` (5 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24 8:18 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang
From: "Jesse.zhang" <Jesse.zhang@amd.com>
Call amdgpu_sdma_userq_doorbell_init() at sw_init time (gated on
userq_funcs[AMDGPU_HW_IP_DMA] being set so we only run on chips that
actually expose SDMA UMQs) and amdgpu_sdma_userq_doorbell_fini() at
sw_fini, so SDMA usermode queues get doorbells from the
firmware-managed NBIO routable window.
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index de329b76a00c..02eeac3b2e11 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1426,6 +1426,14 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ /*
+ * Init the SDMA usermode-queue doorbell pool inside the firmware-
+ * managed NBIO SDMA decode window so user SDMA UMQs get doorbells
+ * that are actually routable to the SDMA back-end.
+ */
+ if (adev->userq_funcs[AMDGPU_HW_IP_DMA])
+ amdgpu_sdma_userq_doorbell_init(adev);
+
return r;
}
@@ -1437,6 +1445,7 @@ static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ amdgpu_sdma_userq_doorbell_fini(adev);
amdgpu_sdma_sysfs_reset_mask_fini(adev);
amdgpu_sdma_destroy_inst_ctx(adev, true);
--
2.49.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl
2026-04-24 8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
` (2 preceding siblings ...)
2026-04-24 8:18 ` [PATCH 04/10] drm/amdgpu/sdma6: " Jesse Zhang
@ 2026-04-24 8:18 ` Jesse Zhang
2026-04-24 8:29 ` Christian König
2026-04-24 8:18 ` [PATCH 06/10] drm/amdgpu/mes: add NOTIFY_WORK_ON_UNMAPPED_QUEUE op + ADD_QUEUE fields Jesse Zhang
` (4 subsequent siblings)
8 siblings, 1 reply; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24 8:18 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang
From: "Jesse.zhang" <Jesse.zhang@amd.com>
New AMDGPU_INFO query that returns a per-fpriv GEM handle for the
kernel-owned BO backing the SDMA UMQ doorbell window, plus its size.
Userspace mmap()s that handle through the standard
AMDGPU_GEM_OP_MMAP / mmap() flow to obtain a CPU pointer to the
routable doorbell BAR window; each created SDMA usermode queue's
qword-slot offset inside that mapping is reported in
drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 13 +++++++++++++
include/uapi/drm/amdgpu_drm.h | 16 ++++++++++++++++
2 files changed, 29 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index d88e4994c8c1..dbcfbe418e42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1425,6 +1425,19 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return -EINVAL;
}
}
+ case AMDGPU_INFO_SDMA_USERQ_DOORBELL: {
+ struct drm_amdgpu_info_sdma_userq_doorbell db_info = {};
+ int r;
+
+ r = amdgpu_sdma_userq_doorbell_create_handle(adev, filp,
+ &db_info.handle,
+ &db_info.size_bytes);
+ if (r)
+ return r;
+ return copy_to_user(out, &db_info,
+ min((size_t)size, sizeof(db_info)))
+ ? -EFAULT : 0;
+ }
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 79e8bbda046b..533be8ad8a7e 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -1280,6 +1280,22 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
#define AMDGPU_INFO_GPUVM_FAULT 0x23
/* query FW object size and alignment */
#define AMDGPU_INFO_UQ_FW_AREAS 0x24
+/*
+ * SDMA usermode-queue doorbell window query. Returns a per-fpriv GEM
+ * handle for a kernel-owned BO that backs the routable SDMA doorbell
+ * window, plus its byte size. Userspace mmap()s the BO via the standard
+ * AMDGPU_GEM_OP_MMAP / mmap() flow to get a CPU pointer; each created
+ * SDMA usermode queue's slot offset inside that mapping is returned in
+ * drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
+ */
+#define AMDGPU_INFO_SDMA_USERQ_DOORBELL 0x25
+
+struct drm_amdgpu_info_sdma_userq_doorbell {
+ /* Per-fpriv GEM handle for the SDMA UMQ doorbell BO. */
+ __u32 handle;
+ /* Byte size of the BO (== mmappable window size). */
+ __u32 size_bytes;
+};
#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
--
2.49.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl
2026-04-24 8:18 ` [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl Jesse Zhang
@ 2026-04-24 8:29 ` Christian König
2026-04-24 13:27 ` Alex Deucher
0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2026-04-24 8:29 UTC (permalink / raw)
To: Jesse Zhang, amd-gfx; +Cc: Alexander.Deucher
On 4/24/26 10:18, Jesse Zhang wrote:
> From: "Jesse.zhang" <Jesse.zhang@amd.com>
>
> New AMDGPU_INFO query that returns a per-fpriv GEM handle for the
> kernel-owned BO backing the SDMA UMQ doorbell window, plus its size.
> Userspace mmap()s that handle through the standard
> AMDGPU_GEM_OP_MMAP / mmap() flow to obtain a CPU pointer to the
> routable doorbell BAR window; each created SDMA usermode queue's
> qword-slot offset inside that mapping is reported in
> drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
We added the separate IOCTL for this purpose on the KFD/KGD unification branch. I think we should just cherry pick that over to amd-staging-drm-next.
@Alex what do you think?
Regards,
Christian.
>
> Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 13 +++++++++++++
> include/uapi/drm/amdgpu_drm.h | 16 ++++++++++++++++
> 2 files changed, 29 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index d88e4994c8c1..dbcfbe418e42 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -1425,6 +1425,19 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> return -EINVAL;
> }
> }
> + case AMDGPU_INFO_SDMA_USERQ_DOORBELL: {
> + struct drm_amdgpu_info_sdma_userq_doorbell db_info = {};
> + int r;
> +
> + r = amdgpu_sdma_userq_doorbell_create_handle(adev, filp,
> + &db_info.handle,
> + &db_info.size_bytes);
> + if (r)
> + return r;
> + return copy_to_user(out, &db_info,
> + min((size_t)size, sizeof(db_info)))
> + ? -EFAULT : 0;
> + }
> default:
> DRM_DEBUG_KMS("Invalid request %d\n", info->query);
> return -EINVAL;
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 79e8bbda046b..533be8ad8a7e 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -1280,6 +1280,22 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
> #define AMDGPU_INFO_GPUVM_FAULT 0x23
> /* query FW object size and alignment */
> #define AMDGPU_INFO_UQ_FW_AREAS 0x24
> +/*
> + * SDMA usermode-queue doorbell window query. Returns a per-fpriv GEM
> + * handle for a kernel-owned BO that backs the routable SDMA doorbell
> + * window, plus its byte size. Userspace mmap()s the BO via the standard
> + * AMDGPU_GEM_OP_MMAP / mmap() flow to get a CPU pointer; each created
> + * SDMA usermode queue's slot offset inside that mapping is returned in
> + * drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
> + */
> +#define AMDGPU_INFO_SDMA_USERQ_DOORBELL 0x25
> +
> +struct drm_amdgpu_info_sdma_userq_doorbell {
> + /* Per-fpriv GEM handle for the SDMA UMQ doorbell BO. */
> + __u32 handle;
> + /* Byte size of the BO (== mmappable window size). */
> + __u32 size_bytes;
> +};
>
> #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
> #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl
2026-04-24 8:29 ` Christian König
@ 2026-04-24 13:27 ` Alex Deucher
2026-04-24 13:32 ` Alex Deucher
0 siblings, 1 reply; 14+ messages in thread
From: Alex Deucher @ 2026-04-24 13:27 UTC (permalink / raw)
To: Christian König; +Cc: Jesse Zhang, amd-gfx, Alexander.Deucher
On Fri, Apr 24, 2026 at 4:39 AM Christian König
<christian.koenig@amd.com> wrote:
>
> On 4/24/26 10:18, Jesse Zhang wrote:
> > From: "Jesse.zhang" <Jesse.zhang@amd.com>
> >
> > New AMDGPU_INFO query that returns a per-fpriv GEM handle for the
> > kernel-owned BO backing the SDMA UMQ doorbell window, plus its size.
> > Userspace mmap()s that handle through the standard
> > AMDGPU_GEM_OP_MMAP / mmap() flow to obtain a CPU pointer to the
> > routable doorbell BAR window; each created SDMA usermode queue's
> > qword-slot offset inside that mapping is reported in
> > drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
>
> We added the separate IOCTL for this purpose on the KFD/KGD unification branch. I think we should just cherry pick that over to amd-staging-drm-next.
>
> @Alex what do you think?
Yes, we already have a patch for this and another fix for compute
queues in the unification branch. We were just waiting on mesa to
make use of it.
Alex
>
> Regards,
> Christian.
>
> >
> > Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 13 +++++++++++++
> > include/uapi/drm/amdgpu_drm.h | 16 ++++++++++++++++
> > 2 files changed, 29 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > index d88e4994c8c1..dbcfbe418e42 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > @@ -1425,6 +1425,19 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> > return -EINVAL;
> > }
> > }
> > + case AMDGPU_INFO_SDMA_USERQ_DOORBELL: {
> > + struct drm_amdgpu_info_sdma_userq_doorbell db_info = {};
> > + int r;
> > +
> > + r = amdgpu_sdma_userq_doorbell_create_handle(adev, filp,
> > + &db_info.handle,
> > + &db_info.size_bytes);
> > + if (r)
> > + return r;
> > + return copy_to_user(out, &db_info,
> > + min((size_t)size, sizeof(db_info)))
> > + ? -EFAULT : 0;
> > + }
> > default:
> > DRM_DEBUG_KMS("Invalid request %d\n", info->query);
> > return -EINVAL;
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index 79e8bbda046b..533be8ad8a7e 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -1280,6 +1280,22 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
> > #define AMDGPU_INFO_GPUVM_FAULT 0x23
> > /* query FW object size and alignment */
> > #define AMDGPU_INFO_UQ_FW_AREAS 0x24
> > +/*
> > + * SDMA usermode-queue doorbell window query. Returns a per-fpriv GEM
> > + * handle for a kernel-owned BO that backs the routable SDMA doorbell
> > + * window, plus its byte size. Userspace mmap()s the BO via the standard
> > + * AMDGPU_GEM_OP_MMAP / mmap() flow to get a CPU pointer; each created
> > + * SDMA usermode queue's slot offset inside that mapping is returned in
> > + * drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
> > + */
> > +#define AMDGPU_INFO_SDMA_USERQ_DOORBELL 0x25
> > +
> > +struct drm_amdgpu_info_sdma_userq_doorbell {
> > + /* Per-fpriv GEM handle for the SDMA UMQ doorbell BO. */
> > + __u32 handle;
> > + /* Byte size of the BO (== mmappable window size). */
> > + __u32 size_bytes;
> > +};
> >
> > #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
> > #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl
2026-04-24 13:27 ` Alex Deucher
@ 2026-04-24 13:32 ` Alex Deucher
2026-04-27 8:33 ` Zhang, Jesse(Jie)
0 siblings, 1 reply; 14+ messages in thread
From: Alex Deucher @ 2026-04-24 13:32 UTC (permalink / raw)
To: Christian König; +Cc: Jesse Zhang, amd-gfx, Alexander.Deucher
On Fri, Apr 24, 2026 at 9:27 AM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Fri, Apr 24, 2026 at 4:39 AM Christian König
> <christian.koenig@amd.com> wrote:
> >
> > On 4/24/26 10:18, Jesse Zhang wrote:
> > > From: "Jesse.zhang" <Jesse.zhang@amd.com>
> > >
> > > New AMDGPU_INFO query that returns a per-fpriv GEM handle for the
> > > kernel-owned BO backing the SDMA UMQ doorbell window, plus its size.
> > > Userspace mmap()s that handle through the standard
> > > AMDGPU_GEM_OP_MMAP / mmap() flow to obtain a CPU pointer to the
> > > routable doorbell BAR window; each created SDMA usermode queue's
> > > qword-slot offset inside that mapping is reported in
> > > drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
> >
> > We added the separate IOCTL for this purpose on the KFD/KGD unification branch. I think we should just cherry pick that over to amd-staging-drm-next.
> >
> > @Alex what do you think?
>
> Yes, we already have a patch for this and another fix for compute
> queues in the unification branch. We were just waiting on mesa to
> make use of it.
It makes sense to land those patches, but the doorbell offset stuff is
part of David's VCN user queue patch set. See:
https://lists.freedesktop.org/archives/amd-gfx/2026-February/138619.html
Alex
>
> Alex
>
> >
> > Regards,
> > Christian.
> >
> > >
> > > Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> > > ---
> > > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 13 +++++++++++++
> > > include/uapi/drm/amdgpu_drm.h | 16 ++++++++++++++++
> > > 2 files changed, 29 insertions(+)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > index d88e4994c8c1..dbcfbe418e42 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > @@ -1425,6 +1425,19 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> > > return -EINVAL;
> > > }
> > > }
> > > + case AMDGPU_INFO_SDMA_USERQ_DOORBELL: {
> > > + struct drm_amdgpu_info_sdma_userq_doorbell db_info = {};
> > > + int r;
> > > +
> > > + r = amdgpu_sdma_userq_doorbell_create_handle(adev, filp,
> > > + &db_info.handle,
> > > + &db_info.size_bytes);
> > > + if (r)
> > > + return r;
> > > + return copy_to_user(out, &db_info,
> > > + min((size_t)size, sizeof(db_info)))
> > > + ? -EFAULT : 0;
> > > + }
> > > default:
> > > DRM_DEBUG_KMS("Invalid request %d\n", info->query);
> > > return -EINVAL;
> > > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > > index 79e8bbda046b..533be8ad8a7e 100644
> > > --- a/include/uapi/drm/amdgpu_drm.h
> > > +++ b/include/uapi/drm/amdgpu_drm.h
> > > @@ -1280,6 +1280,22 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
> > > #define AMDGPU_INFO_GPUVM_FAULT 0x23
> > > /* query FW object size and alignment */
> > > #define AMDGPU_INFO_UQ_FW_AREAS 0x24
> > > +/*
> > > + * SDMA usermode-queue doorbell window query. Returns a per-fpriv GEM
> > > + * handle for a kernel-owned BO that backs the routable SDMA doorbell
> > > + * window, plus its byte size. Userspace mmap()s the BO via the standard
> > > + * AMDGPU_GEM_OP_MMAP / mmap() flow to get a CPU pointer; each created
> > > + * SDMA usermode queue's slot offset inside that mapping is returned in
> > > + * drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
> > > + */
> > > +#define AMDGPU_INFO_SDMA_USERQ_DOORBELL 0x25
> > > +
> > > +struct drm_amdgpu_info_sdma_userq_doorbell {
> > > + /* Per-fpriv GEM handle for the SDMA UMQ doorbell BO. */
> > > + __u32 handle;
> > > + /* Byte size of the BO (== mmappable window size). */
> > > + __u32 size_bytes;
> > > +};
> > >
> > > #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
> > > #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
> >
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl
2026-04-24 13:32 ` Alex Deucher
@ 2026-04-27 8:33 ` Zhang, Jesse(Jie)
0 siblings, 0 replies; 14+ messages in thread
From: Zhang, Jesse(Jie) @ 2026-04-27 8:33 UTC (permalink / raw)
To: Alex Deucher, Koenig, Christian
Cc: amd-gfx@lists.freedesktop.org, Deucher, Alexander
AMD General
> -----Original Message-----
> From: Alex Deucher <alexdeucher@gmail.com>
> Sent: Friday, April 24, 2026 9:33 PM
> To: Koenig, Christian <Christian.Koenig@amd.com>
> Cc: Zhang, Jesse(Jie) <Jesse.Zhang@amd.com>; amd-gfx@lists.freedesktop.org;
> Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: Re: [PATCH 05/10] drm/amdgpu: add
> AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl
>
> On Fri, Apr 24, 2026 at 9:27 AM Alex Deucher <alexdeucher@gmail.com> wrote:
> >
> > On Fri, Apr 24, 2026 at 4:39 AM Christian König
> > <christian.koenig@amd.com> wrote:
> > >
> > > On 4/24/26 10:18, Jesse Zhang wrote:
> > > > From: "Jesse.zhang" <Jesse.zhang@amd.com>
> > > >
> > > > New AMDGPU_INFO query that returns a per-fpriv GEM handle for the
> > > > kernel-owned BO backing the SDMA UMQ doorbell window, plus its size.
> > > > Userspace mmap()s that handle through the standard
> > > > AMDGPU_GEM_OP_MMAP / mmap() flow to obtain a CPU pointer to the
> > > > routable doorbell BAR window; each created SDMA usermode queue's
> > > > qword-slot offset inside that mapping is reported in
> > > > drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
> > >
> > > We added the separate IOCTL for this purpose on the KFD/KGD unification
> branch. I think we should just cherry pick that over to amd-staging-drm-next.
> > >
> > > @Alex what do you think?
> >
> > Yes, we already have a patch for this and another fix for compute
> > queues in the unification branch. We were just waiting on mesa to
> > make use of it.
>
> It makes sense to land those patches, but the doorbell offset stuff is part of David's
> VCN user queue patch set. See:
> https://lists.freedesktop.org/archives/amd-gfx/2026-February/138619.html
Thanks for the reminder, Alex. I will update the patch.
Thanks
Jesse.
>
> Alex
>
> >
> > Alex
> >
> > >
> > > Regards,
> > > Christian.
> > >
> > > >
> > > > Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> > > > ---
> > > > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 13 +++++++++++++
> > > > include/uapi/drm/amdgpu_drm.h | 16 ++++++++++++++++
> > > > 2 files changed, 29 insertions(+)
> > > >
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > > index d88e4994c8c1..dbcfbe418e42 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > > @@ -1425,6 +1425,19 @@ int amdgpu_info_ioctl(struct drm_device *dev,
> void *data, struct drm_file *filp)
> > > > return -EINVAL;
> > > > }
> > > > }
> > > > + case AMDGPU_INFO_SDMA_USERQ_DOORBELL: {
> > > > + struct drm_amdgpu_info_sdma_userq_doorbell db_info = {};
> > > > + int r;
> > > > +
> > > > + r = amdgpu_sdma_userq_doorbell_create_handle(adev, filp,
> > > > + &db_info.handle,
> > > > + &db_info.size_bytes);
> > > > + if (r)
> > > > + return r;
> > > > + return copy_to_user(out, &db_info,
> > > > + min((size_t)size, sizeof(db_info)))
> > > > + ? -EFAULT : 0;
> > > > + }
> > > > default:
> > > > DRM_DEBUG_KMS("Invalid request %d\n", info->query);
> > > > return -EINVAL;
> > > > diff --git a/include/uapi/drm/amdgpu_drm.h
> > > > b/include/uapi/drm/amdgpu_drm.h index 79e8bbda046b..533be8ad8a7e
> > > > 100644
> > > > --- a/include/uapi/drm/amdgpu_drm.h
> > > > +++ b/include/uapi/drm/amdgpu_drm.h
> > > > @@ -1280,6 +1280,22 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
> > > > #define AMDGPU_INFO_GPUVM_FAULT 0x23
> > > > /* query FW object size and alignment */
> > > > #define AMDGPU_INFO_UQ_FW_AREAS 0x24
> > > > +/*
> > > > + * SDMA usermode-queue doorbell window query. Returns a
> > > > +per-fpriv GEM
> > > > + * handle for a kernel-owned BO that backs the routable SDMA
> > > > +doorbell
> > > > + * window, plus its byte size. Userspace mmap()s the BO via the
> > > > +standard
> > > > + * AMDGPU_GEM_OP_MMAP / mmap() flow to get a CPU pointer; each
> > > > +created
> > > > + * SDMA usermode queue's slot offset inside that mapping is
> > > > +returned in
> > > > + * drm_amdgpu_userq_out.sdma_doorbell_offset_bytes.
> > > > + */
> > > > +#define AMDGPU_INFO_SDMA_USERQ_DOORBELL 0x25
> > > > +
> > > > +struct drm_amdgpu_info_sdma_userq_doorbell {
> > > > + /* Per-fpriv GEM handle for the SDMA UMQ doorbell BO. */
> > > > + __u32 handle;
> > > > + /* Byte size of the BO (== mmappable window size). */
> > > > + __u32 size_bytes;
> > > > +};
> > > >
> > > > #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
> > > > #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
> > >
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 06/10] drm/amdgpu/mes: add NOTIFY_WORK_ON_UNMAPPED_QUEUE op + ADD_QUEUE fields
2026-04-24 8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
` (3 preceding siblings ...)
2026-04-24 8:18 ` [PATCH 05/10] drm/amdgpu: add AMDGPU_INFO_SDMA_USERQ_DOORBELL ioctl Jesse Zhang
@ 2026-04-24 8:18 ` Jesse Zhang
2026-04-24 8:18 ` [PATCH 07/10] drm/amdgpu/mes11: plumb unmap_flag_addr + NOTIFY_WORK_ON_UNMAPPED_QUEUE Jesse Zhang
` (3 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24 8:18 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang
From: "Jesse.zhang" <Jesse.zhang@amd.com>
Kernel-side abstraction work for the SDMA usermode-queue plumbing
that lands in the subsequent per-engine patches:
- mes_add_queue_input gains is_user_mode_submission and
unmap_flag_addr. Without is_user_mode_submission MES treats SDMA
queues as kernel-managed and uses the end-of-MQD slot for the unmap
flag, so PROTECTED_FENCE at the tail of every SDMA IB looks like a
"queue done" signal and MES gangs the queue out forever.
- mes_misc_opcode gains MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE
with a notify_work.priority_level payload. This wakes a ganged-out
SDMA UMQ so subsequent IBs get re-mapped (SDMA has no
CP_UNMAPPED_DOORBELL HW intercept).
Also surface the matching firmware bits in mes_v12_api_def.h:
is_user_mode_submission / enable_perf_profiling /
exclude_process_limit / is_video_blit_queue bitfields in
MESAPI__ADD_QUEUE, and the unmap_flag_addr packet field.
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 7 +++++++
drivers/gpu/drm/amd/include/mes_v12_api_def.h | 12 +++++++++++-
2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index cafc5caae822..705056de94b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -265,6 +265,8 @@ struct mes_add_queue_input {
uint32_t exclusively_scheduled;
uint32_t sh_mem_config_data;
uint32_t vm_cntx_cntl;
+ uint32_t is_user_mode_submission;
+ uint64_t unmap_flag_addr;
};
struct mes_remove_queue_input {
@@ -343,6 +345,7 @@ enum mes_misc_opcode {
MES_MISC_OP_WRM_REG_WR_WAIT,
MES_MISC_OP_SET_SHADER_DEBUGGER,
MES_MISC_OP_CHANGE_CONFIG,
+ MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE,
};
struct mes_misc_op_input {
@@ -397,6 +400,10 @@ struct mes_misc_op_input {
uint32_t tdr_delay;
} tdr_config;
} change_config;
+
+ struct {
+ uint32_t priority_level;
+ } notify_work;
};
};
diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h
index e541a43714a1..cd6e60184a06 100644
--- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h
@@ -381,7 +381,11 @@ union MESAPI__ADD_QUEUE {
uint32_t exclusively_scheduled : 1;
uint32_t is_long_running : 1;
uint32_t is_dwm_queue : 1;
- uint32_t reserved : 15;
+ uint32_t is_video_blit_queue : 1;
+ uint32_t is_user_mode_submission : 1;
+ uint32_t enable_perf_profiling : 1;
+ uint32_t exclude_process_limit : 1;
+ uint32_t reserved : 11;
};
struct MES_API_STATUS api_status;
uint64_t tma_addr;
@@ -393,6 +397,12 @@ union MESAPI__ADD_QUEUE {
uint32_t queue_id;
uint32_t alignment_mode_setting;
uint32_t full_sh_mem_config_data;
+ /*
+ * MC addr where MES writes 1 when it unmaps the queue. Used
+ * by user-mode SDMA UMQs so the kernel/userspace can detect
+ * the unmapped state and re-arm work via NOTIFY_WORK_ON_UNMAPPED_QUEUE.
+ */
+ uint64_t unmap_flag_addr;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
--
2.49.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 07/10] drm/amdgpu/mes11: plumb unmap_flag_addr + NOTIFY_WORK_ON_UNMAPPED_QUEUE
2026-04-24 8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
` (4 preceding siblings ...)
2026-04-24 8:18 ` [PATCH 06/10] drm/amdgpu/mes: add NOTIFY_WORK_ON_UNMAPPED_QUEUE op + ADD_QUEUE fields Jesse Zhang
@ 2026-04-24 8:18 ` Jesse Zhang
2026-04-24 8:18 ` [PATCH 08/10] drm/amdgpu/mes12: plumb is_user_mode_submission, unmap_flag_addr, NOTIFY Jesse Zhang
` (2 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24 8:18 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang
From: "Jesse.zhang" <Jesse.zhang@amd.com>
Pass the new mes_add_queue_input.unmap_flag_addr through to the
MESAPI__ADD_QUEUE packet, and route MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE
to the matching MESAPI_MISC opcode.
Note: the MES v11 firmware spec does not (yet) carry a per-queue
is_user_mode_submission bit, so SDMA UMQs on chips with MES v11 may
still see PROTECTED_FENCE-as-queue-done behaviour after the first IB
until firmware adds the bit. The wakeup mechanism (NOTIFY) is wired
up so that path is ready when firmware lands.
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index a926a330700e..575cc4a684b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -362,6 +362,16 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.exclusively_scheduled = input->exclusively_scheduled;
+ /*
+ * unmap_flag_addr is plumbed through but only honoured by MES when
+ * the global use_add_queue_unmap_flag_addr flag is set in
+ * SET_HW_RESOURCES. MES v11 firmware spec does not carry a
+ * per-queue is_user_mode_submission bit, so SDMA UMQs on chips with
+ * MES v11 may still see PROTECTED_FENCE-as-queue-done behaviour
+ * until firmware adds the bit.
+ */
+ mes_add_queue_pkt.unmap_flag_addr = input->unmap_flag_addr;
+
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
offsetof(union MESAPI__ADD_QUEUE, api_status));
@@ -660,6 +670,10 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
misc_pkt.change_config.option.bits.limit_single_process =
input->change_config.option.limit_single_process;
break;
+ case MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE:
+ misc_pkt.opcode = MESAPI_MISC__NOTIFY_WORK_ON_UNMAPPED_QUEUE;
+ misc_pkt.queue_sch_level = input->notify_work.priority_level;
+ break;
default:
drm_err(adev_to_drm(mes->adev), "unsupported misc op (%d)\n", input->op);
--
2.49.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 08/10] drm/amdgpu/mes12: plumb is_user_mode_submission, unmap_flag_addr, NOTIFY
2026-04-24 8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
` (5 preceding siblings ...)
2026-04-24 8:18 ` [PATCH 07/10] drm/amdgpu/mes11: plumb unmap_flag_addr + NOTIFY_WORK_ON_UNMAPPED_QUEUE Jesse Zhang
@ 2026-04-24 8:18 ` Jesse Zhang
2026-04-24 8:18 ` [PATCH 09/10] drm/amdgpu/mes_userqueue: mark SDMA UMQs as user-mode submission Jesse Zhang
2026-04-24 8:18 ` [PATCH 10/10] drm/amdgpu/userq_fence: wake gangs-out SDMA UMQs via NOTIFY Jesse Zhang
8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24 8:18 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang
From: "Jesse.zhang" <Jesse.zhang@amd.com>
Pass is_user_mode_submission and unmap_flag_addr from
mes_add_queue_input through to MESAPI__ADD_QUEUE in both mes_v12_0
and mes_v12_1 add_hw_queue paths, and route
MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE to the matching MESAPI_MISC
opcode.
The kernel-side caller that actually sets is_user_mode_submission for
SDMA UMQs lives in a later patch; this one is just the engine-level
plumbing.
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 6 ++++++
drivers/gpu/drm/amd/amdgpu/mes_v12_1.c | 6 ++++++
2 files changed, 12 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 023c7345ea54..5acc505533f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -342,6 +342,8 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.trap_en = input->trap_en;
mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
+ mes_add_queue_pkt.is_user_mode_submission = input->is_user_mode_submission;
+ mes_add_queue_pkt.unmap_flag_addr = input->unmap_flag_addr;
/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
@@ -697,6 +699,10 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
misc_pkt.change_config.option.bits.limit_single_process =
input->change_config.option.limit_single_process;
break;
+ case MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE:
+ misc_pkt.opcode = MESAPI_MISC__NOTIFY_WORK_ON_UNMAPPED_QUEUE;
+ misc_pkt.queue_sch_level = input->notify_work.priority_level;
+ break;
default:
DRM_ERROR("unsupported misc op (%d)\n", input->op);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
index 2d8a10d18939..235dbbf99ec8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
@@ -325,6 +325,8 @@ static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.trap_en = input->trap_en;
mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
+ mes_add_queue_pkt.is_user_mode_submission = input->is_user_mode_submission;
+ mes_add_queue_pkt.unmap_flag_addr = input->unmap_flag_addr;
/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
@@ -598,6 +600,10 @@ static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
misc_pkt.change_config.option.bits.limit_single_process =
input->change_config.option.limit_single_process;
break;
+ case MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE:
+ misc_pkt.opcode = MESAPI_MISC__NOTIFY_WORK_ON_UNMAPPED_QUEUE;
+ misc_pkt.queue_sch_level = input->notify_work.priority_level;
+ break;
default:
DRM_ERROR("unsupported misc op (%d) \n", input->op);
return -EINVAL;
--
2.49.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 09/10] drm/amdgpu/mes_userqueue: mark SDMA UMQs as user-mode submission
2026-04-24 8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
` (6 preceding siblings ...)
2026-04-24 8:18 ` [PATCH 08/10] drm/amdgpu/mes12: plumb is_user_mode_submission, unmap_flag_addr, NOTIFY Jesse Zhang
@ 2026-04-24 8:18 ` Jesse Zhang
2026-04-24 8:18 ` [PATCH 10/10] drm/amdgpu/userq_fence: wake gangs-out SDMA UMQs via NOTIFY Jesse Zhang
8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24 8:18 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="Y", Size: 2382 bytes --]
From: "Jesse.zhang" <Jesse.zhang@amd.com>
For AMDGPU_HW_IP_DMA queues, set mes_add_queue_input.is_user_mode_submission
and a stable unmap_flag_addr (a kernel-owned dword in the MQD
object's tail padding). This tells MES to use the new wptr_mc /
unmap_flag scheme so the PROTECTED_FENCE at the tail of every SDMA
IB no longer terminates the queue. Combined with the
NOTIFY_WORK_ON_UNMAPPED_QUEUE wakeup added in a follow-up patch, this
lets multi-IB submissions on a single SDMA UMQ work end-to-end.
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index d12cd1b7790b..3dbcddb46b24 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -165,6 +165,28 @@ static int mes_userq_map(struct amdgpu_usermode_queue *queue)
queue_input.doorbell_offset = userq_props->doorbell_index;
queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo);
queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;
+ /*
+ * SDMA UMQs need is_user_mode_submission so MES treats them as user
+ * queues (using the new wptr_mc_addr / unmap_flag_addr scheme).
+ * Without this MES uses end-of-MQD for unmap_flag, sees PROTECTED_FENCE
+ * as a "queue done" signal, and gangs the queue out forever. Combined
+ * with NOTIFY_WORK_ON_UNMAPPED_QUEUE poke from amdgpu_userq_signal_ioctl
+ * this lets multi-IB submissions work. Use queue->mqd.gpu_addr +
+ * mqd_size + sizeof(u32) as a stable kernel-owned unmap_flag location;
+ * userspace never reads it, it only has to be a valid address for MES.
+ */
+ if (queue->queue_type == AMDGPU_HW_IP_DMA) {
+ queue_input.is_user_mode_submission = 1;
+ /*
+ * Same offset MES would derive in legacy mode
+ * (get_unmap_flag_addr_from_end_of_mqd in MES src 12). Lives
+ * inside the allocated MQD object's tail padding so it's a
+ * valid MC address; the kernel never reads it back — its only
+ * purpose is to keep MES happy.
+ */
+ queue_input.unmap_flag_addr = queue->mqd.gpu_addr +
+ adev->mqds[queue->queue_type].mqd_size + sizeof(u32);
+ }
amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
--
2.49.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 10/10] drm/amdgpu/userq_fence: wake gangs-out SDMA UMQs via NOTIFY
2026-04-24 8:18 [PATCH 01/10] drm/amdgpu/sdma: add SDMA usermode-queue doorbell pool infra Jesse Zhang
` (7 preceding siblings ...)
2026-04-24 8:18 ` [PATCH 09/10] drm/amdgpu/mes_userqueue: mark SDMA UMQs as user-mode submission Jesse Zhang
@ 2026-04-24 8:18 ` Jesse Zhang
8 siblings, 0 replies; 14+ messages in thread
From: Jesse Zhang @ 2026-04-24 8:18 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian Koenig, Jesse.zhang, Jesse Zhang
From: "Jesse.zhang" <Jesse.zhang@amd.com>
SDMA has no CP_UNMAPPED_DOORBELL HW intercept, so once MES gangs the
queue out (after the first IB idles it) per-queue doorbell rings from
userspace hit a mapped-out HW slot and are silently dropped: rptr
stops advancing and FENCE IRQ never fires.
After the SDMA UMQ's first IB has actually completed (the 64-bit
value at fence_drv->cpu_addr is non-zero), issue
MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE for
AMDGPU_MES_PRIORITY_LEVEL_NORMAL and ring that priority level's
aggregated doorbell so MES re-evaluates scheduling and re-maps the
queue for the next IB. The first submission is intentionally skipped:
the queue is still mapped from MAP_QUEUE at that point, and an extra
notify would race the initial scheduling.
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
---
.../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 33 +++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index a58342c2ac44..6ef4cbd5d5da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -598,6 +598,39 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
/* drop the reference acquired in fence creation function */
dma_fence_put(fence);
+ /*
+ * SDMA UMQ wake: SDMA has no CP_UNMAPPED_DOORBELL HW intercept, so
+ * once MES gangs the queue out (after the first IB's PROTECTED_FENCE
+ * idles the queue), subsequent per-queue doorbell rings hit a
+ * mapped-out HW slot and are silently ignored — rptr stops
+ * advancing, FENCE IRQ never fires. The MES MISC API
+ * NOTIFY_WORK_ON_UNMAPPED_QUEUE flips MES's hasReadyQueues flag for
+ * the queue's priority level, which makes MES re-evaluate
+ * scheduling and re-map our SDMA UMQ for the next IB.
+ *
+ * Skip on the very first submission (*fence_drv->cpu_addr == 0
+ * means SDMA hasn't completed any IB yet, so MES still has the
+ * queue mapped from MAP_QUEUE — calling NOTIFY here would race the
+ * initial scheduling and starve the first IB).
+ */
+ if (queue && queue->queue_type == AMDGPU_HW_IP_DMA &&
+ adev->enable_mes && adev->mes.funcs->misc_op &&
+ queue->fence_drv && queue->fence_drv->cpu_addr &&
+ le64_to_cpu(*queue->fence_drv->cpu_addr) != 0) {
+ struct mes_misc_op_input op = { 0 };
+ u32 agg_db = adev->mes.aggregated_doorbells[
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL];
+
+ op.op = MES_MISC_OP_NOTIFY_WORK_ON_UNMAPPED_QUEUE;
+ op.notify_work.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+ amdgpu_mes_lock(&adev->mes);
+ (void)adev->mes.funcs->misc_op(&adev->mes, &op);
+ amdgpu_mes_unlock(&adev->mes);
+
+ if (agg_db)
+ WDOORBELL64(agg_db, queue->doorbell_index);
+ }
+
exec_fini:
drm_exec_fini(&exec);
put_gobj_write:
--
2.49.0
^ permalink raw reply related [flat|nested] 14+ messages in thread