* [PATCH v8 01/14] drm/amdgpu: validate userq input args
@ 2025-08-08 6:28 Prike Liang
2025-08-08 6:28 ` [PATCH v8 02/14] drm/amdgpu: validate userq hw unmap status for destroying userq Prike Liang
` (12 more replies)
0 siblings, 13 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:28 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian.Koenig, Prike Liang, Alex Deucher
This will help on validating the userq input args, and
rejecting for the invalid userq request at the IOCTLs
first place.
Signed-off-by: Prike Liang <Prike.Liang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 81 +++++++++++++++-------
drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 7 --
2 files changed, 56 insertions(+), 32 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 295e7186e156..7f9dfeae4322 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -359,27 +359,10 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
(args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
- /* Usermode queues are only supported for GFX IP as of now */
- if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
- args->in.ip_type != AMDGPU_HW_IP_DMA &&
- args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
- drm_file_err(uq_mgr->file, "Usermode queue doesn't support IP type %u\n",
- args->in.ip_type);
- return -EINVAL;
- }
-
r = amdgpu_userq_priority_permit(filp, priority);
if (r)
return r;
- if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
- (args->in.ip_type != AMDGPU_HW_IP_GFX) &&
- (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
- !amdgpu_is_tmz(adev)) {
- drm_file_err(uq_mgr->file, "Secure only supported on GFX/Compute queues\n");
- return -EINVAL;
- }
-
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n");
@@ -485,22 +468,45 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
return r;
}
-int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp)
+static int amdgpu_userq_input_args_validate(struct drm_device *dev,
+ union drm_amdgpu_userq *args,
+ struct drm_file *filp)
{
- union drm_amdgpu_userq *args = data;
- int r;
+ struct amdgpu_device *adev = drm_to_adev(dev);
switch (args->in.op) {
case AMDGPU_USERQ_OP_CREATE:
if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK |
AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE))
return -EINVAL;
- r = amdgpu_userq_create(filp, args);
- if (r)
- drm_file_err(filp, "Failed to create usermode queue\n");
- break;
+ /* Usermode queues are only supported for GFX IP as of now */
+ if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
+ args->in.ip_type != AMDGPU_HW_IP_DMA &&
+ args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
+ drm_file_err(filp, "Usermode queue doesn't support IP type %u\n",
+ args->in.ip_type);
+ return -EINVAL;
+ }
+
+ if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
+ (args->in.ip_type != AMDGPU_HW_IP_GFX) &&
+ (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
+ !amdgpu_is_tmz(adev)) {
+ drm_file_err(filp, "Secure only supported on GFX/Compute queues\n");
+ return -EINVAL;
+ }
+ if (args->in.queue_va == AMDGPU_BO_INVALID_OFFSET ||
+ args->in.queue_va == 0 ||
+ args->in.queue_size == 0) {
+ drm_file_err(filp, "invalidate userq queue va or size\n");
+ return -EINVAL;
+ }
+ if (!args->in.wptr_va || !args->in.rptr_va) {
+ drm_file_err(filp, "invalidate userq queue rptr or wptr\n");
+ return -EINVAL;
+ }
+ break;
case AMDGPU_USERQ_OP_FREE:
if (args->in.ip_type ||
args->in.doorbell_handle ||
@@ -514,6 +520,31 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
args->in.mqd ||
args->in.mqd_size)
return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_userq *args = data;
+ int r;
+
+ if (amdgpu_userq_input_args_validate(dev, args, filp) < 0)
+ return -EINVAL;
+
+ switch (args->in.op) {
+ case AMDGPU_USERQ_OP_CREATE:
+ r = amdgpu_userq_create(filp, args);
+ if (r)
+ drm_file_err(filp, "Failed to create usermode queue\n");
+ break;
+
+ case AMDGPU_USERQ_OP_FREE:
r = amdgpu_userq_destroy(filp, args->in.queue_id);
if (r)
drm_file_err(filp, "Failed to destroy usermode queue\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index d6f50b13e2ba..1457fb49a794 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -215,13 +215,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
return -ENOMEM;
}
- if (!mqd_user->wptr_va || !mqd_user->rptr_va ||
- !mqd_user->queue_va || mqd_user->queue_size == 0) {
- DRM_ERROR("Invalid MQD parameters for userqueue\n");
- r = -EINVAL;
- goto free_props;
- }
-
r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size);
if (r) {
DRM_ERROR("Failed to create MQD object for userqueue\n");
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 02/14] drm/amdgpu: validate userq hw unmap status for destroying userq
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
@ 2025-08-08 6:28 ` Prike Liang
2025-08-08 6:28 ` [PATCH v8 03/14] drm/amdgpu: add UAPI for user queue query status Prike Liang
` (11 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:28 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian.Koenig, Prike Liang, Alex Deucher
Before destroying the userq buffer object, it requires validating
the userq HW unmap status and ensuring the userq is unmapped from
hardware. If the user HW unmap failed, then it needs to reset the
queue for reusing.
Signed-off-by: Prike Liang <Prike.Liang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 7f9dfeae4322..2b35198608a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -319,6 +319,11 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
}
amdgpu_bo_unref(&queue->db_obj.obj);
r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+ /*TODO: It requires a reset for userq hw unmap error*/
+ if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) {
+ drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n");
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ }
amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
mutex_unlock(&uq_mgr->userq_mutex);
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 03/14] drm/amdgpu: add UAPI for user queue query status
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
2025-08-08 6:28 ` [PATCH v8 02/14] drm/amdgpu: validate userq hw unmap status for destroying userq Prike Liang
@ 2025-08-08 6:28 ` Prike Liang
2025-08-08 6:29 ` [PATCH v8 04/14] drm/amdgpu/userq: implement support for " Prike Liang
` (10 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:28 UTC (permalink / raw)
To: amd-gfx
Cc: Alexander.Deucher, Christian.Koenig, Alex Deucher,
Christian König, Sunil Khatri
From: Alex Deucher <alexander.deucher@amd.com>
Add an API to query queue status such as whether the
queue is hung or whether vram is lost.
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Sunil Khatri <sunil.khatri@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
include/uapi/drm/amdgpu_drm.h | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 66c4a03ac9f9..efd0d2356077 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -328,6 +328,7 @@ union drm_amdgpu_ctx {
/* user queue IOCTL operations */
#define AMDGPU_USERQ_OP_CREATE 1
#define AMDGPU_USERQ_OP_FREE 2
+#define AMDGPU_USERQ_OP_QUERY_STATUS 3
/* queue priority levels */
/* low < normal low < normal high < high */
@@ -340,6 +341,12 @@ union drm_amdgpu_ctx {
/* for queues that need access to protected content */
#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2)
+
+/* the queue is hung */
+#define AMDGPU_USERQ_QUERY_STATUS_FLAGS_HUNG (1 << 0)
+/* indicate vram lost since queue was created */
+#define AMDGPU_USERQ_QUERY_STATUS_FLAGS_VRAMLOST (1 << 1)
+
/*
* This structure is a container to pass input configuration
* info for all supported userqueue related operations.
@@ -421,9 +428,16 @@ struct drm_amdgpu_userq_out {
__u32 _pad;
};
+/* The structure to carry output of userqueue ops */
+struct drm_amdgpu_userq_out_query_state {
+ __u32 flags;
+ __u32 _pad;
+};
+
union drm_amdgpu_userq {
struct drm_amdgpu_userq_in in;
struct drm_amdgpu_userq_out out;
+ struct drm_amdgpu_userq_out_query_state out_qs;
};
/* GFX V11 IP specific MQD parameters */
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 04/14] drm/amdgpu/userq: implement support for query status
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
2025-08-08 6:28 ` [PATCH v8 02/14] drm/amdgpu: validate userq hw unmap status for destroying userq Prike Liang
2025-08-08 6:28 ` [PATCH v8 03/14] drm/amdgpu: add UAPI for user queue query status Prike Liang
@ 2025-08-08 6:29 ` Prike Liang
2025-08-08 6:29 ` [PATCH v8 05/14] drm/amdgpu/userq: extend queue flags for user queue " Prike Liang
` (9 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:29 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian.Koenig, Alex Deucher, Sunil Khatri
From: Alex Deucher <alexander.deucher@amd.com>
Query the status of the user queue, currently whether
the queue is hung and whether or not VRAM is lost.
v2: Misc cleanups
Reviewed-by: Sunil Khatri <sunil.khatri@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 35 ++++++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 1 +
2 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 2b35198608a7..75b7e39ee576 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -403,6 +403,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
queue->queue_type = args->in.ip_type;
queue->vm = &fpriv->vm;
queue->priority = priority;
+ queue->generation = amdgpu_vm_generation(adev, &fpriv->vm);
db_info.queue_type = queue->queue_type;
db_info.doorbell_handle = queue->doorbell_handle;
@@ -473,6 +474,34 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
return r;
}
+static int
+amdgpu_userq_query_status(struct drm_file *filp, union drm_amdgpu_userq *args)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_usermode_queue *queue;
+ int queue_id = args->in.queue_id;
+
+ mutex_lock(&uq_mgr->userq_mutex);
+
+ queue = amdgpu_userq_find(uq_mgr, queue_id);
+ if (!queue) {
+ dev_dbg(adev->dev, "Invalid queue id to query\n");
+ mutex_unlock(&uq_mgr->userq_mutex);
+ return -EINVAL;
+ }
+ args->out_qs.flags = 0;
+ if (queue->state == AMDGPU_USERQ_STATE_HUNG)
+ args->out_qs.flags |= AMDGPU_USERQ_QUERY_STATUS_FLAGS_HUNG;
+ if (queue->generation != amdgpu_vm_generation(adev, &fpriv->vm))
+ args->out_qs.flags |= AMDGPU_USERQ_QUERY_STATUS_FLAGS_VRAMLOST;
+
+ mutex_unlock(&uq_mgr->userq_mutex);
+
+ return 0;
+}
+
static int amdgpu_userq_input_args_validate(struct drm_device *dev,
union drm_amdgpu_userq *args,
struct drm_file *filp)
@@ -554,7 +583,11 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
if (r)
drm_file_err(filp, "Failed to destroy usermode queue\n");
break;
-
+ case AMDGPU_USERQ_OP_QUERY_STATUS:
+ r = amdgpu_userq_query_status(filp, args);
+ if (r)
+ drm_file_err(filp, "Failed to query usermode queue status\n");
+ break;
default:
drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op);
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index ec040c2fd6c9..48722936ff70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -65,6 +65,7 @@ struct amdgpu_usermode_queue {
struct dma_fence *last_fence;
u32 xcp_id;
int priority;
+ uint64_t generation;
};
struct amdgpu_userq_funcs {
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 05/14] drm/amdgpu/userq: extend queue flags for user queue query status
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
` (2 preceding siblings ...)
2025-08-08 6:29 ` [PATCH v8 04/14] drm/amdgpu/userq: implement support for " Prike Liang
@ 2025-08-08 6:29 ` Prike Liang
2025-08-08 6:29 ` [PATCH v8 06/14] drm/amdgpu/userq: extend userq state Prike Liang
` (8 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:29 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian.Koenig, Prike Liang
Add the userq flag to identify the invalid userq cases.
Signed-off-by: Prike Liang <Prike.Liang@amd.com>
---
include/uapi/drm/amdgpu_drm.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index efd0d2356077..3c536c8517cd 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -346,6 +346,10 @@ union drm_amdgpu_ctx {
#define AMDGPU_USERQ_QUERY_STATUS_FLAGS_HUNG (1 << 0)
/* indicate vram lost since queue was created */
#define AMDGPU_USERQ_QUERY_STATUS_FLAGS_VRAMLOST (1 << 1)
+/* the invalid userq input argument */
+#define AMDGPU_USERQ_QUERY_STATUS_FLAGS_INVALID_ARG (1 << 2)
+/* the invalid userq VA unmapped */
+#define AMDGPU_USERQ_QUERY_STATUS_FLAGS_INVALID_VA (1 << 3)
/*
* This structure is a container to pass input configuration
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 06/14] drm/amdgpu/userq: extend userq state
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
` (3 preceding siblings ...)
2025-08-08 6:29 ` [PATCH v8 05/14] drm/amdgpu/userq: extend queue flags for user queue " Prike Liang
@ 2025-08-08 6:29 ` Prike Liang
2025-08-08 6:29 ` [PATCH v8 07/14] drm/amdgpu: validate userq buffer virtual address and size Prike Liang
` (7 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:29 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian.Koenig, Prike Liang
Extend the userq state for identifying the
userq invalid cases, such as the IOCTL invalid
input parameter, and unmap the VA before destroying
userq.
Signed-off-by: Prike Liang <Prike.Liang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 4 ++++
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 2 ++
2 files changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 75b7e39ee576..b670ca8111f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -496,6 +496,10 @@ amdgpu_userq_query_status(struct drm_file *filp, union drm_amdgpu_userq *args)
args->out_qs.flags |= AMDGPU_USERQ_QUERY_STATUS_FLAGS_HUNG;
if (queue->generation != amdgpu_vm_generation(adev, &fpriv->vm))
args->out_qs.flags |= AMDGPU_USERQ_QUERY_STATUS_FLAGS_VRAMLOST;
+ if (queue->state == AMDGPU_USERQ_STATE_INVALID_ARG)
+ args->out_qs.flags |= AMDGPU_USERQ_QUERY_STATUS_FLAGS_INVALID_ARG;
+ if (queue->state == AMDGPU_USERQ_STATE_INVALID_VA)
+ args->out_qs.flags |= AMDGPU_USERQ_QUERY_STATUS_FLAGS_INVALID_VA;
mutex_unlock(&uq_mgr->userq_mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 48722936ff70..694f850d102e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -37,6 +37,8 @@ enum amdgpu_userq_state {
AMDGPU_USERQ_STATE_MAPPED,
AMDGPU_USERQ_STATE_PREEMPTED,
AMDGPU_USERQ_STATE_HUNG,
+ AMDGPU_USERQ_STATE_INVALID_ARG,
+ AMDGPU_USERQ_STATE_INVALID_VA,
};
struct amdgpu_mqd_prop;
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 07/14] drm/amdgpu: validate userq buffer virtual address and size
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
` (4 preceding siblings ...)
2025-08-08 6:29 ` [PATCH v8 06/14] drm/amdgpu/userq: extend userq state Prike Liang
@ 2025-08-08 6:29 ` Prike Liang
2025-08-08 6:29 ` [PATCH v8 08/14] drm/amdgpu: add userq object va track helpers Prike Liang
` (6 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:29 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian.Koenig, Prike Liang, Alex Deucher
It needs to validate the userq object virtual address to
determin whether it is residented in a valid vm mapping.
Signed-off-by: Prike Liang <Prike.Liang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 41 ++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 2 ++
drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 22 ++++++++++++
3 files changed, 65 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index b670ca8111f3..0aeb7a96ccbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -44,6 +44,38 @@ u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
return userq_ip_mask;
}
+int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
+ u64 expected_size)
+{
+ struct amdgpu_bo_va_mapping *va_map;
+ u64 user_addr;
+ u64 size;
+ int r = 0;
+
+ user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
+ size = expected_size >> AMDGPU_GPU_PAGE_SHIFT;
+
+ r = amdgpu_bo_reserve(vm->root.bo, false);
+ if (r)
+ return r;
+
+ va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
+ if (!va_map) {
+ r = -EINVAL;
+ goto out_err;
+ }
+ /* Only validate the userq whether resident in the VM mapping range */
+ if (user_addr >= va_map->start &&
+ va_map->last - user_addr + 1 >= size) {
+ amdgpu_bo_unreserve(vm->root.bo);
+ return 0;
+ }
+
+out_err:
+ amdgpu_bo_unreserve(vm->root.bo);
+ return r;
+}
+
static int
amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue)
@@ -399,6 +431,15 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
r = -ENOMEM;
goto unlock;
}
+
+ /* Validate the userq virtual address.*/
+ if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, args->in.queue_size) ||
+ amdgpu_userq_input_va_validate(&fpriv->vm, args->in.rptr_va, PAGE_SIZE) ||
+ amdgpu_userq_input_va_validate(&fpriv->vm, args->in.wptr_va, PAGE_SIZE)) {
+ queue->state = AMDGPU_USERQ_STATE_INVALID_ARG;
+ kfree(queue);
+ goto unlock;
+ }
queue->doorbell_handle = args->in.doorbell_handle;
queue->queue_type = args->in.ip_type;
queue->vm = &fpriv->vm;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 694f850d102e..0eb2a9c2e340 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -135,4 +135,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
u32 idx);
+int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
+ u64 expected_size);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 1457fb49a794..6e29e85bbf9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -206,6 +206,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type];
struct drm_amdgpu_userq_in *mqd_user = args_in;
struct amdgpu_mqd_prop *userq_props;
+ struct amdgpu_gfx_shadow_info shadow_info;
int r;
/* Structure to initialize MQD for userqueue using generic MQD init function */
@@ -231,6 +232,8 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
userq_props->doorbell_index = queue->doorbell_index;
userq_props->fence_address = queue->fence_drv->gpu_addr;
+ if (adev->gfx.funcs->get_gfx_shadow_info)
+ adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow_info, true);
if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
@@ -247,6 +250,12 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
goto free_mqd;
}
+ if (amdgpu_userq_input_va_validate(queue->vm, compute_mqd->eop_va,
+ max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE))) {
+ queue->state = AMDGPU_USERQ_STATE_INVALID_ARG;
+ goto free_mqd;
+ }
+
userq_props->eop_gpu_addr = compute_mqd->eop_va;
userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
@@ -274,6 +283,13 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
userq_props->csa_addr = mqd_gfx_v11->csa_va;
userq_props->tmz_queue =
mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
+
+ if (amdgpu_userq_input_va_validate(queue->vm, mqd_gfx_v11->shadow_va,
+ shadow_info.shadow_size)) {
+ queue->state = AMDGPU_USERQ_STATE_INVALID_ARG;
+ goto free_mqd;
+ }
+
kfree(mqd_gfx_v11);
} else if (queue->queue_type == AMDGPU_HW_IP_DMA) {
struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11;
@@ -291,6 +307,12 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
goto free_mqd;
}
+ if (amdgpu_userq_input_va_validate(queue->vm, mqd_sdma_v11->csa_va,
+ shadow_info.csa_size)) {
+ queue->state = AMDGPU_USERQ_STATE_INVALID_ARG;
+ goto free_mqd;
+ }
+
userq_props->csa_addr = mqd_sdma_v11->csa_va;
kfree(mqd_sdma_v11);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 08/14] drm/amdgpu: add userq object va track helpers
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
` (5 preceding siblings ...)
2025-08-08 6:29 ` [PATCH v8 07/14] drm/amdgpu: validate userq buffer virtual address and size Prike Liang
@ 2025-08-08 6:29 ` Prike Liang
2025-08-08 6:29 ` [PATCH v8 09/14] drm/amdgpu: track the userq bo va for its obj management Prike Liang
` (5 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:29 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian.Koenig, Prike Liang
Add the userq object virtual address get(),mapped() and put()
helpers for tracking the userq obj va address usage.
Signed-off-by: Prike Liang <Prike.Liang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 172 ++++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 14 ++
drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 4 +
3 files changed, 189 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 0aeb7a96ccbf..562d12f9d0d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -76,6 +76,174 @@ int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
return r;
}
+int amdgpu_userq_buffer_va_get(struct amdgpu_vm *vm, u64 addr)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ u64 user_addr;
+ int r;
+
+ user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
+ r = amdgpu_bo_reserve(vm->root.bo, false);
+ if (r)
+ return r;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
+ if (!mapping)
+ goto out_err;
+
+ /*
+ * Need to unify the following userq va reference.
+ * mqd bo
+ * rptr bo
+ * wptr bo
+ * eop bo
+ * shadow bo
+ * csa bo
+ */
+ /*amdgpu_bo_ref(mapping->bo_va->base.bo);*/
+ mapping->bo_va->queue_refcount++;
+
+ amdgpu_bo_unreserve(vm->root.bo);
+ return 0;
+
+out_err:
+ amdgpu_bo_unreserve(vm->root.bo);
+ return -EINVAL;
+}
+
+bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ u64 user_addr;
+ bool r;
+
+ user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
+
+ if (amdgpu_bo_reserve(vm->root.bo, false))
+ return false;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
+ if (!IS_ERR_OR_NULL(mapping) && mapping->bo_va->queue_refcount > 0)
+ r = true;
+ else
+ r = false;
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ return r;
+}
+
+bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_vm *vm,
+ struct amdgpu_usermode_queue *queue)
+{
+
+ switch (queue->queue_type) {
+ case AMDGPU_HW_IP_GFX:
+ if (amdgpu_userq_buffer_va_mapped(vm, queue->queue_va) ||
+ amdgpu_userq_buffer_va_mapped(vm, queue->rptr_va) ||
+ amdgpu_userq_buffer_va_mapped(vm, queue->wptr_va) ||
+ amdgpu_userq_buffer_va_mapped(vm, queue->shadow_va) ||
+ amdgpu_userq_buffer_va_mapped(vm, queue->csa_va))
+ return true;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ if (amdgpu_userq_buffer_va_mapped(vm, queue->queue_va) ||
+ amdgpu_userq_buffer_va_mapped(vm, queue->rptr_va) ||
+ amdgpu_userq_buffer_va_mapped(vm, queue->wptr_va) ||
+ amdgpu_userq_buffer_va_mapped(vm, queue->eop_va))
+ return true;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ if (amdgpu_userq_buffer_va_mapped(vm, queue->queue_va) ||
+ amdgpu_userq_buffer_va_mapped(vm, queue->rptr_va) ||
+ amdgpu_userq_buffer_va_mapped(vm, queue->wptr_va) ||
+ amdgpu_userq_buffer_va_mapped(vm, queue->csa_va))
+ return true;
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64 addr)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ u64 user_addr;
+ int r;
+
+ user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
+ r = amdgpu_bo_reserve(vm->root.bo, false);
+ if (r)
+ return r;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
+ if (!mapping)
+ goto out_err;
+ /*
+ * TODO: It requires figuring out the root cause of userq va mapping
+ * reference imbalance issue.
+ */
+ /*amdgpu_bo_unref(&mapping->bo_va->base.bo);*/
+ mapping->bo_va->queue_refcount--;
+
+ amdgpu_bo_unreserve(vm->root.bo);
+ return 0;
+
+out_err:
+ amdgpu_bo_unreserve(vm->root.bo);
+ return -EINVAL;
+}
+
+static void amdgpu_userq_buffer_vas_get(struct amdgpu_vm *vm,
+ struct amdgpu_usermode_queue *queue)
+{
+
+
+ amdgpu_userq_buffer_va_get(vm, queue->queue_va);
+ amdgpu_userq_buffer_va_get(vm, queue->rptr_va);
+ amdgpu_userq_buffer_va_get(vm, queue->wptr_va);
+
+ switch (queue->queue_type) {
+ case AMDGPU_HW_IP_GFX:
+ amdgpu_userq_buffer_va_get(vm, queue->shadow_va);
+ amdgpu_userq_buffer_va_get(vm, queue->csa_va);
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ amdgpu_userq_buffer_va_get(vm, queue->eop_va);
+ break;
+ case AMDGPU_HW_IP_DMA:
+ amdgpu_userq_buffer_va_get(vm, queue->csa_va);
+ break;
+ default:
+ break;
+ }
+}
+
+int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
+ struct amdgpu_usermode_queue *queue)
+{
+ amdgpu_userq_buffer_va_put(vm, queue->queue_va);
+ amdgpu_userq_buffer_va_put(vm, queue->rptr_va);
+ amdgpu_userq_buffer_va_put(vm, queue->wptr_va);
+
+ switch (queue->queue_type) {
+ case AMDGPU_HW_IP_GFX:
+ amdgpu_userq_buffer_va_put(vm, queue->shadow_va);
+ amdgpu_userq_buffer_va_put(vm, queue->csa_va);
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ amdgpu_userq_buffer_va_put(vm, queue->eop_va);
+ break;
+ case AMDGPU_HW_IP_DMA:
+ amdgpu_userq_buffer_va_put(vm, queue->csa_va);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
static int
amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue)
@@ -445,6 +613,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
queue->vm = &fpriv->vm;
queue->priority = priority;
queue->generation = amdgpu_vm_generation(adev, &fpriv->vm);
+ queue->queue_va = args->in.queue_va;
+ queue->rptr_va = args->in.rptr_va;
+ queue->wptr_va = args->in.wptr_va;
db_info.queue_type = queue->queue_type;
db_info.doorbell_handle = queue->doorbell_handle;
@@ -475,7 +646,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
goto unlock;
}
-
qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
if (qid < 0) {
drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 0eb2a9c2e340..30067f80eadf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -54,6 +54,13 @@ struct amdgpu_usermode_queue {
enum amdgpu_userq_state state;
uint64_t doorbell_handle;
uint64_t doorbell_index;
+ uint64_t queue_va;
+ uint64_t rptr_va;
+ uint64_t wptr_va;
+ uint64_t eop_va;
+ uint64_t shadow_va;
+ uint64_t csa_va;
+
uint64_t flags;
struct amdgpu_mqd_prop *userq_prop;
struct amdgpu_userq_mgr *userq_mgr;
@@ -137,4 +144,11 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
u64 expected_size);
+int amdgpu_userq_buffer_va_get(struct amdgpu_vm *vm, u64 addr);
+bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr);
+bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_vm *vm,
+ struct amdgpu_usermode_queue *queue);
+int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64 addr);
+int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
+ struct amdgpu_usermode_queue *queue);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 6e29e85bbf9f..42d6cd90be59 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -262,6 +262,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
userq_props->hqd_active = false;
userq_props->tmz_queue =
mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
+ queue->eop_va = compute_mqd->eop_va;
kfree(compute_mqd);
} else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
@@ -283,6 +284,8 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
userq_props->csa_addr = mqd_gfx_v11->csa_va;
userq_props->tmz_queue =
mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
+ queue->shadow_va = mqd_gfx_v11->shadow_va;
+ queue->csa_va = mqd_gfx_v11->csa_va;
if (amdgpu_userq_input_va_validate(queue->vm, mqd_gfx_v11->shadow_va,
shadow_info.shadow_size)) {
@@ -314,6 +317,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
}
userq_props->csa_addr = mqd_sdma_v11->csa_va;
+ queue->csa_va = mqd_sdma_v11->csa_va;
kfree(mqd_sdma_v11);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 09/14] drm/amdgpu: track the userq bo va for its obj management
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
` (6 preceding siblings ...)
2025-08-08 6:29 ` [PATCH v8 08/14] drm/amdgpu: add userq object va track helpers Prike Liang
@ 2025-08-08 6:29 ` Prike Liang
2025-08-08 6:29 ` [PATCH v8 10/14] drm/amdgpu: validate the userq va before destroying Prike Liang
` (4 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:29 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian.Koenig, Prike Liang
Track the userq obj for its life time, and reference and
dereference the buffer counter at its creating and destroying
period.
Signed-off-by: Prike Liang <Prike.Liang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 562d12f9d0d2..6cdfeb224f6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -306,6 +306,8 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
+ /* Drop the userq reference. */
+ amdgpu_userq_buffer_vas_put(queue->vm, queue);
uq_funcs->mqd_destroy(uq_mgr, queue);
amdgpu_userq_fence_driver_free(queue);
idr_remove(&uq_mgr->userq_idr, queue_id);
@@ -646,6 +648,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
goto unlock;
}
+ /* refer to the userq objects vm bo*/
+ amdgpu_userq_buffer_vas_get(queue->vm, queue);
+
qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
if (qid < 0) {
drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 10/14] drm/amdgpu: validate the userq va before destroying
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
` (7 preceding siblings ...)
2025-08-08 6:29 ` [PATCH v8 09/14] drm/amdgpu: track the userq bo va for its obj management Prike Liang
@ 2025-08-08 6:29 ` Prike Liang
2025-08-08 6:29 ` [PATCH v8 11/14] drm/amdgpu: keeping waiting userq fence infinitely Prike Liang
` (3 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:29 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian.Koenig, Prike Liang
The user queue object destroy requires ensuring its
VA keeps mapping prior to the queue being destroyed.
Otherwise, it seems a bug in the user space or VA
freed wrongly, and the kernel driver should report an
invalidated state to the user IOCTL request.
Signed-off-by: Prike Liang <Prike.Liang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 6cdfeb224f6c..e5891674b4d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -520,6 +520,13 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
amdgpu_bo_unreserve(queue->db_obj.obj);
}
amdgpu_bo_unref(&queue->db_obj.obj);
+ /*
+ * At this point the userq obj va should be mapped,
+ * otherwise will return error to user.
+ */
+ if (!amdgpu_userq_buffer_vas_mapped(&fpriv->vm, queue))
+ queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
+
r = amdgpu_userq_unmap_helper(uq_mgr, queue);
/*TODO: It requires a reset for userq hw unmap error*/
if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) {
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 11/14] drm/amdgpu: keeping waiting userq fence infinitely
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
` (8 preceding siblings ...)
2025-08-08 6:29 ` [PATCH v8 10/14] drm/amdgpu: validate the userq va before destroying Prike Liang
@ 2025-08-08 6:29 ` Prike Liang
2025-08-08 6:29 ` [PATCH v8 12/14] drm/amdgpu: clean up the amdgpu_userq_active() Prike Liang
` (2 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:29 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian.Koenig, Prike Liang
Keeping waiting the userq fence infinitely untill
hang detection, and then suspend the hang queue and
set the fence error.
Signed-off-by: Prike Liang <Prike.Liang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index e5891674b4d0..a78c2caeef41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -283,7 +283,7 @@ amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
return r;
}
-static void
+static int
amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue)
{
@@ -291,11 +291,16 @@ amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
int ret;
if (f && !dma_fence_is_signaled(f)) {
- ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
- if (ret <= 0)
+ ret = dma_fence_wait_timeout(f, true, MAX_SCHEDULE_TIMEOUT);
+ if (ret <= 0) {
drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
f->context, f->seqno);
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ return -ETIME;
+ }
}
+
+ return ret;
}
static void
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 12/14] drm/amdgpu: clean up the amdgpu_userq_active()
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
` (9 preceding siblings ...)
2025-08-08 6:29 ` [PATCH v8 11/14] drm/amdgpu: keeping waiting userq fence infinitely Prike Liang
@ 2025-08-08 6:29 ` Prike Liang
2025-08-08 6:29 ` [PATCH v8 13/14] drm/amdgpu: validate the queue va for resuming the queue Prike Liang
2025-08-08 6:29 ` [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap Prike Liang
12 siblings, 0 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:29 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander.Deucher, Christian.Koenig, Prike Liang, Alex Deucher
This is no invocation for amdgpu_userq_active().
Signed-off-by: Prike Liang <Prike.Liang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 16 ----------------
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 2 --
2 files changed, 18 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index a78c2caeef41..3f8343599deb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -319,22 +319,6 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
kfree(queue);
}
-int
-amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr)
-{
- struct amdgpu_usermode_queue *queue;
- int queue_id;
- int ret = 0;
-
- mutex_lock(&uq_mgr->userq_mutex);
- /* Resume all the queues for this process */
- idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id)
- ret += queue->state == AMDGPU_USERQ_STATE_MAPPED;
-
- mutex_unlock(&uq_mgr->userq_mutex);
- return ret;
-}
-
static struct amdgpu_usermode_queue *
amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 30067f80eadf..cf35b6140a3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -123,8 +123,6 @@ void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_eviction_fence *ev_fence);
-int amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr);
-
void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
struct amdgpu_eviction_fence_mgr *evf_mgr);
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 13/14] drm/amdgpu: validate the queue va for resuming the queue
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
` (10 preceding siblings ...)
2025-08-08 6:29 ` [PATCH v8 12/14] drm/amdgpu: clean up the amdgpu_userq_active() Prike Liang
@ 2025-08-08 6:29 ` Prike Liang
2025-08-08 6:29 ` [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap Prike Liang
12 siblings, 0 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:29 UTC (permalink / raw)
To: amd-gfx
Cc: Alexander.Deucher, Christian.Koenig, Prike Liang,
Christian König
It requires validating the userq VA whether is mapped before
trying to resume the queue.
Signed-off-by: Prike Liang <Prike.Liang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 3f8343599deb..771f57d09060 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -816,11 +816,18 @@ static int
amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
{
struct amdgpu_usermode_queue *queue;
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
int queue_id;
int ret = 0, r;
/* Resume all the queues for this process */
idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
+
+ if (!amdgpu_userq_buffer_vas_mapped(&fpriv->vm, queue)) {
+ drm_file_err(uq_mgr->file, "trying restore queue without va mappping\n");
+ continue;
+ }
+
r = amdgpu_userq_map_helper(uq_mgr, queue);
if (r)
ret = r;
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
` (11 preceding siblings ...)
2025-08-08 6:29 ` [PATCH v8 13/14] drm/amdgpu: validate the queue va for resuming the queue Prike Liang
@ 2025-08-08 6:29 ` Prike Liang
2025-08-11 6:05 ` Liang, Prike
2025-08-13 22:37 ` Alex Deucher
12 siblings, 2 replies; 19+ messages in thread
From: Prike Liang @ 2025-08-08 6:29 UTC (permalink / raw)
To: amd-gfx
Cc: Alexander.Deucher, Christian.Koenig, Prike Liang,
Christian König
This change validates the userq to see whether can be
unmapped prior to the userq VA GEM unmap. The solution
is based on the following idea:
1) Find out the GEM unmap VA belonds to which userq,
2) Check the userq fence fence whether is signaled,
3) If the userq attached fences signal failed, then
mark it as illegal VA opt and give a warning message
for this illegal userspace request.
Suggested-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Prike Liang <Prike.Liang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 107 +++++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 2 +
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++
3 files changed, 118 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 771f57d09060..314d482849c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -676,7 +676,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
}
}
-
args->out.queue_id = qid;
unlock:
@@ -1214,3 +1213,109 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
mutex_unlock(&adev->userq_mutex);
return ret;
}
+
+/**
+ * amdgpu_userq_gem_va_unmap_queue_retrieve - find out userq by gem unmap va
+ * @queue: destinated userq for finding out from unmap va
+ * @va: the GEM unmap virtual address already aligned in mapping range
+ * Find out the corresponding userq by comparing
+ * the GEM unmap VA with userq VAs.
+ */
+static bool amdgpu_userq_gem_va_unmap_queue_retrieve(struct amdgpu_usermode_queue *queue,
+ uint64_t va)
+{
+ va = va << AMDGPU_GPU_PAGE_SHIFT | AMDGPU_GMC_HOLE_END;
+
+ switch (queue->queue_type) {
+ case AMDGPU_HW_IP_GFX:
+ if (queue->queue_va == va ||
+ queue->wptr_va == va ||
+ queue->rptr_va == va ||
+ queue->shadow_va == va ||
+ queue->csa_va == va)
+ return true;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ if (queue->queue_va == va ||
+ queue->wptr_va == va ||
+ queue->rptr_va == va ||
+ queue->eop_va == va)
+ return true;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ if (queue->queue_va == va ||
+ queue->wptr_va == va ||
+ queue->rptr_va == va ||
+ queue->csa_va == va)
+ return true;
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+ uint64_t va)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm, *tmp;
+ int queue_id;
+ int ret;
+
+ if (!ip_mask)
+ return 0;
+
+ /**
+ * validate the unmap va sequence:
+ * 1) Find out the GEM unmap VA belonds to which userq,
+ * 2) Check the userq fence whether is signaled,
+ * 3) If the userq attached fences signal failed, then
+ * mark as invalid va opt and give a warning message
+ * for this illegal userspace request.
+ */
+
+ if (mutex_trylock(&adev->userq_mutex)) {
+ list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+
+ if (!mutex_trylock(&uqm->userq_mutex))
+ continue;
+
+ idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
+
+ if (!amdgpu_userq_gem_va_unmap_queue_retrieve(queue, va)) {
+ dev_dbg(uqm->adev->dev, "va: 0x%llx not belond to queue id: %d\n",
+ va, queue_id);
+ continue;
+ }
+
+ if (queue->last_fence && !dma_fence_is_signaled(queue->last_fence)) {
+ drm_file_err(uqm->file, "an illegal VA unmap for the userq\n");
+ queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
+ ret = -ETIMEDOUT;
+ goto err;
+ }
+ /*
+ * At here still can't suspend the userq since here just one kind of
+ * VA unmapped, and some other VAs of userq may still be mapped. After
+ * this point this VA mapping will be deteled and the VA will be unmapped
+ * so will not resume the userq when its VA unmapped.
+ */
+ }
+ mutex_unlock(&uqm->userq_mutex);
+ }
+ } else {
+ /* Maybe we need a try lock again before return*/
+ return -EBUSY;
+ }
+
+ mutex_unlock(&adev->userq_mutex);
+ return 0;
+err:
+ mutex_unlock(&uqm->userq_mutex);
+ mutex_unlock(&adev->userq_mutex);
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index cf35b6140a3d..27ab8a6a7be6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -149,4 +149,6 @@ bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_vm *vm,
int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64 addr);
int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
struct amdgpu_usermode_queue *queue);
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+ uint64_t va);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f042372d9f2e..533954c0d234 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1929,6 +1929,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_vm *vm = bo_va->base.vm;
bool valid = true;
+ int r;
saddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -1949,6 +1950,15 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
return -ENOENT;
}
+ /* It's unlikely to happen that the mapping userq hasn't been idled
+ * during user requests GEM unmap IOCTL except for forcing the unmap
+ * from user space.
+ */
+
+ r = amdgpu_userq_gem_va_unmap_validate(adev, saddr);
+ if (unlikely(r && r != -EBUSY))
+ dev_warn(adev->dev, "Here should be an improper unmap request from user space\n");
+
list_del(&mapping->list);
amdgpu_vm_it_remove(mapping, &vm->va);
mapping->bo_va = NULL;
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* RE: [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap
2025-08-08 6:29 ` [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap Prike Liang
@ 2025-08-11 6:05 ` Liang, Prike
2025-08-13 22:37 ` Alex Deucher
1 sibling, 0 replies; 19+ messages in thread
From: Liang, Prike @ 2025-08-11 6:05 UTC (permalink / raw)
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander, Koenig, Christian, Koenig, Christian
[Public]
Ping for the series.
Regards,
Prike
> -----Original Message-----
> From: Liang, Prike <Prike.Liang@amd.com>
> Sent: Friday, August 8, 2025 2:29 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
> <Christian.Koenig@amd.com>; Liang, Prike <Prike.Liang@amd.com>; Koenig,
> Christian <Christian.Koenig@amd.com>
> Subject: [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap
>
> This change validates the userq to see whether can be unmapped prior to the userq
> VA GEM unmap. The solution is based on the following idea:
> 1) Find out the GEM unmap VA belonds to which userq,
> 2) Check the userq fence fence whether is signaled,
> 3) If the userq attached fences signal failed, then
> mark it as illegal VA opt and give a warning message
> for this illegal userspace request.
>
> Suggested-by: Christian König <christian.koenig@amd.com>
> Signed-off-by: Prike Liang <Prike.Liang@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 107 +++++++++++++++++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 2 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++
> 3 files changed, 118 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index 771f57d09060..314d482849c8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -676,7 +676,6 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
> }
> }
>
> -
> args->out.queue_id = qid;
>
> unlock:
> @@ -1214,3 +1213,109 @@ int
> amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
> mutex_unlock(&adev->userq_mutex);
> return ret;
> }
> +
> +/**
> + * amdgpu_userq_gem_va_unmap_queue_retrieve - find out userq by gem
> +unmap va
> + * @queue: destinated userq for finding out from unmap va
> + * @va: the GEM unmap virtual address already aligned in mapping range
> + * Find out the corresponding userq by comparing
> + * the GEM unmap VA with userq VAs.
> + */
> +static bool amdgpu_userq_gem_va_unmap_queue_retrieve(struct
> amdgpu_usermode_queue *queue,
> + uint64_t va) {
> + va = va << AMDGPU_GPU_PAGE_SHIFT | AMDGPU_GMC_HOLE_END;
> +
> + switch (queue->queue_type) {
> + case AMDGPU_HW_IP_GFX:
> + if (queue->queue_va == va ||
> + queue->wptr_va == va ||
> + queue->rptr_va == va ||
> + queue->shadow_va == va ||
> + queue->csa_va == va)
> + return true;
> + break;
> + case AMDGPU_HW_IP_COMPUTE:
> + if (queue->queue_va == va ||
> + queue->wptr_va == va ||
> + queue->rptr_va == va ||
> + queue->eop_va == va)
> + return true;
> + break;
> + case AMDGPU_HW_IP_DMA:
> + if (queue->queue_va == va ||
> + queue->wptr_va == va ||
> + queue->rptr_va == va ||
> + queue->csa_va == va)
> + return true;
> + break;
> + default:
> + break;
> + }
> +
> + return false;
> +}
> +
> +
> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
> + uint64_t va)
> +{
> + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
> + struct amdgpu_usermode_queue *queue;
> + struct amdgpu_userq_mgr *uqm, *tmp;
> + int queue_id;
> + int ret;
> +
> + if (!ip_mask)
> + return 0;
> +
> + /**
> + * validate the unmap va sequence:
> + * 1) Find out the GEM unmap VA belonds to which userq,
> + * 2) Check the userq fence whether is signaled,
> + * 3) If the userq attached fences signal failed, then
> + * mark as invalid va opt and give a warning message
> + * for this illegal userspace request.
> + */
> +
> + if (mutex_trylock(&adev->userq_mutex)) {
> + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
> +
> + if (!mutex_trylock(&uqm->userq_mutex))
> + continue;
> +
> + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
> +
> + if
> (!amdgpu_userq_gem_va_unmap_queue_retrieve(queue, va)) {
> + dev_dbg(uqm->adev->dev, "va: 0x%llx not
> belond to queue id: %d\n",
> + va, queue_id);
> + continue;
> + }
> +
> + if (queue->last_fence
> && !dma_fence_is_signaled(queue->last_fence)) {
> + drm_file_err(uqm->file, "an illegal VA unmap for
> the userq\n");
> + queue->state =
> AMDGPU_USERQ_STATE_INVALID_VA;
> + ret = -ETIMEDOUT;
> + goto err;
> + }
> + /*
> + * At here still can't suspend the userq since here just
> one kind of
> + * VA unmapped, and some other VAs of userq may
> still be mapped. After
> + * this point this VA mapping will be deteled and the VA
> will be unmapped
> + * so will not resume the userq when its VA unmapped.
> + */
> + }
> + mutex_unlock(&uqm->userq_mutex);
> + }
> + } else {
> + /* Maybe we need a try lock again before return*/
> + return -EBUSY;
> + }
> +
> + mutex_unlock(&adev->userq_mutex);
> + return 0;
> +err:
> + mutex_unlock(&uqm->userq_mutex);
> + mutex_unlock(&adev->userq_mutex);
> + return ret;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> index cf35b6140a3d..27ab8a6a7be6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> @@ -149,4 +149,6 @@ bool amdgpu_userq_buffer_vas_mapped(struct
> amdgpu_vm *vm, int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64
> addr); int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
> struct amdgpu_usermode_queue *queue);
> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
> + uint64_t va);
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index f042372d9f2e..533954c0d234 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1929,6 +1929,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
> struct amdgpu_bo_va_mapping *mapping;
> struct amdgpu_vm *vm = bo_va->base.vm;
> bool valid = true;
> + int r;
>
> saddr /= AMDGPU_GPU_PAGE_SIZE;
>
> @@ -1949,6 +1950,15 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device
> *adev,
> return -ENOENT;
> }
>
> + /* It's unlikely to happen that the mapping userq hasn't been idled
> + * during user requests GEM unmap IOCTL except for forcing the unmap
> + * from user space.
> + */
> +
> + r = amdgpu_userq_gem_va_unmap_validate(adev, saddr);
> + if (unlikely(r && r != -EBUSY))
> + dev_warn(adev->dev, "Here should be an improper unmap request
> from
> +user space\n");
> +
> list_del(&mapping->list);
> amdgpu_vm_it_remove(mapping, &vm->va);
> mapping->bo_va = NULL;
> --
> 2.34.1
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap
2025-08-08 6:29 ` [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap Prike Liang
2025-08-11 6:05 ` Liang, Prike
@ 2025-08-13 22:37 ` Alex Deucher
2025-08-14 10:35 ` Liang, Prike
1 sibling, 1 reply; 19+ messages in thread
From: Alex Deucher @ 2025-08-13 22:37 UTC (permalink / raw)
To: Prike Liang; +Cc: amd-gfx, Alexander.Deucher, Christian.Koenig
The start address may not align with the start address of the vital
queue buffers. Just retain a list of vital virtual address ranges
for each userq and then check if the address range the user wants to
unmap falls into any of those ranges. If so, then preempt and unmap
the queue and set the status to USER_UNMAPPED or something like that.
Then you don't have to worry about queue specific details as to what
addresses are vital for that queue type.
Alex
On Fri, Aug 8, 2025 at 2:29 AM Prike Liang <Prike.Liang@amd.com> wrote:
>
> This change validates the userq to see whether can be
> unmapped prior to the userq VA GEM unmap. The solution
> is based on the following idea:
> 1) Find out the GEM unmap VA belonds to which userq,
> 2) Check the userq fence fence whether is signaled,
> 3) If the userq attached fences signal failed, then
> mark it as illegal VA opt and give a warning message
> for this illegal userspace request.
>
> Suggested-by: Christian König <christian.koenig@amd.com>
> Signed-off-by: Prike Liang <Prike.Liang@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 107 +++++++++++++++++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 2 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++
> 3 files changed, 118 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index 771f57d09060..314d482849c8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -676,7 +676,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> }
> }
>
> -
> args->out.queue_id = qid;
>
> unlock:
> @@ -1214,3 +1213,109 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
> mutex_unlock(&adev->userq_mutex);
> return ret;
> }
> +
> +/**
> + * amdgpu_userq_gem_va_unmap_queue_retrieve - find out userq by gem unmap va
> + * @queue: destinated userq for finding out from unmap va
> + * @va: the GEM unmap virtual address already aligned in mapping range
> + * Find out the corresponding userq by comparing
> + * the GEM unmap VA with userq VAs.
> + */
> +static bool amdgpu_userq_gem_va_unmap_queue_retrieve(struct amdgpu_usermode_queue *queue,
> + uint64_t va)
> +{
> + va = va << AMDGPU_GPU_PAGE_SHIFT | AMDGPU_GMC_HOLE_END;
> +
> + switch (queue->queue_type) {
> + case AMDGPU_HW_IP_GFX:
> + if (queue->queue_va == va ||
> + queue->wptr_va == va ||
> + queue->rptr_va == va ||
> + queue->shadow_va == va ||
> + queue->csa_va == va)
> + return true;
> + break;
> + case AMDGPU_HW_IP_COMPUTE:
> + if (queue->queue_va == va ||
> + queue->wptr_va == va ||
> + queue->rptr_va == va ||
> + queue->eop_va == va)
> + return true;
> + break;
> + case AMDGPU_HW_IP_DMA:
> + if (queue->queue_va == va ||
> + queue->wptr_va == va ||
> + queue->rptr_va == va ||
> + queue->csa_va == va)
> + return true;
> + break;
> + default:
> + break;
> + }
> +
> + return false;
> +}
> +
> +
> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
> + uint64_t va)
> +{
> + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
> + struct amdgpu_usermode_queue *queue;
> + struct amdgpu_userq_mgr *uqm, *tmp;
> + int queue_id;
> + int ret;
> +
> + if (!ip_mask)
> + return 0;
> +
> + /**
> + * validate the unmap va sequence:
> + * 1) Find out the GEM unmap VA belonds to which userq,
> + * 2) Check the userq fence whether is signaled,
> + * 3) If the userq attached fences signal failed, then
> + * mark as invalid va opt and give a warning message
> + * for this illegal userspace request.
> + */
> +
> + if (mutex_trylock(&adev->userq_mutex)) {
> + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
> +
> + if (!mutex_trylock(&uqm->userq_mutex))
> + continue;
> +
> + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
> +
> + if (!amdgpu_userq_gem_va_unmap_queue_retrieve(queue, va)) {
> + dev_dbg(uqm->adev->dev, "va: 0x%llx not belond to queue id: %d\n",
> + va, queue_id);
> + continue;
> + }
> +
> + if (queue->last_fence && !dma_fence_is_signaled(queue->last_fence)) {
> + drm_file_err(uqm->file, "an illegal VA unmap for the userq\n");
> + queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
> + ret = -ETIMEDOUT;
> + goto err;
> + }
> + /*
> + * At here still can't suspend the userq since here just one kind of
> + * VA unmapped, and some other VAs of userq may still be mapped. After
> + * this point this VA mapping will be deteled and the VA will be unmapped
> + * so will not resume the userq when its VA unmapped.
> + */
> + }
> + mutex_unlock(&uqm->userq_mutex);
> + }
> + } else {
> + /* Maybe we need a try lock again before return*/
> + return -EBUSY;
> + }
> +
> + mutex_unlock(&adev->userq_mutex);
> + return 0;
> +err:
> + mutex_unlock(&uqm->userq_mutex);
> + mutex_unlock(&adev->userq_mutex);
> + return ret;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> index cf35b6140a3d..27ab8a6a7be6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> @@ -149,4 +149,6 @@ bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_vm *vm,
> int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64 addr);
> int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
> struct amdgpu_usermode_queue *queue);
> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
> + uint64_t va);
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index f042372d9f2e..533954c0d234 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1929,6 +1929,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
> struct amdgpu_bo_va_mapping *mapping;
> struct amdgpu_vm *vm = bo_va->base.vm;
> bool valid = true;
> + int r;
>
> saddr /= AMDGPU_GPU_PAGE_SIZE;
>
> @@ -1949,6 +1950,15 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
> return -ENOENT;
> }
>
> + /* It's unlikely to happen that the mapping userq hasn't been idled
> + * during user requests GEM unmap IOCTL except for forcing the unmap
> + * from user space.
> + */
> +
> + r = amdgpu_userq_gem_va_unmap_validate(adev, saddr);
> + if (unlikely(r && r != -EBUSY))
> + dev_warn(adev->dev, "Here should be an improper unmap request from user space\n");
> +
> list_del(&mapping->list);
> amdgpu_vm_it_remove(mapping, &vm->va);
> mapping->bo_va = NULL;
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* RE: [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap
2025-08-13 22:37 ` Alex Deucher
@ 2025-08-14 10:35 ` Liang, Prike
2025-08-14 12:11 ` Christian König
0 siblings, 1 reply; 19+ messages in thread
From: Liang, Prike @ 2025-08-14 10:35 UTC (permalink / raw)
To: Alex Deucher
Cc: amd-gfx@lists.freedesktop.org, Deucher, Alexander,
Koenig, Christian
[Public]
> From: Alex Deucher <alexdeucher@gmail.com>
> Sent: Thursday, August 14, 2025 6:38 AM
> To: Liang, Prike <Prike.Liang@amd.com>
> Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>
> Subject: Re: [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap
>
> The start address may not align with the start address of the vital
> queue buffers. Just retain a list of vital virtual address ranges
> for each userq and then check if the address range the user wants to
> unmap falls into any of those ranges. If so, then preempt and unmap
> the queue and set the status to USER_UNMAPPED or something like that.
[Prike] Each userq has various virtual addresses, but this unmap IOCTL request can only unmap
one kind of VA at a time, so do we need to validate the userq whether all the VAs have been unmapped
before unmapping the userq HW mapping?
Aside from that, do we still need to identify the invalid userq VA unmap case by checking userq fence to
see whether it is signaled when user is trying to unmap one of its VAs? Without this check, how do we
identify the userq GEM unmap VA case?
> Then you don't have to worry about queue specific details as to what addresses are
> vital for that queue type.
>
> Alex
>
> On Fri, Aug 8, 2025 at 2:29 AM Prike Liang <Prike.Liang@amd.com> wrote:
> >
> > This change validates the userq to see whether can be unmapped prior
> > to the userq VA GEM unmap. The solution is based on the following
> > idea:
> > 1) Find out the GEM unmap VA belonds to which userq,
> > 2) Check the userq fence fence whether is signaled,
> > 3) If the userq attached fences signal failed, then
> > mark it as illegal VA opt and give a warning message
> > for this illegal userspace request.
> >
> > Suggested-by: Christian König <christian.koenig@amd.com>
> > Signed-off-by: Prike Liang <Prike.Liang@amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 107 +++++++++++++++++++++-
> > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 2 +
> > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++
> > 3 files changed, 118 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > index 771f57d09060..314d482849c8 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > @@ -676,7 +676,6 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
> > }
> > }
> >
> > -
> > args->out.queue_id = qid;
> >
> > unlock:
> > @@ -1214,3 +1213,109 @@ int
> amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
> > mutex_unlock(&adev->userq_mutex);
> > return ret;
> > }
> > +
> > +/**
> > + * amdgpu_userq_gem_va_unmap_queue_retrieve - find out userq by gem
> > +unmap va
> > + * @queue: destinated userq for finding out from unmap va
> > + * @va: the GEM unmap virtual address already aligned in mapping
> > +range
> > + * Find out the corresponding userq by comparing
> > + * the GEM unmap VA with userq VAs.
> > + */
> > +static bool amdgpu_userq_gem_va_unmap_queue_retrieve(struct
> amdgpu_usermode_queue *queue,
> > + uint64_t va) {
> > + va = va << AMDGPU_GPU_PAGE_SHIFT | AMDGPU_GMC_HOLE_END;
> > +
> > + switch (queue->queue_type) {
> > + case AMDGPU_HW_IP_GFX:
> > + if (queue->queue_va == va ||
> > + queue->wptr_va == va ||
> > + queue->rptr_va == va ||
> > + queue->shadow_va == va ||
> > + queue->csa_va == va)
> > + return true;
> > + break;
> > + case AMDGPU_HW_IP_COMPUTE:
> > + if (queue->queue_va == va ||
> > + queue->wptr_va == va ||
> > + queue->rptr_va == va ||
> > + queue->eop_va == va)
> > + return true;
> > + break;
> > + case AMDGPU_HW_IP_DMA:
> > + if (queue->queue_va == va ||
> > + queue->wptr_va == va ||
> > + queue->rptr_va == va ||
> > + queue->csa_va == va)
> > + return true;
> > + break;
> > + default:
> > + break;
> > + }
> > +
> > + return false;
> > +}
> > +
> > +
> > +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
> > + uint64_t va) {
> > + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
> > + struct amdgpu_usermode_queue *queue;
> > + struct amdgpu_userq_mgr *uqm, *tmp;
> > + int queue_id;
> > + int ret;
> > +
> > + if (!ip_mask)
> > + return 0;
> > +
> > + /**
> > + * validate the unmap va sequence:
> > + * 1) Find out the GEM unmap VA belonds to which userq,
> > + * 2) Check the userq fence whether is signaled,
> > + * 3) If the userq attached fences signal failed, then
> > + * mark as invalid va opt and give a warning message
> > + * for this illegal userspace request.
> > + */
> > +
> > + if (mutex_trylock(&adev->userq_mutex)) {
> > + list_for_each_entry_safe(uqm, tmp,
> > + &adev->userq_mgr_list, list) {
> > +
> > + if (!mutex_trylock(&uqm->userq_mutex))
> > + continue;
> > +
> > + idr_for_each_entry(&uqm->userq_idr, queue,
> > + queue_id) {
> > +
> > + if (!amdgpu_userq_gem_va_unmap_queue_retrieve(queue,
> va)) {
> > + dev_dbg(uqm->adev->dev, "va: 0x%llx not belond to
> queue id: %d\n",
> > + va, queue_id);
> > + continue;
> > + }
> > +
> > + if (queue->last_fence && !dma_fence_is_signaled(queue-
> >last_fence)) {
> > + drm_file_err(uqm->file, "an illegal VA unmap for the
> userq\n");
> > + queue->state =
> AMDGPU_USERQ_STATE_INVALID_VA;
> > + ret = -ETIMEDOUT;
> > + goto err;
> > + }
> > + /*
> > + * At here still can't suspend the userq since here just one
> kind of
> > + * VA unmapped, and some other VAs of userq may still be
> mapped. After
> > + * this point this VA mapping will be deteled and the VA will be
> unmapped
> > + * so will not resume the userq when its VA unmapped.
> > + */
> > + }
> > + mutex_unlock(&uqm->userq_mutex);
> > + }
> > + } else {
> > + /* Maybe we need a try lock again before return*/
> > + return -EBUSY;
> > + }
> > +
> > + mutex_unlock(&adev->userq_mutex);
> > + return 0;
> > +err:
> > + mutex_unlock(&uqm->userq_mutex);
> > + mutex_unlock(&adev->userq_mutex);
> > + return ret;
> > +}
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > index cf35b6140a3d..27ab8a6a7be6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > @@ -149,4 +149,6 @@ bool amdgpu_userq_buffer_vas_mapped(struct
> > amdgpu_vm *vm, int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm,
> > u64 addr); int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
> > struct amdgpu_usermode_queue *queue);
> > +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
> > + uint64_t va);
> > #endif
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index f042372d9f2e..533954c0d234 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -1929,6 +1929,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device
> *adev,
> > struct amdgpu_bo_va_mapping *mapping;
> > struct amdgpu_vm *vm = bo_va->base.vm;
> > bool valid = true;
> > + int r;
> >
> > saddr /= AMDGPU_GPU_PAGE_SIZE;
> >
> > @@ -1949,6 +1950,15 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device
> *adev,
> > return -ENOENT;
> > }
> >
> > + /* It's unlikely to happen that the mapping userq hasn't been idled
> > + * during user requests GEM unmap IOCTL except for forcing the unmap
> > + * from user space.
> > + */
> > +
> > + r = amdgpu_userq_gem_va_unmap_validate(adev, saddr);
> > + if (unlikely(r && r != -EBUSY))
> > + dev_warn(adev->dev, "Here should be an improper unmap
> > + request from user space\n");
> > +
> > list_del(&mapping->list);
> > amdgpu_vm_it_remove(mapping, &vm->va);
> > mapping->bo_va = NULL;
> > --
> > 2.34.1
> >
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap
2025-08-14 10:35 ` Liang, Prike
@ 2025-08-14 12:11 ` Christian König
2025-08-15 14:48 ` Liang, Prike
0 siblings, 1 reply; 19+ messages in thread
From: Christian König @ 2025-08-14 12:11 UTC (permalink / raw)
To: Liang, Prike, Alex Deucher
Cc: amd-gfx@lists.freedesktop.org, Deucher, Alexander
On 14.08.25 12:35, Liang, Prike wrote:
> [Public]
>
>> From: Alex Deucher <alexdeucher@gmail.com>
>> Sent: Thursday, August 14, 2025 6:38 AM
>> To: Liang, Prike <Prike.Liang@amd.com>
>> Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander
>> <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>
>> Subject: Re: [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap
>>
>> The start address may not align with the start address of the vital
>> queue buffers. Just retain a list of vital virtual address ranges
>> for each userq and then check if the address range the user wants to
>> unmap falls into any of those ranges. If so, then preempt and unmap
>> the queue and set the status to USER_UNMAPPED or something like that.
> [Prike] Each userq has various virtual addresses, but this unmap IOCTL request can only unmap
> one kind of VA at a time
That is not even remotely correct. The CLEAR and REPLACE operations can unmap many VAs at a time.
>, so do we need to validate the userq whether all the VAs have been unmapped
> before unmapping the userq HW mapping?
No, we need to unmap the userq HW mapping before we unmap the VA addresses.
>
> Aside from that, do we still need to identify the invalid userq VA unmap case by checking userq fence to
> see whether it is signaled when user is trying to unmap one of its VAs? Without this check, how do we
> identify the userq GEM unmap VA case?
I don't fully understand the question, please clarify.
Regards,
Christian.
>
>> Then you don't have to worry about queue specific details as to what addresses are
>> vital for that queue type.
>>
>> Alex
>>
>> On Fri, Aug 8, 2025 at 2:29 AM Prike Liang <Prike.Liang@amd.com> wrote:
>>>
>>> This change validates the userq to see whether can be unmapped prior
>>> to the userq VA GEM unmap. The solution is based on the following
>>> idea:
>>> 1) Find out the GEM unmap VA belonds to which userq,
>>> 2) Check the userq fence fence whether is signaled,
>>> 3) If the userq attached fences signal failed, then
>>> mark it as illegal VA opt and give a warning message
>>> for this illegal userspace request.
>>>
>>> Suggested-by: Christian König <christian.koenig@amd.com>
>>> Signed-off-by: Prike Liang <Prike.Liang@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 107 +++++++++++++++++++++-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 2 +
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++
>>> 3 files changed, 118 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>>> index 771f57d09060..314d482849c8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>>> @@ -676,7 +676,6 @@ amdgpu_userq_create(struct drm_file *filp, union
>> drm_amdgpu_userq *args)
>>> }
>>> }
>>>
>>> -
>>> args->out.queue_id = qid;
>>>
>>> unlock:
>>> @@ -1214,3 +1213,109 @@ int
>> amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
>>> mutex_unlock(&adev->userq_mutex);
>>> return ret;
>>> }
>>> +
>>> +/**
>>> + * amdgpu_userq_gem_va_unmap_queue_retrieve - find out userq by gem
>>> +unmap va
>>> + * @queue: destinated userq for finding out from unmap va
>>> + * @va: the GEM unmap virtual address already aligned in mapping
>>> +range
>>> + * Find out the corresponding userq by comparing
>>> + * the GEM unmap VA with userq VAs.
>>> + */
>>> +static bool amdgpu_userq_gem_va_unmap_queue_retrieve(struct
>> amdgpu_usermode_queue *queue,
>>> + uint64_t va) {
>>> + va = va << AMDGPU_GPU_PAGE_SHIFT | AMDGPU_GMC_HOLE_END;
>>> +
>>> + switch (queue->queue_type) {
>>> + case AMDGPU_HW_IP_GFX:
>>> + if (queue->queue_va == va ||
>>> + queue->wptr_va == va ||
>>> + queue->rptr_va == va ||
>>> + queue->shadow_va == va ||
>>> + queue->csa_va == va)
>>> + return true;
>>> + break;
>>> + case AMDGPU_HW_IP_COMPUTE:
>>> + if (queue->queue_va == va ||
>>> + queue->wptr_va == va ||
>>> + queue->rptr_va == va ||
>>> + queue->eop_va == va)
>>> + return true;
>>> + break;
>>> + case AMDGPU_HW_IP_DMA:
>>> + if (queue->queue_va == va ||
>>> + queue->wptr_va == va ||
>>> + queue->rptr_va == va ||
>>> + queue->csa_va == va)
>>> + return true;
>>> + break;
>>> + default:
>>> + break;
>>> + }
>>> +
>>> + return false;
>>> +}
>>> +
>>> +
>>> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
>>> + uint64_t va) {
>>> + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
>>> + struct amdgpu_usermode_queue *queue;
>>> + struct amdgpu_userq_mgr *uqm, *tmp;
>>> + int queue_id;
>>> + int ret;
>>> +
>>> + if (!ip_mask)
>>> + return 0;
>>> +
>>> + /**
>>> + * validate the unmap va sequence:
>>> + * 1) Find out the GEM unmap VA belonds to which userq,
>>> + * 2) Check the userq fence whether is signaled,
>>> + * 3) If the userq attached fences signal failed, then
>>> + * mark as invalid va opt and give a warning message
>>> + * for this illegal userspace request.
>>> + */
>>> +
>>> + if (mutex_trylock(&adev->userq_mutex)) {
>>> + list_for_each_entry_safe(uqm, tmp,
>>> + &adev->userq_mgr_list, list) {
>>> +
>>> + if (!mutex_trylock(&uqm->userq_mutex))
>>> + continue;
>>> +
>>> + idr_for_each_entry(&uqm->userq_idr, queue,
>>> + queue_id) {
>>> +
>>> + if (!amdgpu_userq_gem_va_unmap_queue_retrieve(queue,
>> va)) {
>>> + dev_dbg(uqm->adev->dev, "va: 0x%llx not belond to
>> queue id: %d\n",
>>> + va, queue_id);
>>> + continue;
>>> + }
>>> +
>>> + if (queue->last_fence && !dma_fence_is_signaled(queue-
>>> last_fence)) {
>>> + drm_file_err(uqm->file, "an illegal VA unmap for the
>> userq\n");
>>> + queue->state =
>> AMDGPU_USERQ_STATE_INVALID_VA;
>>> + ret = -ETIMEDOUT;
>>> + goto err;
>>> + }
>>> + /*
>>> + * At here still can't suspend the userq since here just one
>> kind of
>>> + * VA unmapped, and some other VAs of userq may still be
>> mapped. After
>>> + * this point this VA mapping will be deteled and the VA will be
>> unmapped
>>> + * so will not resume the userq when its VA unmapped.
>>> + */
>>> + }
>>> + mutex_unlock(&uqm->userq_mutex);
>>> + }
>>> + } else {
>>> + /* Maybe we need a try lock again before return*/
>>> + return -EBUSY;
>>> + }
>>> +
>>> + mutex_unlock(&adev->userq_mutex);
>>> + return 0;
>>> +err:
>>> + mutex_unlock(&uqm->userq_mutex);
>>> + mutex_unlock(&adev->userq_mutex);
>>> + return ret;
>>> +}
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>>> index cf35b6140a3d..27ab8a6a7be6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>>> @@ -149,4 +149,6 @@ bool amdgpu_userq_buffer_vas_mapped(struct
>>> amdgpu_vm *vm, int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm,
>>> u64 addr); int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
>>> struct amdgpu_usermode_queue *queue);
>>> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
>>> + uint64_t va);
>>> #endif
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index f042372d9f2e..533954c0d234 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -1929,6 +1929,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device
>> *adev,
>>> struct amdgpu_bo_va_mapping *mapping;
>>> struct amdgpu_vm *vm = bo_va->base.vm;
>>> bool valid = true;
>>> + int r;
>>>
>>> saddr /= AMDGPU_GPU_PAGE_SIZE;
>>>
>>> @@ -1949,6 +1950,15 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device
>> *adev,
>>> return -ENOENT;
>>> }
>>>
>>> + /* It's unlikely to happen that the mapping userq hasn't been idled
>>> + * during user requests GEM unmap IOCTL except for forcing the unmap
>>> + * from user space.
>>> + */
>>> +
>>> + r = amdgpu_userq_gem_va_unmap_validate(adev, saddr);
>>> + if (unlikely(r && r != -EBUSY))
>>> + dev_warn(adev->dev, "Here should be an improper unmap
>>> + request from user space\n");
>>> +
>>> list_del(&mapping->list);
>>> amdgpu_vm_it_remove(mapping, &vm->va);
>>> mapping->bo_va = NULL;
>>> --
>>> 2.34.1
>>>
^ permalink raw reply [flat|nested] 19+ messages in thread
* RE: [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap
2025-08-14 12:11 ` Christian König
@ 2025-08-15 14:48 ` Liang, Prike
0 siblings, 0 replies; 19+ messages in thread
From: Liang, Prike @ 2025-08-15 14:48 UTC (permalink / raw)
To: Koenig, Christian, Alex Deucher
Cc: amd-gfx@lists.freedesktop.org, Deucher, Alexander
[Public]
> -----Original Message-----
> From: Koenig, Christian <Christian.Koenig@amd.com>
> Sent: Thursday, August 14, 2025 8:12 PM
> To: Liang, Prike <Prike.Liang@amd.com>; Alex Deucher
> <alexdeucher@gmail.com>
> Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher@amd.com>
> Subject: Re: [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap
>
> On 14.08.25 12:35, Liang, Prike wrote:
> > [Public]
> >
> >> From: Alex Deucher <alexdeucher@gmail.com>
> >> Sent: Thursday, August 14, 2025 6:38 AM
> >> To: Liang, Prike <Prike.Liang@amd.com>
> >> Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> >> <Alexander.Deucher@amd.com>; Koenig, Christian
> >> <Christian.Koenig@amd.com>
> >> Subject: Re: [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM
> >> unmap
> >>
> >> The start address may not align with the start address of the vital
> >> queue buffers. Just retain a list of vital virtual address ranges
> >> for each userq and then check if the address range the user wants to
> >> unmap falls into any of those ranges. If so, then preempt and unmap
> >> the queue and set the status to USER_UNMAPPED or something like that.
> > [Prike] Each userq has various virtual addresses, but this unmap IOCTL
> > request can only unmap one kind of VA at a time
>
> That is not even remotely correct. The CLEAR and REPLACE operations can
> unmap many VAs at a time.
Yes, I will consider that case as well, but for now I want to track the userq VA at the UNMAP operation first.
>
> >, so do we need to validate the userq whether all the VAs have been
> >unmapped before unmapping the userq HW mapping?
>
> No, we need to unmap the userq HW mapping before we unmap the VA addresses.
I examined the hardware mapping status for the VA being unmapped and found that the
user queue still holds a hardware mapping. This may be because the user queue or another
associated object still has VAs mapped; attempting to unmap the queue now could hang the system.
So, how can we unmap the user queue to prevent further use during the UNMAP IOCTL?
> >
> > Aside from that, do we still need to identify the invalid userq VA
> > unmap case by checking userq fence to see whether it is signaled when
> > user is trying to unmap one of its VAs? Without this check, how do we identify the
> userq GEM unmap VA case?
>
> I don't fully understand the question, please clarify.
Sorry, I mean how can we detect invalid user queue VA unmapping cases in the UNMAP IOCTL if we cannot obtain the user queue status from the VA being unmapped? In this patch I want to detect cases where a VA is unmapped while its user queue is still busy. Beyond that, what other kinds of invalid user-queue VA mappings should we handle?
> Regards,
> Christian.
>
> >
> >> Then you don't have to worry about queue specific details as to what
> >> addresses are vital for that queue type.
> >>
> >> Alex
> >>
> >> On Fri, Aug 8, 2025 at 2:29 AM Prike Liang <Prike.Liang@amd.com> wrote:
> >>>
> >>> This change validates the userq to see whether can be unmapped prior
> >>> to the userq VA GEM unmap. The solution is based on the following
> >>> idea:
> >>> 1) Find out the GEM unmap VA belonds to which userq,
> >>> 2) Check the userq fence fence whether is signaled,
> >>> 3) If the userq attached fences signal failed, then
> >>> mark it as illegal VA opt and give a warning message
> >>> for this illegal userspace request.
> >>>
> >>> Suggested-by: Christian König <christian.koenig@amd.com>
> >>> Signed-off-by: Prike Liang <Prike.Liang@amd.com>
> >>> ---
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 107
> +++++++++++++++++++++-
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 2 +
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++
> >>> 3 files changed, 118 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >>> index 771f57d09060..314d482849c8 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >>> @@ -676,7 +676,6 @@ amdgpu_userq_create(struct drm_file *filp, union
> >> drm_amdgpu_userq *args)
> >>> }
> >>> }
> >>>
> >>> -
> >>> args->out.queue_id = qid;
> >>>
> >>> unlock:
> >>> @@ -1214,3 +1213,109 @@ int
> >> amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device
> >> *adev,
> >>> mutex_unlock(&adev->userq_mutex);
> >>> return ret;
> >>> }
> >>> +
> >>> +/**
> >>> + * amdgpu_userq_gem_va_unmap_queue_retrieve - find out userq by gem
> >>> +unmap va
> >>> + * @queue: destinated userq for finding out from unmap va
> >>> + * @va: the GEM unmap virtual address already aligned in mapping
> >>> +range
> >>> + * Find out the corresponding userq by comparing
> >>> + * the GEM unmap VA with userq VAs.
> >>> + */
> >>> +static bool amdgpu_userq_gem_va_unmap_queue_retrieve(struct
> >> amdgpu_usermode_queue *queue,
> >>> + uint64_t va) {
> >>> + va = va << AMDGPU_GPU_PAGE_SHIFT |
> AMDGPU_GMC_HOLE_END;
> >>> +
> >>> + switch (queue->queue_type) {
> >>> + case AMDGPU_HW_IP_GFX:
> >>> + if (queue->queue_va == va ||
> >>> + queue->wptr_va == va ||
> >>> + queue->rptr_va == va ||
> >>> + queue->shadow_va == va ||
> >>> + queue->csa_va == va)
> >>> + return true;
> >>> + break;
> >>> + case AMDGPU_HW_IP_COMPUTE:
> >>> + if (queue->queue_va == va ||
> >>> + queue->wptr_va == va ||
> >>> + queue->rptr_va == va ||
> >>> + queue->eop_va == va)
> >>> + return true;
> >>> + break;
> >>> + case AMDGPU_HW_IP_DMA:
> >>> + if (queue->queue_va == va ||
> >>> + queue->wptr_va == va ||
> >>> + queue->rptr_va == va ||
> >>> + queue->csa_va == va)
> >>> + return true;
> >>> + break;
> >>> + default:
> >>> + break;
> >>> + }
> >>> +
> >>> + return false;
> >>> +}
> >>> +
> >>> +
> >>> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
> >>> + uint64_t va) {
> >>> + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
> >>> + struct amdgpu_usermode_queue *queue;
> >>> + struct amdgpu_userq_mgr *uqm, *tmp;
> >>> + int queue_id;
> >>> + int ret;
> >>> +
> >>> + if (!ip_mask)
> >>> + return 0;
> >>> +
> >>> + /**
> >>> + * validate the unmap va sequence:
> >>> + * 1) Find out the GEM unmap VA belonds to which userq,
> >>> + * 2) Check the userq fence whether is signaled,
> >>> + * 3) If the userq attached fences signal failed, then
> >>> + * mark as invalid va opt and give a warning message
> >>> + * for this illegal userspace request.
> >>> + */
> >>> +
> >>> + if (mutex_trylock(&adev->userq_mutex)) {
> >>> + list_for_each_entry_safe(uqm, tmp,
> >>> + &adev->userq_mgr_list, list) {
> >>> +
> >>> + if (!mutex_trylock(&uqm->userq_mutex))
> >>> + continue;
> >>> +
> >>> + idr_for_each_entry(&uqm->userq_idr, queue,
> >>> + queue_id) {
> >>> +
> >>> + if
> >>> + (!amdgpu_userq_gem_va_unmap_queue_retrieve(queue,
> >> va)) {
> >>> + dev_dbg(uqm->adev->dev, "va:
> >>> + 0x%llx not belond to
> >> queue id: %d\n",
> >>> + va, queue_id);
> >>> + continue;
> >>> + }
> >>> +
> >>> + if (queue->last_fence &&
> >>> + !dma_fence_is_signaled(queue-
> >>> last_fence)) {
> >>> + drm_file_err(uqm->file, "an
> >>> + illegal VA unmap for the
> >> userq\n");
> >>> + queue->state =
> >> AMDGPU_USERQ_STATE_INVALID_VA;
> >>> + ret = -ETIMEDOUT;
> >>> + goto err;
> >>> + }
> >>> + /*
> >>> + * At here still can't suspend the
> >>> + userq since here just one
> >> kind of
> >>> + * VA unmapped, and some other VAs
> >>> + of userq may still be
> >> mapped. After
> >>> + * this point this VA mapping will
> >>> + be deteled and the VA will be
> >> unmapped
> >>> + * so will not resume the userq when its VA unmapped.
> >>> + */
> >>> + }
> >>> + mutex_unlock(&uqm->userq_mutex);
> >>> + }
> >>> + } else {
> >>> + /* Maybe we need a try lock again before return*/
> >>> + return -EBUSY;
> >>> + }
> >>> +
> >>> + mutex_unlock(&adev->userq_mutex);
> >>> + return 0;
> >>> +err:
> >>> + mutex_unlock(&uqm->userq_mutex);
> >>> + mutex_unlock(&adev->userq_mutex);
> >>> + return ret;
> >>> +}
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >>> index cf35b6140a3d..27ab8a6a7be6 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >>> @@ -149,4 +149,6 @@ bool amdgpu_userq_buffer_vas_mapped(struct
> >>> amdgpu_vm *vm, int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm,
> >>> u64 addr); int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
> >>> struct amdgpu_usermode_queue *queue);
> >>> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
> >>> + uint64_t va);
> >>> #endif
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>> index f042372d9f2e..533954c0d234 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>> @@ -1929,6 +1929,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device
> >> *adev,
> >>> struct amdgpu_bo_va_mapping *mapping;
> >>> struct amdgpu_vm *vm = bo_va->base.vm;
> >>> bool valid = true;
> >>> + int r;
> >>>
> >>> saddr /= AMDGPU_GPU_PAGE_SIZE;
> >>>
> >>> @@ -1949,6 +1950,15 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device
> >> *adev,
> >>> return -ENOENT;
> >>> }
> >>>
> >>> + /* It's unlikely to happen that the mapping userq hasn't been idled
> >>> + * during user requests GEM unmap IOCTL except for forcing the unmap
> >>> + * from user space.
> >>> + */
> >>> +
> >>> + r = amdgpu_userq_gem_va_unmap_validate(adev, saddr);
> >>> + if (unlikely(r && r != -EBUSY))
> >>> + dev_warn(adev->dev, "Here should be an improper
> >>> + unmap request from user space\n");
> >>> +
> >>> list_del(&mapping->list);
> >>> amdgpu_vm_it_remove(mapping, &vm->va);
> >>> mapping->bo_va = NULL;
> >>> --
> >>> 2.34.1
> >>>
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2025-08-15 14:48 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-08 6:28 [PATCH v8 01/14] drm/amdgpu: validate userq input args Prike Liang
2025-08-08 6:28 ` [PATCH v8 02/14] drm/amdgpu: validate userq hw unmap status for destroying userq Prike Liang
2025-08-08 6:28 ` [PATCH v8 03/14] drm/amdgpu: add UAPI for user queue query status Prike Liang
2025-08-08 6:29 ` [PATCH v8 04/14] drm/amdgpu/userq: implement support for " Prike Liang
2025-08-08 6:29 ` [PATCH v8 05/14] drm/amdgpu/userq: extend queue flags for user queue " Prike Liang
2025-08-08 6:29 ` [PATCH v8 06/14] drm/amdgpu/userq: extend userq state Prike Liang
2025-08-08 6:29 ` [PATCH v8 07/14] drm/amdgpu: validate userq buffer virtual address and size Prike Liang
2025-08-08 6:29 ` [PATCH v8 08/14] drm/amdgpu: add userq object va track helpers Prike Liang
2025-08-08 6:29 ` [PATCH v8 09/14] drm/amdgpu: track the userq bo va for its obj management Prike Liang
2025-08-08 6:29 ` [PATCH v8 10/14] drm/amdgpu: validate the userq va before destroying Prike Liang
2025-08-08 6:29 ` [PATCH v8 11/14] drm/amdgpu: keeping waiting userq fence infinitely Prike Liang
2025-08-08 6:29 ` [PATCH v8 12/14] drm/amdgpu: clean up the amdgpu_userq_active() Prike Liang
2025-08-08 6:29 ` [PATCH v8 13/14] drm/amdgpu: validate the queue va for resuming the queue Prike Liang
2025-08-08 6:29 ` [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap Prike Liang
2025-08-11 6:05 ` Liang, Prike
2025-08-13 22:37 ` Alex Deucher
2025-08-14 10:35 ` Liang, Prike
2025-08-14 12:11 ` Christian König
2025-08-15 14:48 ` Liang, Prike
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).