* [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence
@ 2024-12-19 10:38 Arunpravin Paneer Selvam
2024-12-19 10:38 ` [PATCH v3 2/3] drm/amdgpu: Improve the xa field names readability Arunpravin Paneer Selvam
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Arunpravin Paneer Selvam @ 2024-12-19 10:38 UTC (permalink / raw)
To: amd-gfx; +Cc: christian.koenig, alexander.deucher, Arunpravin Paneer Selvam
Fix out-of-bounds issue in userq fence create when
accessing the userq xa structure. Added a lock to
protect the race condition.
v2:(Christian)
- Acquire xa lock only for the xa_for_each blocks and
not for the kvmalloc_array() memory allocation part.
v3:
- Replacing the kvmalloc_array() storage with xa_alloc()
solves the problem.
BUG: KASAN: slab-out-of-bounds in amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
[ +0.000006] Call Trace:
[ +0.000005] <TASK>
[ +0.000005] dump_stack_lvl+0x6c/0x90
[ +0.000011] print_report+0xc4/0x5e0
[ +0.000009] ? srso_return_thunk+0x5/0x5f
[ +0.000008] ? kasan_complete_mode_report_info+0x26/0x1d0
[ +0.000007] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
[ +0.000405] kasan_report+0xdf/0x120
[ +0.000009] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
[ +0.000405] __asan_report_store8_noabort+0x17/0x20
[ +0.000007] amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
[ +0.000406] ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu]
[ +0.000408] ? srso_return_thunk+0x5/0x5f
[ +0.000008] ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm]
[ +0.000013] ? srso_return_thunk+0x5/0x5f
[ +0.000008] amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu]
[ +0.000412] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
[ +0.000404] ? try_to_wake_up+0x165/0x1840
[ +0.000010] ? __pfx_futex_wake_mark+0x10/0x10
[ +0.000011] drm_ioctl_kernel+0x178/0x2f0 [drm]
[ +0.000050] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
[ +0.000404] ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
[ +0.000043] ? __kasan_check_read+0x11/0x20
[ +0.000007] ? srso_return_thunk+0x5/0x5f
[ +0.000007] ? __kasan_check_write+0x14/0x20
[ +0.000008] drm_ioctl+0x513/0xd20 [drm]
[ +0.000040] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
[ +0.000407] ? __pfx_drm_ioctl+0x10/0x10 [drm]
[ +0.000044] ? srso_return_thunk+0x5/0x5f
[ +0.000007] ? _raw_spin_lock_irqsave+0x99/0x100
[ +0.000007] ? __pfx__raw_spin_lock_irqsave+0x10/0x10
[ +0.000006] ? __rseq_handle_notify_resume+0x188/0xc30
[ +0.000008] ? srso_return_thunk+0x5/0x5f
[ +0.000008] ? srso_return_thunk+0x5/0x5f
[ +0.000006] ? _raw_spin_unlock_irqrestore+0x27/0x50
[ +0.000010] amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu]
[ +0.000388] __x64_sys_ioctl+0x135/0x1b0
[ +0.000009] x64_sys_call+0x1205/0x20d0
[ +0.000007] do_syscall_64+0x4d/0x120
[ +0.000008] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ +0.000007] RIP: 0033:0x7f7c3d31a94f
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
.../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 43 +++++++------------
.../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 3 +-
2 files changed, 17 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 2e7271362ace..4289bed6c1f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -122,10 +122,11 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
{
+ struct amdgpu_userq_fence_driver *stored_fence_drv;
struct amdgpu_userq_fence *userq_fence, *tmp;
struct dma_fence *fence;
+ unsigned long index;
u64 rptr;
- int i;
if (!fence_drv)
return;
@@ -141,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
dma_fence_signal(fence);
- for (i = 0; i < userq_fence->fence_drv_array_count; i++)
- amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
+ xa_for_each(&userq_fence->fence_drv_xa, index, stored_fence_drv)
+ amdgpu_userq_fence_driver_put(stored_fence_drv);
list_del(&userq_fence->link);
dma_fence_put(fence);
@@ -221,34 +222,24 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
fence_drv->context, seq);
+ xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
+
amdgpu_userq_fence_driver_get(fence_drv);
dma_fence_get(fence);
if (!xa_empty(&userq->fence_drv_xa)) {
struct amdgpu_userq_fence_driver *stored_fence_drv;
- unsigned long index, count = 0;
- int i = 0;
-
- xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
- count++;
-
- userq_fence->fence_drv_array =
- kvmalloc_array(count,
- sizeof(struct amdgpu_userq_fence_driver *),
- GFP_KERNEL);
-
- if (userq_fence->fence_drv_array) {
- xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
- userq_fence->fence_drv_array[i] = stored_fence_drv;
- xa_erase(&userq->fence_drv_xa, index);
- i++;
- }
+ unsigned long index_uq;
+ u32 index_uf;
+ int err;
+
+ xa_for_each(&userq->fence_drv_xa, index_uq, stored_fence_drv) {
+ err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
+ stored_fence_drv, xa_limit_32b, GFP_KERNEL);
+ if (err)
+ return err;
}
-
- userq_fence->fence_drv_array_count = i;
- } else {
- userq_fence->fence_drv_array = NULL;
- userq_fence->fence_drv_array_count = 0;
+ xa_destroy(&userq->fence_drv_xa);
}
/* Check if hardware has already processed the job */
@@ -300,8 +291,6 @@ static void amdgpu_userq_fence_free(struct rcu_head *rcu)
/* Release the fence driver reference */
amdgpu_userq_fence_driver_put(fence_drv);
-
- kvfree(userq_fence->fence_drv_array);
kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
index f1a90840ac1f..3283e5573d10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -37,9 +37,8 @@ struct amdgpu_userq_fence {
*/
spinlock_t lock;
struct list_head link;
- unsigned long fence_drv_array_count;
+ struct xarray fence_drv_xa;
struct amdgpu_userq_fence_driver *fence_drv;
- struct amdgpu_userq_fence_driver **fence_drv_array;
};
struct amdgpu_userq_fence_driver {
--
2.25.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v3 2/3] drm/amdgpu: Improve the xa field names readability
2024-12-19 10:38 [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Arunpravin Paneer Selvam
@ 2024-12-19 10:38 ` Arunpravin Paneer Selvam
2024-12-19 10:38 ` [PATCH v3 3/3] drm/amdgpu: Fix wait IOCTL lockup issues Arunpravin Paneer Selvam
2024-12-19 10:41 ` [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Christian König
2 siblings, 0 replies; 8+ messages in thread
From: Arunpravin Paneer Selvam @ 2024-12-19 10:38 UTC (permalink / raw)
To: amd-gfx; +Cc: christian.koenig, alexander.deucher, Arunpravin Paneer Selvam
Make the xa field names used in the userqueue and userfence
structure more understandable and add more clarity.
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
.../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 31 ++++++++++---------
.../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 4 +--
drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 8 ++---
.../gpu/drm/amd/include/amdgpu_userqueue.h | 2 +-
4 files changed, 23 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 4289bed6c1f7..6876f24e5eda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -97,7 +97,7 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
spin_lock_init(&fence_drv->fence_list_lock);
fence_drv->adev = adev;
- fence_drv->fence_drv_xa_ptr = &userq->fence_drv_xa;
+ fence_drv->xa_uq = &userq->xa_uq;
fence_drv->context = dma_fence_context_alloc(1);
get_task_comm(fence_drv->timeline_name, current);
@@ -122,8 +122,8 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
{
- struct amdgpu_userq_fence_driver *stored_fence_drv;
struct amdgpu_userq_fence *userq_fence, *tmp;
+ struct amdgpu_userq_fence_driver *fdrv;
struct dma_fence *fence;
unsigned long index;
u64 rptr;
@@ -142,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
dma_fence_signal(fence);
- xa_for_each(&userq_fence->fence_drv_xa, index, stored_fence_drv)
- amdgpu_userq_fence_driver_put(stored_fence_drv);
+ xa_for_each(&userq_fence->xa_uf, index, fdrv)
+ amdgpu_userq_fence_driver_put(fdrv);
list_del(&userq_fence->link);
dma_fence_put(fence);
@@ -222,24 +222,25 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
fence_drv->context, seq);
- xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
+ xa_init_flags(&userq_fence->xa_uf, XA_FLAGS_ALLOC);
amdgpu_userq_fence_driver_get(fence_drv);
dma_fence_get(fence);
- if (!xa_empty(&userq->fence_drv_xa)) {
- struct amdgpu_userq_fence_driver *stored_fence_drv;
+ /* Move all fence driver entries from xa_uq to xa_uf */
+ if (!xa_empty(&userq->xa_uq)) {
+ struct amdgpu_userq_fence_driver *fdrv;
unsigned long index_uq;
u32 index_uf;
int err;
- xa_for_each(&userq->fence_drv_xa, index_uq, stored_fence_drv) {
- err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
- stored_fence_drv, xa_limit_32b, GFP_KERNEL);
+ xa_for_each(&userq->xa_uq, index_uq, fdrv) {
+ err = xa_alloc_irq(&userq_fence->xa_uf, &index_uf,
+ fdrv, xa_limit_32b, GFP_KERNEL);
if (err)
return err;
}
- xa_destroy(&userq->fence_drv_xa);
+ xa_destroy(&userq->xa_uq);
}
/* Check if hardware has already processed the job */
@@ -809,7 +810,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
for (i = 0, cnt = 0; i < num_fences; i++) {
struct amdgpu_userq_fence_driver *fence_drv;
struct amdgpu_userq_fence *userq_fence;
- u32 index;
+ u32 index_uq;
userq_fence = to_amdgpu_userq_fence(fences[i]);
if (!userq_fence) {
@@ -834,9 +835,9 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
* Otherwise, we would gather those references until we don't
* have any more space left and crash.
*/
- if (fence_drv->fence_drv_xa_ptr) {
- r = xa_alloc(fence_drv->fence_drv_xa_ptr, &index, fence_drv,
- xa_limit_32b, GFP_KERNEL);
+ if (fence_drv->xa_uq) {
+ r = xa_alloc(fence_drv->xa_uq, &index_uq,
+ fence_drv, xa_limit_32b, GFP_KERNEL);
if (r)
goto free_fences;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
index 3283e5573d10..74edabc8e021 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -37,7 +37,7 @@ struct amdgpu_userq_fence {
*/
spinlock_t lock;
struct list_head link;
- struct xarray fence_drv_xa;
+ struct xarray xa_uf;
struct amdgpu_userq_fence_driver *fence_drv;
};
@@ -54,7 +54,7 @@ struct amdgpu_userq_fence_driver {
spinlock_t fence_list_lock;
struct list_head fences;
struct amdgpu_device *adev;
- struct xarray *fence_drv_xa_ptr;
+ struct xarray *xa_uq;
char timeline_name[TASK_COMM_LEN];
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 85baba323ba5..72ff623f8c8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -48,8 +48,8 @@ static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
static void
amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
{
- amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
- xa_destroy(&userq->fence_drv_xa);
+ amdgpu_userq_walk_and_drop_fence_drv(&userq->xa_uq);
+ xa_destroy(&userq->xa_uq);
/* Drop the fence_drv reference held by user queue */
amdgpu_userq_fence_driver_put(userq->fence_drv);
}
@@ -73,7 +73,7 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
}
uq_funcs->mqd_destroy(uq_mgr, queue);
- queue->fence_drv->fence_drv_xa_ptr = NULL;
+ queue->fence_drv->xa_uq = NULL;
amdgpu_userq_fence_driver_free(queue);
idr_remove(&uq_mgr->userq_idr, queue_id);
kfree(queue);
@@ -315,7 +315,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args)
}
queue->doorbell_index = index;
- xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
+ xa_init_flags(&queue->xa_uq, XA_FLAGS_ALLOC);
r = amdgpu_userq_fence_driver_alloc(adev, queue);
if (r) {
DRM_ERROR("Failed to alloc fence driver\n");
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
index e7e8d79b689d..8048a5fb917a 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -53,7 +53,7 @@ struct amdgpu_usermode_queue {
struct amdgpu_userq_obj db_obj;
struct amdgpu_userq_obj fw_obj;
struct amdgpu_userq_obj wptr_obj;
- struct xarray fence_drv_xa;
+ struct xarray xa_uq;
struct amdgpu_userq_fence_driver *fence_drv;
struct dma_fence *last_fence;
};
--
2.25.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v3 3/3] drm/amdgpu: Fix wait IOCTL lockup issues.
2024-12-19 10:38 [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Arunpravin Paneer Selvam
2024-12-19 10:38 ` [PATCH v3 2/3] drm/amdgpu: Improve the xa field names readability Arunpravin Paneer Selvam
@ 2024-12-19 10:38 ` Arunpravin Paneer Selvam
2024-12-19 10:43 ` Christian König
2024-12-19 10:41 ` [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Christian König
2 siblings, 1 reply; 8+ messages in thread
From: Arunpravin Paneer Selvam @ 2024-12-19 10:38 UTC (permalink / raw)
To: amd-gfx; +Cc: christian.koenig, alexander.deucher, Arunpravin Paneer Selvam
Fix the wait IOCTL lockup issue.
v2:(Christian)
- The problem is that you need to lock all BOs under a single
drm_exec_until_all_locked() loop. Otherwise not all BOs
would be locked again on contention.
v3: Locking vm pd when accessing the GEM BO's solves the lockup
issues.
[Dec 6 17:53] watchdog: BUG: soft lockup - CPU#4 stuck for 26s! [Xwayland:cs0:2634]
[ +0.000002] RIP: 0010:amdgpu_userq_wait_ioctl+0x3ce/0xfe0 [amdgpu]
[ +0.000003] RSP: 0018:ffffada901f4fc38 EFLAGS: 00000202
[ +0.000003] RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000001
[ +0.000001] RDX: ffff9fc28b974048 RSI: 0000000000000010 RDI: ffffada901f4fce8
[ +0.000002] RBP: ffffada901f4fd58 R08: ffff9fc28b974148 R09: 0000000000000000
[ +0.000002] R10: ffffada901f4fbf0 R11: 0000000000000001 R12: ffff9fc28ed1ac00
[ +0.000002] R13: 0000000000000000 R14: 0000000000000010 R15: ffffada901f4fe00
[ +0.000002] FS: 00007f3a00a00640(0000) GS:ffff9fc99e800000(0000) knlGS:0000000000000000
[ +0.000002] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ +0.000002] CR2: 00007f8e92137020 CR3: 000000013380e000 CR4: 0000000000350ef0
[ +0.000002] DR0: ffffffff90921610 DR1: ffffffff90921611 DR2: ffffffff90921612
[ +0.000001] DR3: ffffffff90921613 DR6: 00000000ffff0ff0 DR7: 0000000000000600
[ +0.000002] Call Trace:
[ +0.000002] <IRQ>
[ +0.000004] ? show_regs+0x69/0x80
[ +0.000005] ? watchdog_timer_fn+0x271/0x300
[ +0.000005] ? __pfx_watchdog_timer_fn+0x10/0x10
[ +0.000003] ? __hrtimer_run_queues+0x114/0x2a0
[ +0.000006] ? hrtimer_interrupt+0x110/0x240
[ +0.000005] ? __sysvec_apic_timer_interrupt+0x73/0x180
[ +0.000004] ? sysvec_apic_timer_interrupt+0xaa/0xd0
[ +0.000004] </IRQ>
[ +0.000002] <TASK>
[ +0.000002] ? asm_sysvec_apic_timer_interrupt+0x1b/0x20
[ +0.000006] ? amdgpu_userq_wait_ioctl+0x3ce/0xfe0 [amdgpu]
[ +0.000162] ? amdgpu_userq_wait_ioctl+0x3cc/0xfe0 [amdgpu]
[ +0.000156] ? finish_task_switch.isra.0+0x94/0x290
[ +0.000010] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
[ +0.000161] drm_ioctl_kernel+0xaa/0x100 [drm]
[ +0.000025] drm_ioctl+0x29d/0x500 [drm]
[ +0.000017] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
[ +0.000160] ? srso_return_thunk+0x5/0x5f
[ +0.000004] ? srso_return_thunk+0x5/0x5f
[ +0.000003] ? _raw_spin_unlock_irqrestore+0x27/0x50
[ +0.000004] amdgpu_drm_ioctl+0x4b/0x80 [amdgpu]
[ +0.000141] __x64_sys_ioctl+0x95/0xd0
[ +0.000005] x64_sys_call+0x1205/0x20d0
[ +0.000003] do_syscall_64+0x4d/0x120
[ +0.000004] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ +0.000002] RIP: 0033:0x7f3a0bb1a94f
[ +0.000002] RSP: 002b:00007f3a009ff870 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[ +0.000003] RAX: ffffffffffffffda RBX: 00007f3a009ff9f0 RCX: 00007f3a0bb1a94f
[ +0.000001] RDX: 00007f3a009ff9f0 RSI: 00000000c0406458 RDI: 000000000000000c
[ +0.000002] RBP: 00007f3a009ff8f0 R08: 0000000000000001 R09: 0000000000000000
[ +0.000002] R10: 0000000000000002 R11: 0000000000000246 R12: 0000561824bf39e0
[ +0.000002] R13: 00000000c0406458 R14: 000000000000000c R15: 0000561824a25b60
[ +0.000005] </TASK>
[ +27.998164] watchdog: BUG: soft lockup - CPU#4 stuck for 52s! [Xwayland:cs0:2634]
[ +0.000002] RIP: 0010:drm_exec_unlock_all+0x76/0xc0 [drm_exec]
[ +0.000002] RSP: 0018:ffffada901f4fbf8 EFLAGS: 00000246
[ +0.000003] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000001
[ +0.000002] RDX: ffff9fc28b974048 RSI: 0000000000000010 RDI: 0000000000000000
[ +0.000001] RBP: ffffada901f4fc10 R08: ffff9fc28b974148 R09: 0000000000000000
[ +0.000002] R10: ffffada901f4fbf0 R11: 0000000000000001 R12: ffff9fc28ed1ac00
[ +0.000002] R13: 00000000ffffffff R14: ffffada901f4fce8 R15: ffffada901f4fe00
[ +0.000002] FS: 00007f3a00a00640(0000) GS:ffff9fc99e800000(0000) knlGS:0000000000000000
[ +0.000002] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ +0.000002] CR2: 00007f8e92137020 CR3: 000000013380e000 CR4: 0000000000350ef0
[ +0.000002] DR0: ffffffff90921610 DR1: ffffffff90921611 DR2: ffffffff90921612
[ +0.000002] DR3: ffffffff90921613 DR6: 00000000ffff0ff0 DR7: 0000000000000600
[ +0.000002] Call Trace:
[ +0.000002] <IRQ>
[ +0.000003] ? show_regs+0x69/0x80
[ +0.000006] ? watchdog_timer_fn+0x271/0x300
[ +0.000004] ? __pfx_watchdog_timer_fn+0x10/0x10
[ +0.000003] ? __hrtimer_run_queues+0x114/0x2a0
[ +0.000006] ? hrtimer_interrupt+0x110/0x240
[ +0.000005] ? __sysvec_apic_timer_interrupt+0x73/0x180
[ +0.000004] ? sysvec_apic_timer_interrupt+0xaa/0xd0
[ +0.000004] </IRQ>
[ +0.000002] <TASK>
[ +0.000002] ? asm_sysvec_apic_timer_interrupt+0x1b/0x20
[ +0.000006] ? drm_exec_unlock_all+0x76/0xc0 [drm_exec]
[ +0.000004] drm_exec_cleanup+0x77/0x90 [drm_exec]
[ +0.000004] amdgpu_userq_wait_ioctl+0x3cc/0xfe0 [amdgpu]
[ +0.000206] ? finish_task_switch.isra.0+0x94/0x290
[ +0.000010] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
[ +0.000159] drm_ioctl_kernel+0xaa/0x100 [drm]
[ +0.000026] drm_ioctl+0x29d/0x500 [drm]
[ +0.000017] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
[ +0.000163] ? srso_return_thunk+0x5/0x5f
[ +0.000005] ? srso_return_thunk+0x5/0x5f
[ +0.000002] ? _raw_spin_unlock_irqrestore+0x27/0x50
[ +0.000005] amdgpu_drm_ioctl+0x4b/0x80 [amdgpu]
[ +0.000143] __x64_sys_ioctl+0x95/0xd0
[ +0.000005] x64_sys_call+0x1205/0x20d0
[ +0.000004] do_syscall_64+0x4d/0x120
[ +0.000003] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ +0.000002] RIP: 0033:0x7f3a0bb1a94f
[ +0.000002] RSP: 002b:00007f3a009ff870 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[ +0.000003] RAX: ffffffffffffffda RBX: 00007f3a009ff9f0 RCX: 00007f3a0bb1a94f
[ +0.000002] RDX: 00007f3a009ff9f0 RSI: 00000000c0406458 RDI: 000000000000000c
[ +0.000002] RBP: 00007f3a009ff8f0 R08: 0000000000000001 R09: 0000000000000000
[ +0.000001] R10: 0000000000000002 R11: 0000000000000246 R12: 0000561824bf39e0
[ +0.000002] R13: 00000000c0406458 R14: 000000000000000c R15: 0000561824a25b60
[ +0.000006] </TASK>
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
.../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 45 ++++++++++++-------
1 file changed, 29 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 6876f24e5eda..35d306fc069d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -463,22 +463,6 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
goto put_gobj_write;
}
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
- (num_read_bo_handles + num_write_bo_handles));
-
- /* Lock all BOs with retry handling */
- drm_exec_until_all_locked(&exec) {
- r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
- drm_exec_retry_on_contention(&exec);
- if (r)
- goto exec_fini;
-
- r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
- drm_exec_retry_on_contention(&exec);
- if (r)
- goto exec_fini;
- }
-
r = amdgpu_userq_fence_read_wptr(queue, &wptr);
if (r)
goto exec_fini;
@@ -495,6 +479,27 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
queue->last_fence = dma_fence_get(fence);
mutex_unlock(&uq_mgr->userq_mutex);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
+ (num_read_bo_handles + num_write_bo_handles));
+
+ /* Lock all BOs with retry handling */
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(queue->vm, &exec, 0);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto exec_fini;
+
+ r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto exec_fini;
+
+ r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto exec_fini;
+ }
+
for (i = 0; i < num_read_bo_handles; i++) {
if (!gobj_read || !gobj_read[i]->resv)
continue;
@@ -558,6 +563,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
struct drm_amdgpu_userq_fence_info *fence_info = NULL;
struct drm_amdgpu_userq_wait *wait_info = data;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct drm_gem_object **gobj_write;
struct drm_gem_object **gobj_read;
struct dma_fence **fences = NULL;
@@ -635,6 +641,13 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
/* Lock all BOs with retry handling */
drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ drm_exec_fini(&exec);
+ goto put_gobj_write;
+ }
+
r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
drm_exec_retry_on_contention(&exec);
if (r) {
--
2.25.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence
2024-12-19 10:38 [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Arunpravin Paneer Selvam
2024-12-19 10:38 ` [PATCH v3 2/3] drm/amdgpu: Improve the xa field names readability Arunpravin Paneer Selvam
2024-12-19 10:38 ` [PATCH v3 3/3] drm/amdgpu: Fix wait IOCTL lockup issues Arunpravin Paneer Selvam
@ 2024-12-19 10:41 ` Christian König
2024-12-20 10:34 ` Paneer Selvam, Arunpravin
2 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2024-12-19 10:41 UTC (permalink / raw)
To: Arunpravin Paneer Selvam, amd-gfx; +Cc: alexander.deucher
Am 19.12.24 um 11:38 schrieb Arunpravin Paneer Selvam:
> Fix out-of-bounds issue in userq fence create when
> accessing the userq xa structure. Added a lock to
> protect the race condition.
>
> v2:(Christian)
> - Acquire xa lock only for the xa_for_each blocks and
> not for the kvmalloc_array() memory allocation part.
>
> v3:
> - Replacing the kvmalloc_array() storage with xa_alloc()
> solves the problem.
>
> BUG: KASAN: slab-out-of-bounds in amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
> [ +0.000006] Call Trace:
> [ +0.000005] <TASK>
> [ +0.000005] dump_stack_lvl+0x6c/0x90
> [ +0.000011] print_report+0xc4/0x5e0
> [ +0.000009] ? srso_return_thunk+0x5/0x5f
> [ +0.000008] ? kasan_complete_mode_report_info+0x26/0x1d0
> [ +0.000007] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
> [ +0.000405] kasan_report+0xdf/0x120
> [ +0.000009] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
> [ +0.000405] __asan_report_store8_noabort+0x17/0x20
> [ +0.000007] amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
> [ +0.000406] ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu]
> [ +0.000408] ? srso_return_thunk+0x5/0x5f
> [ +0.000008] ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm]
> [ +0.000013] ? srso_return_thunk+0x5/0x5f
> [ +0.000008] amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu]
> [ +0.000412] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
> [ +0.000404] ? try_to_wake_up+0x165/0x1840
> [ +0.000010] ? __pfx_futex_wake_mark+0x10/0x10
> [ +0.000011] drm_ioctl_kernel+0x178/0x2f0 [drm]
> [ +0.000050] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
> [ +0.000404] ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
> [ +0.000043] ? __kasan_check_read+0x11/0x20
> [ +0.000007] ? srso_return_thunk+0x5/0x5f
> [ +0.000007] ? __kasan_check_write+0x14/0x20
> [ +0.000008] drm_ioctl+0x513/0xd20 [drm]
> [ +0.000040] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
> [ +0.000407] ? __pfx_drm_ioctl+0x10/0x10 [drm]
> [ +0.000044] ? srso_return_thunk+0x5/0x5f
> [ +0.000007] ? _raw_spin_lock_irqsave+0x99/0x100
> [ +0.000007] ? __pfx__raw_spin_lock_irqsave+0x10/0x10
> [ +0.000006] ? __rseq_handle_notify_resume+0x188/0xc30
> [ +0.000008] ? srso_return_thunk+0x5/0x5f
> [ +0.000008] ? srso_return_thunk+0x5/0x5f
> [ +0.000006] ? _raw_spin_unlock_irqrestore+0x27/0x50
> [ +0.000010] amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu]
> [ +0.000388] __x64_sys_ioctl+0x135/0x1b0
> [ +0.000009] x64_sys_call+0x1205/0x20d0
> [ +0.000007] do_syscall_64+0x4d/0x120
> [ +0.000008] entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [ +0.000007] RIP: 0033:0x7f7c3d31a94f
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> ---
> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 43 +++++++------------
> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 3 +-
> 2 files changed, 17 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> index 2e7271362ace..4289bed6c1f7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> @@ -122,10 +122,11 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
>
> void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
> {
> + struct amdgpu_userq_fence_driver *stored_fence_drv;
> struct amdgpu_userq_fence *userq_fence, *tmp;
> struct dma_fence *fence;
> + unsigned long index;
> u64 rptr;
> - int i;
>
> if (!fence_drv)
> return;
> @@ -141,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
>
> dma_fence_signal(fence);
>
> - for (i = 0; i < userq_fence->fence_drv_array_count; i++)
> - amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
> + xa_for_each(&userq_fence->fence_drv_xa, index, stored_fence_drv)
> + amdgpu_userq_fence_driver_put(stored_fence_drv);
>
> list_del(&userq_fence->link);
> dma_fence_put(fence);
> @@ -221,34 +222,24 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
> dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
> fence_drv->context, seq);
>
> + xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
> +
> amdgpu_userq_fence_driver_get(fence_drv);
> dma_fence_get(fence);
>
> if (!xa_empty(&userq->fence_drv_xa)) {
> struct amdgpu_userq_fence_driver *stored_fence_drv;
> - unsigned long index, count = 0;
> - int i = 0;
> -
> - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
> - count++;
> -
> - userq_fence->fence_drv_array =
> - kvmalloc_array(count,
> - sizeof(struct amdgpu_userq_fence_driver *),
> - GFP_KERNEL);
> -
> - if (userq_fence->fence_drv_array) {
> - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
> - userq_fence->fence_drv_array[i] = stored_fence_drv;
> - xa_erase(&userq->fence_drv_xa, index);
> - i++;
> - }
> + unsigned long index_uq;
> + u32 index_uf;
> + int err;
> +
> + xa_for_each(&userq->fence_drv_xa, index_uq, stored_fence_drv) {
> + err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
> + stored_fence_drv, xa_limit_32b, GFP_KERNEL);
That is even worse than what was discussed before. Now you have two XAs
and the second is incorrectly using GFP_KERNEL.
Regards,
Christian.
> + if (err)
> + return err;
> }
> -
> - userq_fence->fence_drv_array_count = i;
> - } else {
> - userq_fence->fence_drv_array = NULL;
> - userq_fence->fence_drv_array_count = 0;
> + xa_destroy(&userq->fence_drv_xa);
> }
>
> /* Check if hardware has already processed the job */
> @@ -300,8 +291,6 @@ static void amdgpu_userq_fence_free(struct rcu_head *rcu)
>
> /* Release the fence driver reference */
> amdgpu_userq_fence_driver_put(fence_drv);
> -
> - kvfree(userq_fence->fence_drv_array);
> kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> index f1a90840ac1f..3283e5573d10 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> @@ -37,9 +37,8 @@ struct amdgpu_userq_fence {
> */
> spinlock_t lock;
> struct list_head link;
> - unsigned long fence_drv_array_count;
> + struct xarray fence_drv_xa;
> struct amdgpu_userq_fence_driver *fence_drv;
> - struct amdgpu_userq_fence_driver **fence_drv_array;
> };
>
> struct amdgpu_userq_fence_driver {
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v3 3/3] drm/amdgpu: Fix wait IOCTL lockup issues.
2024-12-19 10:38 ` [PATCH v3 3/3] drm/amdgpu: Fix wait IOCTL lockup issues Arunpravin Paneer Selvam
@ 2024-12-19 10:43 ` Christian König
0 siblings, 0 replies; 8+ messages in thread
From: Christian König @ 2024-12-19 10:43 UTC (permalink / raw)
To: Arunpravin Paneer Selvam, amd-gfx; +Cc: alexander.deucher
Am 19.12.24 um 11:38 schrieb Arunpravin Paneer Selvam:
> Fix the wait IOCTL lockup issue.
>
> v2:(Christian)
> - The problem is that you need to lock all BOs under a single
> drm_exec_until_all_locked() loop. Otherwise not all BOs
> would be locked again on contention.
>
> v3: Locking vm pd when accessing the GEM BO's solves the lockup
> issues.
>
> [Dec 6 17:53] watchdog: BUG: soft lockup - CPU#4 stuck for 26s! [Xwayland:cs0:2634]
> [ +0.000002] RIP: 0010:amdgpu_userq_wait_ioctl+0x3ce/0xfe0 [amdgpu]
> [ +0.000003] RSP: 0018:ffffada901f4fc38 EFLAGS: 00000202
> [ +0.000003] RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000001
> [ +0.000001] RDX: ffff9fc28b974048 RSI: 0000000000000010 RDI: ffffada901f4fce8
> [ +0.000002] RBP: ffffada901f4fd58 R08: ffff9fc28b974148 R09: 0000000000000000
> [ +0.000002] R10: ffffada901f4fbf0 R11: 0000000000000001 R12: ffff9fc28ed1ac00
> [ +0.000002] R13: 0000000000000000 R14: 0000000000000010 R15: ffffada901f4fe00
> [ +0.000002] FS: 00007f3a00a00640(0000) GS:ffff9fc99e800000(0000) knlGS:0000000000000000
> [ +0.000002] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ +0.000002] CR2: 00007f8e92137020 CR3: 000000013380e000 CR4: 0000000000350ef0
> [ +0.000002] DR0: ffffffff90921610 DR1: ffffffff90921611 DR2: ffffffff90921612
> [ +0.000001] DR3: ffffffff90921613 DR6: 00000000ffff0ff0 DR7: 0000000000000600
> [ +0.000002] Call Trace:
> [ +0.000002] <IRQ>
> [ +0.000004] ? show_regs+0x69/0x80
> [ +0.000005] ? watchdog_timer_fn+0x271/0x300
> [ +0.000005] ? __pfx_watchdog_timer_fn+0x10/0x10
> [ +0.000003] ? __hrtimer_run_queues+0x114/0x2a0
> [ +0.000006] ? hrtimer_interrupt+0x110/0x240
> [ +0.000005] ? __sysvec_apic_timer_interrupt+0x73/0x180
> [ +0.000004] ? sysvec_apic_timer_interrupt+0xaa/0xd0
> [ +0.000004] </IRQ>
> [ +0.000002] <TASK>
> [ +0.000002] ? asm_sysvec_apic_timer_interrupt+0x1b/0x20
> [ +0.000006] ? amdgpu_userq_wait_ioctl+0x3ce/0xfe0 [amdgpu]
> [ +0.000162] ? amdgpu_userq_wait_ioctl+0x3cc/0xfe0 [amdgpu]
> [ +0.000156] ? finish_task_switch.isra.0+0x94/0x290
> [ +0.000010] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
> [ +0.000161] drm_ioctl_kernel+0xaa/0x100 [drm]
> [ +0.000025] drm_ioctl+0x29d/0x500 [drm]
> [ +0.000017] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
> [ +0.000160] ? srso_return_thunk+0x5/0x5f
> [ +0.000004] ? srso_return_thunk+0x5/0x5f
> [ +0.000003] ? _raw_spin_unlock_irqrestore+0x27/0x50
> [ +0.000004] amdgpu_drm_ioctl+0x4b/0x80 [amdgpu]
> [ +0.000141] __x64_sys_ioctl+0x95/0xd0
> [ +0.000005] x64_sys_call+0x1205/0x20d0
> [ +0.000003] do_syscall_64+0x4d/0x120
> [ +0.000004] entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [ +0.000002] RIP: 0033:0x7f3a0bb1a94f
> [ +0.000002] RSP: 002b:00007f3a009ff870 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
> [ +0.000003] RAX: ffffffffffffffda RBX: 00007f3a009ff9f0 RCX: 00007f3a0bb1a94f
> [ +0.000001] RDX: 00007f3a009ff9f0 RSI: 00000000c0406458 RDI: 000000000000000c
> [ +0.000002] RBP: 00007f3a009ff8f0 R08: 0000000000000001 R09: 0000000000000000
> [ +0.000002] R10: 0000000000000002 R11: 0000000000000246 R12: 0000561824bf39e0
> [ +0.000002] R13: 00000000c0406458 R14: 000000000000000c R15: 0000561824a25b60
> [ +0.000005] </TASK>
> [ +27.998164] watchdog: BUG: soft lockup - CPU#4 stuck for 52s! [Xwayland:cs0:2634]
> [ +0.000002] RIP: 0010:drm_exec_unlock_all+0x76/0xc0 [drm_exec]
> [ +0.000002] RSP: 0018:ffffada901f4fbf8 EFLAGS: 00000246
> [ +0.000003] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000001
> [ +0.000002] RDX: ffff9fc28b974048 RSI: 0000000000000010 RDI: 0000000000000000
> [ +0.000001] RBP: ffffada901f4fc10 R08: ffff9fc28b974148 R09: 0000000000000000
> [ +0.000002] R10: ffffada901f4fbf0 R11: 0000000000000001 R12: ffff9fc28ed1ac00
> [ +0.000002] R13: 00000000ffffffff R14: ffffada901f4fce8 R15: ffffada901f4fe00
> [ +0.000002] FS: 00007f3a00a00640(0000) GS:ffff9fc99e800000(0000) knlGS:0000000000000000
> [ +0.000002] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ +0.000002] CR2: 00007f8e92137020 CR3: 000000013380e000 CR4: 0000000000350ef0
> [ +0.000002] DR0: ffffffff90921610 DR1: ffffffff90921611 DR2: ffffffff90921612
> [ +0.000002] DR3: ffffffff90921613 DR6: 00000000ffff0ff0 DR7: 0000000000000600
> [ +0.000002] Call Trace:
> [ +0.000002] <IRQ>
> [ +0.000003] ? show_regs+0x69/0x80
> [ +0.000006] ? watchdog_timer_fn+0x271/0x300
> [ +0.000004] ? __pfx_watchdog_timer_fn+0x10/0x10
> [ +0.000003] ? __hrtimer_run_queues+0x114/0x2a0
> [ +0.000006] ? hrtimer_interrupt+0x110/0x240
> [ +0.000005] ? __sysvec_apic_timer_interrupt+0x73/0x180
> [ +0.000004] ? sysvec_apic_timer_interrupt+0xaa/0xd0
> [ +0.000004] </IRQ>
> [ +0.000002] <TASK>
> [ +0.000002] ? asm_sysvec_apic_timer_interrupt+0x1b/0x20
> [ +0.000006] ? drm_exec_unlock_all+0x76/0xc0 [drm_exec]
> [ +0.000004] drm_exec_cleanup+0x77/0x90 [drm_exec]
> [ +0.000004] amdgpu_userq_wait_ioctl+0x3cc/0xfe0 [amdgpu]
> [ +0.000206] ? finish_task_switch.isra.0+0x94/0x290
> [ +0.000010] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
> [ +0.000159] drm_ioctl_kernel+0xaa/0x100 [drm]
> [ +0.000026] drm_ioctl+0x29d/0x500 [drm]
> [ +0.000017] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
> [ +0.000163] ? srso_return_thunk+0x5/0x5f
> [ +0.000005] ? srso_return_thunk+0x5/0x5f
> [ +0.000002] ? _raw_spin_unlock_irqrestore+0x27/0x50
> [ +0.000005] amdgpu_drm_ioctl+0x4b/0x80 [amdgpu]
> [ +0.000143] __x64_sys_ioctl+0x95/0xd0
> [ +0.000005] x64_sys_call+0x1205/0x20d0
> [ +0.000004] do_syscall_64+0x4d/0x120
> [ +0.000003] entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [ +0.000002] RIP: 0033:0x7f3a0bb1a94f
> [ +0.000002] RSP: 002b:00007f3a009ff870 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
> [ +0.000003] RAX: ffffffffffffffda RBX: 00007f3a009ff9f0 RCX: 00007f3a0bb1a94f
> [ +0.000002] RDX: 00007f3a009ff9f0 RSI: 00000000c0406458 RDI: 000000000000000c
> [ +0.000002] RBP: 00007f3a009ff8f0 R08: 0000000000000001 R09: 0000000000000000
> [ +0.000001] R10: 0000000000000002 R11: 0000000000000246 R12: 0000561824bf39e0
> [ +0.000002] R13: 00000000c0406458 R14: 000000000000000c R15: 0000561824a25b60
> [ +0.000006] </TASK>
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> ---
> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 45 ++++++++++++-------
> 1 file changed, 29 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> index 6876f24e5eda..35d306fc069d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> @@ -463,22 +463,6 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
> goto put_gobj_write;
> }
>
> - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
> - (num_read_bo_handles + num_write_bo_handles));
> -
> - /* Lock all BOs with retry handling */
> - drm_exec_until_all_locked(&exec) {
> - r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
> - drm_exec_retry_on_contention(&exec);
> - if (r)
> - goto exec_fini;
> -
> - r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
> - drm_exec_retry_on_contention(&exec);
> - if (r)
> - goto exec_fini;
> - }
> -
> r = amdgpu_userq_fence_read_wptr(queue, &wptr);
> if (r)
> goto exec_fini;
> @@ -495,6 +479,27 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
> queue->last_fence = dma_fence_get(fence);
> mutex_unlock(&uq_mgr->userq_mutex);
>
> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
> + (num_read_bo_handles + num_write_bo_handles));
> +
> + /* Lock all BOs with retry handling */
> + drm_exec_until_all_locked(&exec) {
> + r = amdgpu_vm_lock_pd(queue->vm, &exec, 0);
Well that absolutely makes no sense at all.
First of all we need to lock the BOs *before* we create the fence. Then
there is no need to lock the VM PD here.
I have no idea what you are trying to do here.
Regards,
Christian.
> + drm_exec_retry_on_contention(&exec);
> + if (r)
> + goto exec_fini;
> +
> + r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
> + drm_exec_retry_on_contention(&exec);
> + if (r)
> + goto exec_fini;
> +
> + r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
> + drm_exec_retry_on_contention(&exec);
> + if (r)
> + goto exec_fini;
> + }
> +
> for (i = 0; i < num_read_bo_handles; i++) {
> if (!gobj_read || !gobj_read[i]->resv)
> continue;
> @@ -558,6 +563,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
> struct drm_amdgpu_userq_fence_info *fence_info = NULL;
> struct drm_amdgpu_userq_wait *wait_info = data;
> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
> struct drm_gem_object **gobj_write;
> struct drm_gem_object **gobj_read;
> struct dma_fence **fences = NULL;
> @@ -635,6 +641,13 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>
> /* Lock all BOs with retry handling */
> drm_exec_until_all_locked(&exec) {
> + r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0);
> + drm_exec_retry_on_contention(&exec);
> + if (r) {
> + drm_exec_fini(&exec);
> + goto put_gobj_write;
> + }
> +
> r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
> drm_exec_retry_on_contention(&exec);
> if (r) {
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence
2024-12-19 10:41 ` [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Christian König
@ 2024-12-20 10:34 ` Paneer Selvam, Arunpravin
2024-12-20 10:38 ` Christian König
0 siblings, 1 reply; 8+ messages in thread
From: Paneer Selvam, Arunpravin @ 2024-12-20 10:34 UTC (permalink / raw)
To: Christian König, amd-gfx; +Cc: alexander.deucher
Hi Christian,
On 12/19/2024 4:11 PM, Christian König wrote:
>
>
> Am 19.12.24 um 11:38 schrieb Arunpravin Paneer Selvam:
>> Fix out-of-bounds issue in userq fence create when
>> accessing the userq xa structure. Added a lock to
>> protect the race condition.
>>
>> v2:(Christian)
>> - Acquire xa lock only for the xa_for_each blocks and
>> not for the kvmalloc_array() memory allocation part.
>>
>> v3:
>> - Replacing the kvmalloc_array() storage with xa_alloc()
>> solves the problem.
>>
>> BUG: KASAN: slab-out-of-bounds in
>> amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>> [ +0.000006] Call Trace:
>> [ +0.000005] <TASK>
>> [ +0.000005] dump_stack_lvl+0x6c/0x90
>> [ +0.000011] print_report+0xc4/0x5e0
>> [ +0.000009] ? srso_return_thunk+0x5/0x5f
>> [ +0.000008] ? kasan_complete_mode_report_info+0x26/0x1d0
>> [ +0.000007] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>> [ +0.000405] kasan_report+0xdf/0x120
>> [ +0.000009] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>> [ +0.000405] __asan_report_store8_noabort+0x17/0x20
>> [ +0.000007] amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>> [ +0.000406] ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu]
>> [ +0.000408] ? srso_return_thunk+0x5/0x5f
>> [ +0.000008] ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm]
>> [ +0.000013] ? srso_return_thunk+0x5/0x5f
>> [ +0.000008] amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu]
>> [ +0.000412] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>> [ +0.000404] ? try_to_wake_up+0x165/0x1840
>> [ +0.000010] ? __pfx_futex_wake_mark+0x10/0x10
>> [ +0.000011] drm_ioctl_kernel+0x178/0x2f0 [drm]
>> [ +0.000050] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>> [ +0.000404] ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
>> [ +0.000043] ? __kasan_check_read+0x11/0x20
>> [ +0.000007] ? srso_return_thunk+0x5/0x5f
>> [ +0.000007] ? __kasan_check_write+0x14/0x20
>> [ +0.000008] drm_ioctl+0x513/0xd20 [drm]
>> [ +0.000040] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>> [ +0.000407] ? __pfx_drm_ioctl+0x10/0x10 [drm]
>> [ +0.000044] ? srso_return_thunk+0x5/0x5f
>> [ +0.000007] ? _raw_spin_lock_irqsave+0x99/0x100
>> [ +0.000007] ? __pfx__raw_spin_lock_irqsave+0x10/0x10
>> [ +0.000006] ? __rseq_handle_notify_resume+0x188/0xc30
>> [ +0.000008] ? srso_return_thunk+0x5/0x5f
>> [ +0.000008] ? srso_return_thunk+0x5/0x5f
>> [ +0.000006] ? _raw_spin_unlock_irqrestore+0x27/0x50
>> [ +0.000010] amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu]
>> [ +0.000388] __x64_sys_ioctl+0x135/0x1b0
>> [ +0.000009] x64_sys_call+0x1205/0x20d0
>> [ +0.000007] do_syscall_64+0x4d/0x120
>> [ +0.000008] entry_SYSCALL_64_after_hwframe+0x76/0x7e
>> [ +0.000007] RIP: 0033:0x7f7c3d31a94f
>>
>> Signed-off-by: Arunpravin Paneer Selvam
>> <Arunpravin.PaneerSelvam@amd.com>
>> ---
>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 43 +++++++------------
>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 3 +-
>> 2 files changed, 17 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> index 2e7271362ace..4289bed6c1f7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> @@ -122,10 +122,11 @@ int amdgpu_userq_fence_driver_alloc(struct
>> amdgpu_device *adev,
>> void amdgpu_userq_fence_driver_process(struct
>> amdgpu_userq_fence_driver *fence_drv)
>> {
>> + struct amdgpu_userq_fence_driver *stored_fence_drv;
>> struct amdgpu_userq_fence *userq_fence, *tmp;
>> struct dma_fence *fence;
>> + unsigned long index;
>> u64 rptr;
>> - int i;
>> if (!fence_drv)
>> return;
>> @@ -141,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct
>> amdgpu_userq_fence_driver *fence_d
>> dma_fence_signal(fence);
>> - for (i = 0; i < userq_fence->fence_drv_array_count; i++)
>> - amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
>> + xa_for_each(&userq_fence->fence_drv_xa, index,
>> stored_fence_drv)
>> + amdgpu_userq_fence_driver_put(stored_fence_drv);
>> list_del(&userq_fence->link);
>> dma_fence_put(fence);
>> @@ -221,34 +222,24 @@ int amdgpu_userq_fence_create(struct
>> amdgpu_usermode_queue *userq,
>> dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
>> fence_drv->context, seq);
>> + xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
>> +
>> amdgpu_userq_fence_driver_get(fence_drv);
>> dma_fence_get(fence);
>> if (!xa_empty(&userq->fence_drv_xa)) {
>> struct amdgpu_userq_fence_driver *stored_fence_drv;
>> - unsigned long index, count = 0;
>> - int i = 0;
>> -
>> - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
>> - count++;
>> -
>> - userq_fence->fence_drv_array =
>> - kvmalloc_array(count,
>> - sizeof(struct amdgpu_userq_fence_driver *),
>> - GFP_KERNEL);
>> -
>> - if (userq_fence->fence_drv_array) {
>> - xa_for_each(&userq->fence_drv_xa, index,
>> stored_fence_drv) {
>> - userq_fence->fence_drv_array[i] = stored_fence_drv;
>> - xa_erase(&userq->fence_drv_xa, index);
>> - i++;
>> - }
>> + unsigned long index_uq;
>> + u32 index_uf;
>> + int err;
>> +
>> + xa_for_each(&userq->fence_drv_xa, index_uq, stored_fence_drv) {
>> + err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
>> + stored_fence_drv, xa_limit_32b, GFP_KERNEL);
>
> That is even worse than what was discussed before. Now you have two
> XAs and the second is incorrectly using GFP_KERNEL.
I think the problem here is, the WAIT IOCTL updates the
userq->fence_drv_xa entries between the 2 xa_for_each blocks
exactly at kvmalloc_array memory allocation. Though, we are locking the
first and second xa_for_each blocks and having the
GFP_ATOMIC in place didnt help to resolve the problem.
For example,
kvmalloc_array() is allocating the memory for the count value(say 5) and
before we acquire the second
xa_for_each block lock, the count modified to (say 7) by the WAIT IOCTL
xa_alloc() function (by acquiring the same lock),
and we would be iterating for the new count. But the memory allocated
would be for 5 entries.
xa_lock()
first xa_for_each block to count the entries
xa_unlock()
kvmalloc_array allocates for count 5
xa_lock()
second xa_for_each block to move the entries to allocated memory
here the count increased to 7
xa_unlock
Thanks,
Arun.
>
> Regards,
> Christian.
>
>> + if (err)
>> + return err;
>> }
>> -
>> - userq_fence->fence_drv_array_count = i;
>> - } else {
>> - userq_fence->fence_drv_array = NULL;
>> - userq_fence->fence_drv_array_count = 0;
>> + xa_destroy(&userq->fence_drv_xa);
>> }
>> /* Check if hardware has already processed the job */
>> @@ -300,8 +291,6 @@ static void amdgpu_userq_fence_free(struct
>> rcu_head *rcu)
>> /* Release the fence driver reference */
>> amdgpu_userq_fence_driver_put(fence_drv);
>> -
>> - kvfree(userq_fence->fence_drv_array);
>> kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> index f1a90840ac1f..3283e5573d10 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> @@ -37,9 +37,8 @@ struct amdgpu_userq_fence {
>> */
>> spinlock_t lock;
>> struct list_head link;
>> - unsigned long fence_drv_array_count;
>> + struct xarray fence_drv_xa;
>> struct amdgpu_userq_fence_driver *fence_drv;
>> - struct amdgpu_userq_fence_driver **fence_drv_array;
>> };
>> struct amdgpu_userq_fence_driver {
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence
2024-12-20 10:34 ` Paneer Selvam, Arunpravin
@ 2024-12-20 10:38 ` Christian König
2024-12-20 10:45 ` Paneer Selvam, Arunpravin
0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2024-12-20 10:38 UTC (permalink / raw)
To: Paneer Selvam, Arunpravin, amd-gfx; +Cc: alexander.deucher
Hi Arun,
Am 20.12.24 um 11:34 schrieb Paneer Selvam, Arunpravin:
> Hi Christian,
>
>
> On 12/19/2024 4:11 PM, Christian König wrote:
>>
>>
>> Am 19.12.24 um 11:38 schrieb Arunpravin Paneer Selvam:
>>> Fix out-of-bounds issue in userq fence create when
>>> accessing the userq xa structure. Added a lock to
>>> protect the race condition.
>>>
>>> v2:(Christian)
>>> - Acquire xa lock only for the xa_for_each blocks and
>>> not for the kvmalloc_array() memory allocation part.
>>>
>>> v3:
>>> - Replacing the kvmalloc_array() storage with xa_alloc()
>>> solves the problem.
>>>
>>> BUG: KASAN: slab-out-of-bounds in
>>> amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>> [ +0.000006] Call Trace:
>>> [ +0.000005] <TASK>
>>> [ +0.000005] dump_stack_lvl+0x6c/0x90
>>> [ +0.000011] print_report+0xc4/0x5e0
>>> [ +0.000009] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000008] ? kasan_complete_mode_report_info+0x26/0x1d0
>>> [ +0.000007] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>> [ +0.000405] kasan_report+0xdf/0x120
>>> [ +0.000009] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>> [ +0.000405] __asan_report_store8_noabort+0x17/0x20
>>> [ +0.000007] amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>> [ +0.000406] ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu]
>>> [ +0.000408] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000008] ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm]
>>> [ +0.000013] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000008] amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu]
>>> [ +0.000412] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>> [ +0.000404] ? try_to_wake_up+0x165/0x1840
>>> [ +0.000010] ? __pfx_futex_wake_mark+0x10/0x10
>>> [ +0.000011] drm_ioctl_kernel+0x178/0x2f0 [drm]
>>> [ +0.000050] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>> [ +0.000404] ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
>>> [ +0.000043] ? __kasan_check_read+0x11/0x20
>>> [ +0.000007] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000007] ? __kasan_check_write+0x14/0x20
>>> [ +0.000008] drm_ioctl+0x513/0xd20 [drm]
>>> [ +0.000040] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>> [ +0.000407] ? __pfx_drm_ioctl+0x10/0x10 [drm]
>>> [ +0.000044] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000007] ? _raw_spin_lock_irqsave+0x99/0x100
>>> [ +0.000007] ? __pfx__raw_spin_lock_irqsave+0x10/0x10
>>> [ +0.000006] ? __rseq_handle_notify_resume+0x188/0xc30
>>> [ +0.000008] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000008] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000006] ? _raw_spin_unlock_irqrestore+0x27/0x50
>>> [ +0.000010] amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu]
>>> [ +0.000388] __x64_sys_ioctl+0x135/0x1b0
>>> [ +0.000009] x64_sys_call+0x1205/0x20d0
>>> [ +0.000007] do_syscall_64+0x4d/0x120
>>> [ +0.000008] entry_SYSCALL_64_after_hwframe+0x76/0x7e
>>> [ +0.000007] RIP: 0033:0x7f7c3d31a94f
>>>
>>> Signed-off-by: Arunpravin Paneer Selvam
>>> <Arunpravin.PaneerSelvam@amd.com>
>>> ---
>>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 43
>>> +++++++------------
>>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 3 +-
>>> 2 files changed, 17 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>> index 2e7271362ace..4289bed6c1f7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>> @@ -122,10 +122,11 @@ int amdgpu_userq_fence_driver_alloc(struct
>>> amdgpu_device *adev,
>>> void amdgpu_userq_fence_driver_process(struct
>>> amdgpu_userq_fence_driver *fence_drv)
>>> {
>>> + struct amdgpu_userq_fence_driver *stored_fence_drv;
>>> struct amdgpu_userq_fence *userq_fence, *tmp;
>>> struct dma_fence *fence;
>>> + unsigned long index;
>>> u64 rptr;
>>> - int i;
>>> if (!fence_drv)
>>> return;
>>> @@ -141,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct
>>> amdgpu_userq_fence_driver *fence_d
>>> dma_fence_signal(fence);
>>> - for (i = 0; i < userq_fence->fence_drv_array_count; i++)
>>> - amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
>>> + xa_for_each(&userq_fence->fence_drv_xa, index,
>>> stored_fence_drv)
>>> + amdgpu_userq_fence_driver_put(stored_fence_drv);
>>> list_del(&userq_fence->link);
>>> dma_fence_put(fence);
>>> @@ -221,34 +222,24 @@ int amdgpu_userq_fence_create(struct
>>> amdgpu_usermode_queue *userq,
>>> dma_fence_init(fence, &amdgpu_userq_fence_ops,
>>> &userq_fence->lock,
>>> fence_drv->context, seq);
>>> + xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
>>> +
>>> amdgpu_userq_fence_driver_get(fence_drv);
>>> dma_fence_get(fence);
>>> if (!xa_empty(&userq->fence_drv_xa)) {
>>> struct amdgpu_userq_fence_driver *stored_fence_drv;
>>> - unsigned long index, count = 0;
>>> - int i = 0;
>>> -
>>> - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
>>> - count++;
>>> -
>>> - userq_fence->fence_drv_array =
>>> - kvmalloc_array(count,
>>> - sizeof(struct amdgpu_userq_fence_driver *),
>>> - GFP_KERNEL);
>>> -
>>> - if (userq_fence->fence_drv_array) {
>>> - xa_for_each(&userq->fence_drv_xa, index,
>>> stored_fence_drv) {
>>> - userq_fence->fence_drv_array[i] = stored_fence_drv;
>>> - xa_erase(&userq->fence_drv_xa, index);
>>> - i++;
>>> - }
>>> + unsigned long index_uq;
>>> + u32 index_uf;
>>> + int err;
>>> +
>>> + xa_for_each(&userq->fence_drv_xa, index_uq,
>>> stored_fence_drv) {
>>> + err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
>>> + stored_fence_drv, xa_limit_32b, GFP_KERNEL);
>>
>> That is even worse than what was discussed before. Now you have two
>> XAs and the second is incorrectly using GFP_KERNEL.
>
> I think the problem here is, the WAIT IOCTL updates the
> userq->fence_drv_xa entries between the 2 xa_for_each blocks
> exactly at kvmalloc_array memory allocation. Though, we are locking
> the first and second xa_for_each blocks and having the
> GFP_ATOMIC in place didnt help to resolve the problem.
Yeah, I agree on the problem. But I don't understand why using
GFP_ATOMIC didn't solved the issue.
>
> For example,
> kvmalloc_array() is allocating the memory for the count value(say 5)
> and before we acquire the second
> xa_for_each block lock, the count modified to (say 7) by the WAIT
> IOCTL xa_alloc() function (by acquiring the same lock),
> and we would be iterating for the new count. But the memory allocated
> would be for 5 entries.
>
> xa_lock()
> first xa_for_each block to count the entries
> xa_unlock()
When you use GFP_ATOMIC you can drop this xa_unlock().
>
> kvmalloc_array allocates for count 5
>
> xa_lock()
And that xa_lock() and so make sure that the xa isn't modified in between.
Regards,
Christian.
> second xa_for_each block to move the entries to allocated memory
> here the count increased to 7
> xa_unlock
>
> Thanks,
> Arun.
>>
>> Regards,
>> Christian.
>>
>>> + if (err)
>>> + return err;
>>> }
>>> -
>>> - userq_fence->fence_drv_array_count = i;
>>> - } else {
>>> - userq_fence->fence_drv_array = NULL;
>>> - userq_fence->fence_drv_array_count = 0;
>>> + xa_destroy(&userq->fence_drv_xa);
>>> }
>>> /* Check if hardware has already processed the job */
>>> @@ -300,8 +291,6 @@ static void amdgpu_userq_fence_free(struct
>>> rcu_head *rcu)
>>> /* Release the fence driver reference */
>>> amdgpu_userq_fence_driver_put(fence_drv);
>>> -
>>> - kvfree(userq_fence->fence_drv_array);
>>> kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
>>> }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>> index f1a90840ac1f..3283e5573d10 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>> @@ -37,9 +37,8 @@ struct amdgpu_userq_fence {
>>> */
>>> spinlock_t lock;
>>> struct list_head link;
>>> - unsigned long fence_drv_array_count;
>>> + struct xarray fence_drv_xa;
>>> struct amdgpu_userq_fence_driver *fence_drv;
>>> - struct amdgpu_userq_fence_driver **fence_drv_array;
>>> };
>>> struct amdgpu_userq_fence_driver {
>>
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence
2024-12-20 10:38 ` Christian König
@ 2024-12-20 10:45 ` Paneer Selvam, Arunpravin
0 siblings, 0 replies; 8+ messages in thread
From: Paneer Selvam, Arunpravin @ 2024-12-20 10:45 UTC (permalink / raw)
To: Christian König, amd-gfx; +Cc: alexander.deucher
On 12/20/2024 4:08 PM, Christian König wrote:
> Hi Arun,
>
> Am 20.12.24 um 11:34 schrieb Paneer Selvam, Arunpravin:
>> Hi Christian,
>>
>>
>> On 12/19/2024 4:11 PM, Christian König wrote:
>>>
>>>
>>> Am 19.12.24 um 11:38 schrieb Arunpravin Paneer Selvam:
>>>> Fix out-of-bounds issue in userq fence create when
>>>> accessing the userq xa structure. Added a lock to
>>>> protect the race condition.
>>>>
>>>> v2:(Christian)
>>>> - Acquire xa lock only for the xa_for_each blocks and
>>>> not for the kvmalloc_array() memory allocation part.
>>>>
>>>> v3:
>>>> - Replacing the kvmalloc_array() storage with xa_alloc()
>>>> solves the problem.
>>>>
>>>> BUG: KASAN: slab-out-of-bounds in
>>>> amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>>> [ +0.000006] Call Trace:
>>>> [ +0.000005] <TASK>
>>>> [ +0.000005] dump_stack_lvl+0x6c/0x90
>>>> [ +0.000011] print_report+0xc4/0x5e0
>>>> [ +0.000009] ? srso_return_thunk+0x5/0x5f
>>>> [ +0.000008] ? kasan_complete_mode_report_info+0x26/0x1d0
>>>> [ +0.000007] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>>> [ +0.000405] kasan_report+0xdf/0x120
>>>> [ +0.000009] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>>> [ +0.000405] __asan_report_store8_noabort+0x17/0x20
>>>> [ +0.000007] amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>>> [ +0.000406] ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu]
>>>> [ +0.000408] ? srso_return_thunk+0x5/0x5f
>>>> [ +0.000008] ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm]
>>>> [ +0.000013] ? srso_return_thunk+0x5/0x5f
>>>> [ +0.000008] amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu]
>>>> [ +0.000412] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>>> [ +0.000404] ? try_to_wake_up+0x165/0x1840
>>>> [ +0.000010] ? __pfx_futex_wake_mark+0x10/0x10
>>>> [ +0.000011] drm_ioctl_kernel+0x178/0x2f0 [drm]
>>>> [ +0.000050] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>>> [ +0.000404] ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
>>>> [ +0.000043] ? __kasan_check_read+0x11/0x20
>>>> [ +0.000007] ? srso_return_thunk+0x5/0x5f
>>>> [ +0.000007] ? __kasan_check_write+0x14/0x20
>>>> [ +0.000008] drm_ioctl+0x513/0xd20 [drm]
>>>> [ +0.000040] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>>> [ +0.000407] ? __pfx_drm_ioctl+0x10/0x10 [drm]
>>>> [ +0.000044] ? srso_return_thunk+0x5/0x5f
>>>> [ +0.000007] ? _raw_spin_lock_irqsave+0x99/0x100
>>>> [ +0.000007] ? __pfx__raw_spin_lock_irqsave+0x10/0x10
>>>> [ +0.000006] ? __rseq_handle_notify_resume+0x188/0xc30
>>>> [ +0.000008] ? srso_return_thunk+0x5/0x5f
>>>> [ +0.000008] ? srso_return_thunk+0x5/0x5f
>>>> [ +0.000006] ? _raw_spin_unlock_irqrestore+0x27/0x50
>>>> [ +0.000010] amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu]
>>>> [ +0.000388] __x64_sys_ioctl+0x135/0x1b0
>>>> [ +0.000009] x64_sys_call+0x1205/0x20d0
>>>> [ +0.000007] do_syscall_64+0x4d/0x120
>>>> [ +0.000008] entry_SYSCALL_64_after_hwframe+0x76/0x7e
>>>> [ +0.000007] RIP: 0033:0x7f7c3d31a94f
>>>>
>>>> Signed-off-by: Arunpravin Paneer Selvam
>>>> <Arunpravin.PaneerSelvam@amd.com>
>>>> ---
>>>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 43
>>>> +++++++------------
>>>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 3 +-
>>>> 2 files changed, 17 insertions(+), 29 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>>> index 2e7271362ace..4289bed6c1f7 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>>> @@ -122,10 +122,11 @@ int amdgpu_userq_fence_driver_alloc(struct
>>>> amdgpu_device *adev,
>>>> void amdgpu_userq_fence_driver_process(struct
>>>> amdgpu_userq_fence_driver *fence_drv)
>>>> {
>>>> + struct amdgpu_userq_fence_driver *stored_fence_drv;
>>>> struct amdgpu_userq_fence *userq_fence, *tmp;
>>>> struct dma_fence *fence;
>>>> + unsigned long index;
>>>> u64 rptr;
>>>> - int i;
>>>> if (!fence_drv)
>>>> return;
>>>> @@ -141,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct
>>>> amdgpu_userq_fence_driver *fence_d
>>>> dma_fence_signal(fence);
>>>> - for (i = 0; i < userq_fence->fence_drv_array_count; i++)
>>>> - amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
>>>> + xa_for_each(&userq_fence->fence_drv_xa, index,
>>>> stored_fence_drv)
>>>> + amdgpu_userq_fence_driver_put(stored_fence_drv);
>>>> list_del(&userq_fence->link);
>>>> dma_fence_put(fence);
>>>> @@ -221,34 +222,24 @@ int amdgpu_userq_fence_create(struct
>>>> amdgpu_usermode_queue *userq,
>>>> dma_fence_init(fence, &amdgpu_userq_fence_ops,
>>>> &userq_fence->lock,
>>>> fence_drv->context, seq);
>>>> + xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
>>>> +
>>>> amdgpu_userq_fence_driver_get(fence_drv);
>>>> dma_fence_get(fence);
>>>> if (!xa_empty(&userq->fence_drv_xa)) {
>>>> struct amdgpu_userq_fence_driver *stored_fence_drv;
>>>> - unsigned long index, count = 0;
>>>> - int i = 0;
>>>> -
>>>> - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
>>>> - count++;
>>>> -
>>>> - userq_fence->fence_drv_array =
>>>> - kvmalloc_array(count,
>>>> - sizeof(struct amdgpu_userq_fence_driver *),
>>>> - GFP_KERNEL);
>>>> -
>>>> - if (userq_fence->fence_drv_array) {
>>>> - xa_for_each(&userq->fence_drv_xa, index,
>>>> stored_fence_drv) {
>>>> - userq_fence->fence_drv_array[i] = stored_fence_drv;
>>>> - xa_erase(&userq->fence_drv_xa, index);
>>>> - i++;
>>>> - }
>>>> + unsigned long index_uq;
>>>> + u32 index_uf;
>>>> + int err;
>>>> +
>>>> + xa_for_each(&userq->fence_drv_xa, index_uq,
>>>> stored_fence_drv) {
>>>> + err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
>>>> + stored_fence_drv, xa_limit_32b, GFP_KERNEL);
>>>
>>> That is even worse than what was discussed before. Now you have two
>>> XAs and the second is incorrectly using GFP_KERNEL.
>>
>> I think the problem here is, the WAIT IOCTL updates the
>> userq->fence_drv_xa entries between the 2 xa_for_each blocks
>> exactly at kvmalloc_array memory allocation. Though, we are locking
>> the first and second xa_for_each blocks and having the
>> GFP_ATOMIC in place didnt help to resolve the problem.
>
> Yeah, I agree on the problem. But I don't understand why using
> GFP_ATOMIC didn't solved the issue.
>
>>
>> For example,
>> kvmalloc_array() is allocating the memory for the count value(say 5)
>> and before we acquire the second
>> xa_for_each block lock, the count modified to (say 7) by the WAIT
>> IOCTL xa_alloc() function (by acquiring the same lock),
>> and we would be iterating for the new count. But the memory allocated
>> would be for 5 entries.
>>
>> xa_lock()
>> first xa_for_each block to count the entries
>> xa_unlock()
>
> When you use GFP_ATOMIC you can drop this xa_unlock().
>
>>
>> kvmalloc_array allocates for count 5
>>
>> xa_lock()
>
> And that xa_lock() and so make sure that the xa isn't modified in
> between.
Got it. Thanks!
Regards,
Arun.
>
> Regards,
> Christian.
>
>> second xa_for_each block to move the entries to allocated memory
>> here the count increased to 7
>> xa_unlock
>>
>> Thanks,
>> Arun.
>>>
>>> Regards,
>>> Christian.
>>>
>>>> + if (err)
>>>> + return err;
>>>> }
>>>> -
>>>> - userq_fence->fence_drv_array_count = i;
>>>> - } else {
>>>> - userq_fence->fence_drv_array = NULL;
>>>> - userq_fence->fence_drv_array_count = 0;
>>>> + xa_destroy(&userq->fence_drv_xa);
>>>> }
>>>> /* Check if hardware has already processed the job */
>>>> @@ -300,8 +291,6 @@ static void amdgpu_userq_fence_free(struct
>>>> rcu_head *rcu)
>>>> /* Release the fence driver reference */
>>>> amdgpu_userq_fence_driver_put(fence_drv);
>>>> -
>>>> - kvfree(userq_fence->fence_drv_array);
>>>> kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
>>>> }
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>>> index f1a90840ac1f..3283e5573d10 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>>> @@ -37,9 +37,8 @@ struct amdgpu_userq_fence {
>>>> */
>>>> spinlock_t lock;
>>>> struct list_head link;
>>>> - unsigned long fence_drv_array_count;
>>>> + struct xarray fence_drv_xa;
>>>> struct amdgpu_userq_fence_driver *fence_drv;
>>>> - struct amdgpu_userq_fence_driver **fence_drv_array;
>>>> };
>>>> struct amdgpu_userq_fence_driver {
>>>
>>
>
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2024-12-20 10:45 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-19 10:38 [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Arunpravin Paneer Selvam
2024-12-19 10:38 ` [PATCH v3 2/3] drm/amdgpu: Improve the xa field names readability Arunpravin Paneer Selvam
2024-12-19 10:38 ` [PATCH v3 3/3] drm/amdgpu: Fix wait IOCTL lockup issues Arunpravin Paneer Selvam
2024-12-19 10:43 ` Christian König
2024-12-19 10:41 ` [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Christian König
2024-12-20 10:34 ` Paneer Selvam, Arunpravin
2024-12-20 10:38 ` Christian König
2024-12-20 10:45 ` Paneer Selvam, Arunpravin
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.