All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence
@ 2024-12-19 10:38 Arunpravin Paneer Selvam
  2024-12-19 10:38 ` [PATCH v3 2/3] drm/amdgpu: Improve the xa field names readability Arunpravin Paneer Selvam
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Arunpravin Paneer Selvam @ 2024-12-19 10:38 UTC (permalink / raw)
  To: amd-gfx; +Cc: christian.koenig, alexander.deucher, Arunpravin Paneer Selvam

Fix out-of-bounds issue in userq fence create when
accessing the userq xa structure. Added a lock to
protect the race condition.

v2:(Christian)
  - Acquire xa lock only for the xa_for_each blocks and
    not for the kvmalloc_array() memory allocation part.

v3:
  - Replacing the kvmalloc_array() storage with xa_alloc()
    solves the problem.

BUG: KASAN: slab-out-of-bounds in amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
[  +0.000006] Call Trace:
[  +0.000005]  <TASK>
[  +0.000005]  dump_stack_lvl+0x6c/0x90
[  +0.000011]  print_report+0xc4/0x5e0
[  +0.000009]  ? srso_return_thunk+0x5/0x5f
[  +0.000008]  ? kasan_complete_mode_report_info+0x26/0x1d0
[  +0.000007]  ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
[  +0.000405]  kasan_report+0xdf/0x120
[  +0.000009]  ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
[  +0.000405]  __asan_report_store8_noabort+0x17/0x20
[  +0.000007]  amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
[  +0.000406]  ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu]
[  +0.000408]  ? srso_return_thunk+0x5/0x5f
[  +0.000008]  ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm]
[  +0.000013]  ? srso_return_thunk+0x5/0x5f
[  +0.000008]  amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu]
[  +0.000412]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
[  +0.000404]  ? try_to_wake_up+0x165/0x1840
[  +0.000010]  ? __pfx_futex_wake_mark+0x10/0x10
[  +0.000011]  drm_ioctl_kernel+0x178/0x2f0 [drm]
[  +0.000050]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
[  +0.000404]  ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
[  +0.000043]  ? __kasan_check_read+0x11/0x20
[  +0.000007]  ? srso_return_thunk+0x5/0x5f
[  +0.000007]  ? __kasan_check_write+0x14/0x20
[  +0.000008]  drm_ioctl+0x513/0xd20 [drm]
[  +0.000040]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
[  +0.000407]  ? __pfx_drm_ioctl+0x10/0x10 [drm]
[  +0.000044]  ? srso_return_thunk+0x5/0x5f
[  +0.000007]  ? _raw_spin_lock_irqsave+0x99/0x100
[  +0.000007]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
[  +0.000006]  ? __rseq_handle_notify_resume+0x188/0xc30
[  +0.000008]  ? srso_return_thunk+0x5/0x5f
[  +0.000008]  ? srso_return_thunk+0x5/0x5f
[  +0.000006]  ? _raw_spin_unlock_irqrestore+0x27/0x50
[  +0.000010]  amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu]
[  +0.000388]  __x64_sys_ioctl+0x135/0x1b0
[  +0.000009]  x64_sys_call+0x1205/0x20d0
[  +0.000007]  do_syscall_64+0x4d/0x120
[  +0.000008]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[  +0.000007] RIP: 0033:0x7f7c3d31a94f

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 43 +++++++------------
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |  3 +-
 2 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 2e7271362ace..4289bed6c1f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -122,10 +122,11 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
 
 void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
 {
+	struct amdgpu_userq_fence_driver *stored_fence_drv;
 	struct amdgpu_userq_fence *userq_fence, *tmp;
 	struct dma_fence *fence;
+	unsigned long index;
 	u64 rptr;
-	int i;
 
 	if (!fence_drv)
 		return;
@@ -141,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
 
 		dma_fence_signal(fence);
 
-		for (i = 0; i < userq_fence->fence_drv_array_count; i++)
-			amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
+		xa_for_each(&userq_fence->fence_drv_xa, index, stored_fence_drv)
+			amdgpu_userq_fence_driver_put(stored_fence_drv);
 
 		list_del(&userq_fence->link);
 		dma_fence_put(fence);
@@ -221,34 +222,24 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
 	dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
 		       fence_drv->context, seq);
 
+	xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
+
 	amdgpu_userq_fence_driver_get(fence_drv);
 	dma_fence_get(fence);
 
 	if (!xa_empty(&userq->fence_drv_xa)) {
 		struct amdgpu_userq_fence_driver *stored_fence_drv;
-		unsigned long index, count = 0;
-		int i = 0;
-
-		xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
-			count++;
-
-		userq_fence->fence_drv_array =
-			kvmalloc_array(count,
-				       sizeof(struct amdgpu_userq_fence_driver *),
-				       GFP_KERNEL);
-
-		if (userq_fence->fence_drv_array) {
-			xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
-				userq_fence->fence_drv_array[i] = stored_fence_drv;
-				xa_erase(&userq->fence_drv_xa, index);
-				i++;
-			}
+		unsigned long index_uq;
+		u32 index_uf;
+		int err;
+
+		xa_for_each(&userq->fence_drv_xa, index_uq, stored_fence_drv) {
+			err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
+					   stored_fence_drv, xa_limit_32b, GFP_KERNEL);
+			if (err)
+				return err;
 		}
-
-		userq_fence->fence_drv_array_count = i;
-	} else {
-		userq_fence->fence_drv_array = NULL;
-		userq_fence->fence_drv_array_count = 0;
+		xa_destroy(&userq->fence_drv_xa);
 	}
 
 	/* Check if hardware has already processed the job */
@@ -300,8 +291,6 @@ static void amdgpu_userq_fence_free(struct rcu_head *rcu)
 
 	/* Release the fence driver reference */
 	amdgpu_userq_fence_driver_put(fence_drv);
-
-	kvfree(userq_fence->fence_drv_array);
 	kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
index f1a90840ac1f..3283e5573d10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -37,9 +37,8 @@ struct amdgpu_userq_fence {
 	 */
 	spinlock_t lock;
 	struct list_head link;
-	unsigned long fence_drv_array_count;
+	struct xarray fence_drv_xa;
 	struct amdgpu_userq_fence_driver *fence_drv;
-	struct amdgpu_userq_fence_driver **fence_drv_array;
 };
 
 struct amdgpu_userq_fence_driver {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v3 2/3] drm/amdgpu: Improve the xa field names readability
  2024-12-19 10:38 [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Arunpravin Paneer Selvam
@ 2024-12-19 10:38 ` Arunpravin Paneer Selvam
  2024-12-19 10:38 ` [PATCH v3 3/3] drm/amdgpu: Fix wait IOCTL lockup issues Arunpravin Paneer Selvam
  2024-12-19 10:41 ` [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Christian König
  2 siblings, 0 replies; 8+ messages in thread
From: Arunpravin Paneer Selvam @ 2024-12-19 10:38 UTC (permalink / raw)
  To: amd-gfx; +Cc: christian.koenig, alexander.deucher, Arunpravin Paneer Selvam

Make the xa field names used in the userqueue and userfence
structure more understandable and add more clarity.

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 31 ++++++++++---------
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |  4 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |  8 ++---
 .../gpu/drm/amd/include/amdgpu_userqueue.h    |  2 +-
 4 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 4289bed6c1f7..6876f24e5eda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -97,7 +97,7 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
 	spin_lock_init(&fence_drv->fence_list_lock);
 
 	fence_drv->adev = adev;
-	fence_drv->fence_drv_xa_ptr = &userq->fence_drv_xa;
+	fence_drv->xa_uq = &userq->xa_uq;
 	fence_drv->context = dma_fence_context_alloc(1);
 	get_task_comm(fence_drv->timeline_name, current);
 
@@ -122,8 +122,8 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
 
 void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
 {
-	struct amdgpu_userq_fence_driver *stored_fence_drv;
 	struct amdgpu_userq_fence *userq_fence, *tmp;
+	struct amdgpu_userq_fence_driver *fdrv;
 	struct dma_fence *fence;
 	unsigned long index;
 	u64 rptr;
@@ -142,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
 
 		dma_fence_signal(fence);
 
-		xa_for_each(&userq_fence->fence_drv_xa, index, stored_fence_drv)
-			amdgpu_userq_fence_driver_put(stored_fence_drv);
+		xa_for_each(&userq_fence->xa_uf, index, fdrv)
+			amdgpu_userq_fence_driver_put(fdrv);
 
 		list_del(&userq_fence->link);
 		dma_fence_put(fence);
@@ -222,24 +222,25 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
 	dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
 		       fence_drv->context, seq);
 
-	xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
+	xa_init_flags(&userq_fence->xa_uf, XA_FLAGS_ALLOC);
 
 	amdgpu_userq_fence_driver_get(fence_drv);
 	dma_fence_get(fence);
 
-	if (!xa_empty(&userq->fence_drv_xa)) {
-		struct amdgpu_userq_fence_driver *stored_fence_drv;
+	/* Move all fence driver entries from xa_uq to xa_uf */
+	if (!xa_empty(&userq->xa_uq)) {
+		struct amdgpu_userq_fence_driver *fdrv;
 		unsigned long index_uq;
 		u32 index_uf;
 		int err;
 
-		xa_for_each(&userq->fence_drv_xa, index_uq, stored_fence_drv) {
-			err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
-					   stored_fence_drv, xa_limit_32b, GFP_KERNEL);
+		xa_for_each(&userq->xa_uq, index_uq, fdrv) {
+			err = xa_alloc_irq(&userq_fence->xa_uf, &index_uf,
+					   fdrv, xa_limit_32b, GFP_KERNEL);
 			if (err)
 				return err;
 		}
-		xa_destroy(&userq->fence_drv_xa);
+		xa_destroy(&userq->xa_uq);
 	}
 
 	/* Check if hardware has already processed the job */
@@ -809,7 +810,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
 		for (i = 0, cnt = 0; i < num_fences; i++) {
 			struct amdgpu_userq_fence_driver *fence_drv;
 			struct amdgpu_userq_fence *userq_fence;
-			u32 index;
+			u32 index_uq;
 
 			userq_fence = to_amdgpu_userq_fence(fences[i]);
 			if (!userq_fence) {
@@ -834,9 +835,9 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
 			 * Otherwise, we would gather those references until we don't
 			 * have any more space left and crash.
 			 */
-			if (fence_drv->fence_drv_xa_ptr) {
-				r = xa_alloc(fence_drv->fence_drv_xa_ptr, &index, fence_drv,
-					     xa_limit_32b, GFP_KERNEL);
+			if (fence_drv->xa_uq) {
+				r = xa_alloc(fence_drv->xa_uq, &index_uq,
+					     fence_drv, xa_limit_32b, GFP_KERNEL);
 				if (r)
 					goto free_fences;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
index 3283e5573d10..74edabc8e021 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -37,7 +37,7 @@ struct amdgpu_userq_fence {
 	 */
 	spinlock_t lock;
 	struct list_head link;
-	struct xarray fence_drv_xa;
+	struct xarray xa_uf;
 	struct amdgpu_userq_fence_driver *fence_drv;
 };
 
@@ -54,7 +54,7 @@ struct amdgpu_userq_fence_driver {
 	spinlock_t fence_list_lock;
 	struct list_head fences;
 	struct amdgpu_device *adev;
-	struct xarray *fence_drv_xa_ptr;
+	struct xarray *xa_uq;
 	char timeline_name[TASK_COMM_LEN];
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 85baba323ba5..72ff623f8c8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -48,8 +48,8 @@ static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
 static void
 amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
 {
-	amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
-	xa_destroy(&userq->fence_drv_xa);
+	amdgpu_userq_walk_and_drop_fence_drv(&userq->xa_uq);
+	xa_destroy(&userq->xa_uq);
 	/* Drop the fence_drv reference held by user queue */
 	amdgpu_userq_fence_driver_put(userq->fence_drv);
 }
@@ -73,7 +73,7 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
 	}
 
 	uq_funcs->mqd_destroy(uq_mgr, queue);
-	queue->fence_drv->fence_drv_xa_ptr = NULL;
+	queue->fence_drv->xa_uq = NULL;
 	amdgpu_userq_fence_driver_free(queue);
 	idr_remove(&uq_mgr->userq_idr, queue_id);
 	kfree(queue);
@@ -315,7 +315,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args)
 	}
 	queue->doorbell_index = index;
 
-	xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
+	xa_init_flags(&queue->xa_uq, XA_FLAGS_ALLOC);
 	r = amdgpu_userq_fence_driver_alloc(adev, queue);
 	if (r) {
 		DRM_ERROR("Failed to alloc fence driver\n");
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
index e7e8d79b689d..8048a5fb917a 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -53,7 +53,7 @@ struct amdgpu_usermode_queue {
 	struct amdgpu_userq_obj	db_obj;
 	struct amdgpu_userq_obj fw_obj;
 	struct amdgpu_userq_obj wptr_obj;
-	struct xarray		fence_drv_xa;
+	struct xarray		xa_uq;
 	struct amdgpu_userq_fence_driver *fence_drv;
 	struct dma_fence	*last_fence;
 };
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v3 3/3] drm/amdgpu: Fix wait IOCTL lockup issues.
  2024-12-19 10:38 [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Arunpravin Paneer Selvam
  2024-12-19 10:38 ` [PATCH v3 2/3] drm/amdgpu: Improve the xa field names readability Arunpravin Paneer Selvam
@ 2024-12-19 10:38 ` Arunpravin Paneer Selvam
  2024-12-19 10:43   ` Christian König
  2024-12-19 10:41 ` [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Christian König
  2 siblings, 1 reply; 8+ messages in thread
From: Arunpravin Paneer Selvam @ 2024-12-19 10:38 UTC (permalink / raw)
  To: amd-gfx; +Cc: christian.koenig, alexander.deucher, Arunpravin Paneer Selvam

Fix the wait IOCTL lockup issue.

v2:(Christian)
  - The problem is that you need to lock all BOs under a single
    drm_exec_until_all_locked() loop. Otherwise not all BOs
    would be locked again on contention.

v3: Locking vm pd when accessing the GEM BO's solves the lockup
    issues.

[Dec 6 17:53] watchdog: BUG: soft lockup - CPU#4 stuck for 26s! [Xwayland:cs0:2634]
[  +0.000002] RIP: 0010:amdgpu_userq_wait_ioctl+0x3ce/0xfe0 [amdgpu]
[  +0.000003] RSP: 0018:ffffada901f4fc38 EFLAGS: 00000202
[  +0.000003] RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000001
[  +0.000001] RDX: ffff9fc28b974048 RSI: 0000000000000010 RDI: ffffada901f4fce8
[  +0.000002] RBP: ffffada901f4fd58 R08: ffff9fc28b974148 R09: 0000000000000000
[  +0.000002] R10: ffffada901f4fbf0 R11: 0000000000000001 R12: ffff9fc28ed1ac00
[  +0.000002] R13: 0000000000000000 R14: 0000000000000010 R15: ffffada901f4fe00
[  +0.000002] FS:  00007f3a00a00640(0000) GS:ffff9fc99e800000(0000) knlGS:0000000000000000
[  +0.000002] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  +0.000002] CR2: 00007f8e92137020 CR3: 000000013380e000 CR4: 0000000000350ef0
[  +0.000002] DR0: ffffffff90921610 DR1: ffffffff90921611 DR2: ffffffff90921612
[  +0.000001] DR3: ffffffff90921613 DR6: 00000000ffff0ff0 DR7: 0000000000000600
[  +0.000002] Call Trace:
[  +0.000002]  <IRQ>
[  +0.000004]  ? show_regs+0x69/0x80
[  +0.000005]  ? watchdog_timer_fn+0x271/0x300
[  +0.000005]  ? __pfx_watchdog_timer_fn+0x10/0x10
[  +0.000003]  ? __hrtimer_run_queues+0x114/0x2a0
[  +0.000006]  ? hrtimer_interrupt+0x110/0x240
[  +0.000005]  ? __sysvec_apic_timer_interrupt+0x73/0x180
[  +0.000004]  ? sysvec_apic_timer_interrupt+0xaa/0xd0
[  +0.000004]  </IRQ>
[  +0.000002]  <TASK>
[  +0.000002]  ? asm_sysvec_apic_timer_interrupt+0x1b/0x20
[  +0.000006]  ? amdgpu_userq_wait_ioctl+0x3ce/0xfe0 [amdgpu]
[  +0.000162]  ? amdgpu_userq_wait_ioctl+0x3cc/0xfe0 [amdgpu]
[  +0.000156]  ? finish_task_switch.isra.0+0x94/0x290
[  +0.000010]  ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
[  +0.000161]  drm_ioctl_kernel+0xaa/0x100 [drm]
[  +0.000025]  drm_ioctl+0x29d/0x500 [drm]
[  +0.000017]  ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
[  +0.000160]  ? srso_return_thunk+0x5/0x5f
[  +0.000004]  ? srso_return_thunk+0x5/0x5f
[  +0.000003]  ? _raw_spin_unlock_irqrestore+0x27/0x50
[  +0.000004]  amdgpu_drm_ioctl+0x4b/0x80 [amdgpu]
[  +0.000141]  __x64_sys_ioctl+0x95/0xd0
[  +0.000005]  x64_sys_call+0x1205/0x20d0
[  +0.000003]  do_syscall_64+0x4d/0x120
[  +0.000004]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[  +0.000002] RIP: 0033:0x7f3a0bb1a94f
[  +0.000002] RSP: 002b:00007f3a009ff870 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  +0.000003] RAX: ffffffffffffffda RBX: 00007f3a009ff9f0 RCX: 00007f3a0bb1a94f
[  +0.000001] RDX: 00007f3a009ff9f0 RSI: 00000000c0406458 RDI: 000000000000000c
[  +0.000002] RBP: 00007f3a009ff8f0 R08: 0000000000000001 R09: 0000000000000000
[  +0.000002] R10: 0000000000000002 R11: 0000000000000246 R12: 0000561824bf39e0
[  +0.000002] R13: 00000000c0406458 R14: 000000000000000c R15: 0000561824a25b60
[  +0.000005]  </TASK>
[ +27.998164] watchdog: BUG: soft lockup - CPU#4 stuck for 52s! [Xwayland:cs0:2634]
[  +0.000002] RIP: 0010:drm_exec_unlock_all+0x76/0xc0 [drm_exec]
[  +0.000002] RSP: 0018:ffffada901f4fbf8 EFLAGS: 00000246
[  +0.000003] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000001
[  +0.000002] RDX: ffff9fc28b974048 RSI: 0000000000000010 RDI: 0000000000000000
[  +0.000001] RBP: ffffada901f4fc10 R08: ffff9fc28b974148 R09: 0000000000000000
[  +0.000002] R10: ffffada901f4fbf0 R11: 0000000000000001 R12: ffff9fc28ed1ac00
[  +0.000002] R13: 00000000ffffffff R14: ffffada901f4fce8 R15: ffffada901f4fe00
[  +0.000002] FS:  00007f3a00a00640(0000) GS:ffff9fc99e800000(0000) knlGS:0000000000000000
[  +0.000002] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  +0.000002] CR2: 00007f8e92137020 CR3: 000000013380e000 CR4: 0000000000350ef0
[  +0.000002] DR0: ffffffff90921610 DR1: ffffffff90921611 DR2: ffffffff90921612
[  +0.000002] DR3: ffffffff90921613 DR6: 00000000ffff0ff0 DR7: 0000000000000600
[  +0.000002] Call Trace:
[  +0.000002]  <IRQ>
[  +0.000003]  ? show_regs+0x69/0x80
[  +0.000006]  ? watchdog_timer_fn+0x271/0x300
[  +0.000004]  ? __pfx_watchdog_timer_fn+0x10/0x10
[  +0.000003]  ? __hrtimer_run_queues+0x114/0x2a0
[  +0.000006]  ? hrtimer_interrupt+0x110/0x240
[  +0.000005]  ? __sysvec_apic_timer_interrupt+0x73/0x180
[  +0.000004]  ? sysvec_apic_timer_interrupt+0xaa/0xd0
[  +0.000004]  </IRQ>
[  +0.000002]  <TASK>
[  +0.000002]  ? asm_sysvec_apic_timer_interrupt+0x1b/0x20
[  +0.000006]  ? drm_exec_unlock_all+0x76/0xc0 [drm_exec]
[  +0.000004]  drm_exec_cleanup+0x77/0x90 [drm_exec]
[  +0.000004]  amdgpu_userq_wait_ioctl+0x3cc/0xfe0 [amdgpu]
[  +0.000206]  ? finish_task_switch.isra.0+0x94/0x290
[  +0.000010]  ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
[  +0.000159]  drm_ioctl_kernel+0xaa/0x100 [drm]
[  +0.000026]  drm_ioctl+0x29d/0x500 [drm]
[  +0.000017]  ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
[  +0.000163]  ? srso_return_thunk+0x5/0x5f
[  +0.000005]  ? srso_return_thunk+0x5/0x5f
[  +0.000002]  ? _raw_spin_unlock_irqrestore+0x27/0x50
[  +0.000005]  amdgpu_drm_ioctl+0x4b/0x80 [amdgpu]
[  +0.000143]  __x64_sys_ioctl+0x95/0xd0
[  +0.000005]  x64_sys_call+0x1205/0x20d0
[  +0.000004]  do_syscall_64+0x4d/0x120
[  +0.000003]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[  +0.000002] RIP: 0033:0x7f3a0bb1a94f
[  +0.000002] RSP: 002b:00007f3a009ff870 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  +0.000003] RAX: ffffffffffffffda RBX: 00007f3a009ff9f0 RCX: 00007f3a0bb1a94f
[  +0.000002] RDX: 00007f3a009ff9f0 RSI: 00000000c0406458 RDI: 000000000000000c
[  +0.000002] RBP: 00007f3a009ff8f0 R08: 0000000000000001 R09: 0000000000000000
[  +0.000001] R10: 0000000000000002 R11: 0000000000000246 R12: 0000561824bf39e0
[  +0.000002] R13: 00000000c0406458 R14: 000000000000000c R15: 0000561824a25b60
[  +0.000006]  </TASK>

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 45 ++++++++++++-------
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 6876f24e5eda..35d306fc069d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -463,22 +463,6 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 		goto put_gobj_write;
 	}
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
-		      (num_read_bo_handles + num_write_bo_handles));
-
-	/* Lock all BOs with retry handling */
-	drm_exec_until_all_locked(&exec) {
-		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
-		drm_exec_retry_on_contention(&exec);
-		if (r)
-			goto exec_fini;
-
-		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
-		drm_exec_retry_on_contention(&exec);
-		if (r)
-			goto exec_fini;
-	}
-
 	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
 	if (r)
 		goto exec_fini;
@@ -495,6 +479,27 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 	queue->last_fence = dma_fence_get(fence);
 	mutex_unlock(&uq_mgr->userq_mutex);
 
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
+		      (num_read_bo_handles + num_write_bo_handles));
+
+	/* Lock all BOs with retry handling */
+	drm_exec_until_all_locked(&exec) {
+		r = amdgpu_vm_lock_pd(queue->vm, &exec, 0);
+		drm_exec_retry_on_contention(&exec);
+		if (r)
+			goto exec_fini;
+
+		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+		drm_exec_retry_on_contention(&exec);
+		if (r)
+			goto exec_fini;
+
+		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+		drm_exec_retry_on_contention(&exec);
+		if (r)
+			goto exec_fini;
+	}
+
 	for (i = 0; i < num_read_bo_handles; i++) {
 		if (!gobj_read || !gobj_read[i]->resv)
 			continue;
@@ -558,6 +563,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
 	u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
 	struct drm_amdgpu_userq_fence_info *fence_info = NULL;
 	struct drm_amdgpu_userq_wait *wait_info = data;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	struct drm_gem_object **gobj_write;
 	struct drm_gem_object **gobj_read;
 	struct dma_fence **fences = NULL;
@@ -635,6 +641,13 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
 
 	/* Lock all BOs with retry handling */
 	drm_exec_until_all_locked(&exec) {
+		r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0);
+		drm_exec_retry_on_contention(&exec);
+		if (r) {
+			drm_exec_fini(&exec);
+			goto put_gobj_write;
+		}
+
 		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
 		drm_exec_retry_on_contention(&exec);
 		if (r) {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence
  2024-12-19 10:38 [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Arunpravin Paneer Selvam
  2024-12-19 10:38 ` [PATCH v3 2/3] drm/amdgpu: Improve the xa field names readability Arunpravin Paneer Selvam
  2024-12-19 10:38 ` [PATCH v3 3/3] drm/amdgpu: Fix wait IOCTL lockup issues Arunpravin Paneer Selvam
@ 2024-12-19 10:41 ` Christian König
  2024-12-20 10:34   ` Paneer Selvam, Arunpravin
  2 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2024-12-19 10:41 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam, amd-gfx; +Cc: alexander.deucher



Am 19.12.24 um 11:38 schrieb Arunpravin Paneer Selvam:
> Fix out-of-bounds issue in userq fence create when
> accessing the userq xa structure. Added a lock to
> protect the race condition.
>
> v2:(Christian)
>    - Acquire xa lock only for the xa_for_each blocks and
>      not for the kvmalloc_array() memory allocation part.
>
> v3:
>    - Replacing the kvmalloc_array() storage with xa_alloc()
>      solves the problem.
>
> BUG: KASAN: slab-out-of-bounds in amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
> [  +0.000006] Call Trace:
> [  +0.000005]  <TASK>
> [  +0.000005]  dump_stack_lvl+0x6c/0x90
> [  +0.000011]  print_report+0xc4/0x5e0
> [  +0.000009]  ? srso_return_thunk+0x5/0x5f
> [  +0.000008]  ? kasan_complete_mode_report_info+0x26/0x1d0
> [  +0.000007]  ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
> [  +0.000405]  kasan_report+0xdf/0x120
> [  +0.000009]  ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
> [  +0.000405]  __asan_report_store8_noabort+0x17/0x20
> [  +0.000007]  amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
> [  +0.000406]  ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu]
> [  +0.000408]  ? srso_return_thunk+0x5/0x5f
> [  +0.000008]  ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm]
> [  +0.000013]  ? srso_return_thunk+0x5/0x5f
> [  +0.000008]  amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu]
> [  +0.000412]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
> [  +0.000404]  ? try_to_wake_up+0x165/0x1840
> [  +0.000010]  ? __pfx_futex_wake_mark+0x10/0x10
> [  +0.000011]  drm_ioctl_kernel+0x178/0x2f0 [drm]
> [  +0.000050]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
> [  +0.000404]  ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
> [  +0.000043]  ? __kasan_check_read+0x11/0x20
> [  +0.000007]  ? srso_return_thunk+0x5/0x5f
> [  +0.000007]  ? __kasan_check_write+0x14/0x20
> [  +0.000008]  drm_ioctl+0x513/0xd20 [drm]
> [  +0.000040]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
> [  +0.000407]  ? __pfx_drm_ioctl+0x10/0x10 [drm]
> [  +0.000044]  ? srso_return_thunk+0x5/0x5f
> [  +0.000007]  ? _raw_spin_lock_irqsave+0x99/0x100
> [  +0.000007]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
> [  +0.000006]  ? __rseq_handle_notify_resume+0x188/0xc30
> [  +0.000008]  ? srso_return_thunk+0x5/0x5f
> [  +0.000008]  ? srso_return_thunk+0x5/0x5f
> [  +0.000006]  ? _raw_spin_unlock_irqrestore+0x27/0x50
> [  +0.000010]  amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu]
> [  +0.000388]  __x64_sys_ioctl+0x135/0x1b0
> [  +0.000009]  x64_sys_call+0x1205/0x20d0
> [  +0.000007]  do_syscall_64+0x4d/0x120
> [  +0.000008]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [  +0.000007] RIP: 0033:0x7f7c3d31a94f
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 43 +++++++------------
>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |  3 +-
>   2 files changed, 17 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> index 2e7271362ace..4289bed6c1f7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> @@ -122,10 +122,11 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
>   
>   void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
>   {
> +	struct amdgpu_userq_fence_driver *stored_fence_drv;
>   	struct amdgpu_userq_fence *userq_fence, *tmp;
>   	struct dma_fence *fence;
> +	unsigned long index;
>   	u64 rptr;
> -	int i;
>   
>   	if (!fence_drv)
>   		return;
> @@ -141,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
>   
>   		dma_fence_signal(fence);
>   
> -		for (i = 0; i < userq_fence->fence_drv_array_count; i++)
> -			amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
> +		xa_for_each(&userq_fence->fence_drv_xa, index, stored_fence_drv)
> +			amdgpu_userq_fence_driver_put(stored_fence_drv);
>   
>   		list_del(&userq_fence->link);
>   		dma_fence_put(fence);
> @@ -221,34 +222,24 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
>   	dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
>   		       fence_drv->context, seq);
>   
> +	xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
> +
>   	amdgpu_userq_fence_driver_get(fence_drv);
>   	dma_fence_get(fence);
>   
>   	if (!xa_empty(&userq->fence_drv_xa)) {
>   		struct amdgpu_userq_fence_driver *stored_fence_drv;
> -		unsigned long index, count = 0;
> -		int i = 0;
> -
> -		xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
> -			count++;
> -
> -		userq_fence->fence_drv_array =
> -			kvmalloc_array(count,
> -				       sizeof(struct amdgpu_userq_fence_driver *),
> -				       GFP_KERNEL);
> -
> -		if (userq_fence->fence_drv_array) {
> -			xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
> -				userq_fence->fence_drv_array[i] = stored_fence_drv;
> -				xa_erase(&userq->fence_drv_xa, index);
> -				i++;
> -			}
> +		unsigned long index_uq;
> +		u32 index_uf;
> +		int err;
> +
> +		xa_for_each(&userq->fence_drv_xa, index_uq, stored_fence_drv) {
> +			err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
> +					   stored_fence_drv, xa_limit_32b, GFP_KERNEL);

That is even worse than what was discussed before. Now you have two XAs 
and the second is incorrectly using GFP_KERNEL.

Regards,
Christian.

> +			if (err)
> +				return err;
>   		}
> -
> -		userq_fence->fence_drv_array_count = i;
> -	} else {
> -		userq_fence->fence_drv_array = NULL;
> -		userq_fence->fence_drv_array_count = 0;
> +		xa_destroy(&userq->fence_drv_xa);
>   	}
>   
>   	/* Check if hardware has already processed the job */
> @@ -300,8 +291,6 @@ static void amdgpu_userq_fence_free(struct rcu_head *rcu)
>   
>   	/* Release the fence driver reference */
>   	amdgpu_userq_fence_driver_put(fence_drv);
> -
> -	kvfree(userq_fence->fence_drv_array);
>   	kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> index f1a90840ac1f..3283e5573d10 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> @@ -37,9 +37,8 @@ struct amdgpu_userq_fence {
>   	 */
>   	spinlock_t lock;
>   	struct list_head link;
> -	unsigned long fence_drv_array_count;
> +	struct xarray fence_drv_xa;
>   	struct amdgpu_userq_fence_driver *fence_drv;
> -	struct amdgpu_userq_fence_driver **fence_drv_array;
>   };
>   
>   struct amdgpu_userq_fence_driver {


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 3/3] drm/amdgpu: Fix wait IOCTL lockup issues.
  2024-12-19 10:38 ` [PATCH v3 3/3] drm/amdgpu: Fix wait IOCTL lockup issues Arunpravin Paneer Selvam
@ 2024-12-19 10:43   ` Christian König
  0 siblings, 0 replies; 8+ messages in thread
From: Christian König @ 2024-12-19 10:43 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam, amd-gfx; +Cc: alexander.deucher

Am 19.12.24 um 11:38 schrieb Arunpravin Paneer Selvam:
> Fix the wait IOCTL lockup issue.
>
> v2:(Christian)
>    - The problem is that you need to lock all BOs under a single
>      drm_exec_until_all_locked() loop. Otherwise not all BOs
>      would be locked again on contention.
>
> v3: Locking vm pd when accessing the GEM BO's solves the lockup
>      issues.
>
> [Dec 6 17:53] watchdog: BUG: soft lockup - CPU#4 stuck for 26s! [Xwayland:cs0:2634]
> [  +0.000002] RIP: 0010:amdgpu_userq_wait_ioctl+0x3ce/0xfe0 [amdgpu]
> [  +0.000003] RSP: 0018:ffffada901f4fc38 EFLAGS: 00000202
> [  +0.000003] RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000001
> [  +0.000001] RDX: ffff9fc28b974048 RSI: 0000000000000010 RDI: ffffada901f4fce8
> [  +0.000002] RBP: ffffada901f4fd58 R08: ffff9fc28b974148 R09: 0000000000000000
> [  +0.000002] R10: ffffada901f4fbf0 R11: 0000000000000001 R12: ffff9fc28ed1ac00
> [  +0.000002] R13: 0000000000000000 R14: 0000000000000010 R15: ffffada901f4fe00
> [  +0.000002] FS:  00007f3a00a00640(0000) GS:ffff9fc99e800000(0000) knlGS:0000000000000000
> [  +0.000002] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  +0.000002] CR2: 00007f8e92137020 CR3: 000000013380e000 CR4: 0000000000350ef0
> [  +0.000002] DR0: ffffffff90921610 DR1: ffffffff90921611 DR2: ffffffff90921612
> [  +0.000001] DR3: ffffffff90921613 DR6: 00000000ffff0ff0 DR7: 0000000000000600
> [  +0.000002] Call Trace:
> [  +0.000002]  <IRQ>
> [  +0.000004]  ? show_regs+0x69/0x80
> [  +0.000005]  ? watchdog_timer_fn+0x271/0x300
> [  +0.000005]  ? __pfx_watchdog_timer_fn+0x10/0x10
> [  +0.000003]  ? __hrtimer_run_queues+0x114/0x2a0
> [  +0.000006]  ? hrtimer_interrupt+0x110/0x240
> [  +0.000005]  ? __sysvec_apic_timer_interrupt+0x73/0x180
> [  +0.000004]  ? sysvec_apic_timer_interrupt+0xaa/0xd0
> [  +0.000004]  </IRQ>
> [  +0.000002]  <TASK>
> [  +0.000002]  ? asm_sysvec_apic_timer_interrupt+0x1b/0x20
> [  +0.000006]  ? amdgpu_userq_wait_ioctl+0x3ce/0xfe0 [amdgpu]
> [  +0.000162]  ? amdgpu_userq_wait_ioctl+0x3cc/0xfe0 [amdgpu]
> [  +0.000156]  ? finish_task_switch.isra.0+0x94/0x290
> [  +0.000010]  ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
> [  +0.000161]  drm_ioctl_kernel+0xaa/0x100 [drm]
> [  +0.000025]  drm_ioctl+0x29d/0x500 [drm]
> [  +0.000017]  ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
> [  +0.000160]  ? srso_return_thunk+0x5/0x5f
> [  +0.000004]  ? srso_return_thunk+0x5/0x5f
> [  +0.000003]  ? _raw_spin_unlock_irqrestore+0x27/0x50
> [  +0.000004]  amdgpu_drm_ioctl+0x4b/0x80 [amdgpu]
> [  +0.000141]  __x64_sys_ioctl+0x95/0xd0
> [  +0.000005]  x64_sys_call+0x1205/0x20d0
> [  +0.000003]  do_syscall_64+0x4d/0x120
> [  +0.000004]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [  +0.000002] RIP: 0033:0x7f3a0bb1a94f
> [  +0.000002] RSP: 002b:00007f3a009ff870 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
> [  +0.000003] RAX: ffffffffffffffda RBX: 00007f3a009ff9f0 RCX: 00007f3a0bb1a94f
> [  +0.000001] RDX: 00007f3a009ff9f0 RSI: 00000000c0406458 RDI: 000000000000000c
> [  +0.000002] RBP: 00007f3a009ff8f0 R08: 0000000000000001 R09: 0000000000000000
> [  +0.000002] R10: 0000000000000002 R11: 0000000000000246 R12: 0000561824bf39e0
> [  +0.000002] R13: 00000000c0406458 R14: 000000000000000c R15: 0000561824a25b60
> [  +0.000005]  </TASK>
> [ +27.998164] watchdog: BUG: soft lockup - CPU#4 stuck for 52s! [Xwayland:cs0:2634]
> [  +0.000002] RIP: 0010:drm_exec_unlock_all+0x76/0xc0 [drm_exec]
> [  +0.000002] RSP: 0018:ffffada901f4fbf8 EFLAGS: 00000246
> [  +0.000003] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000001
> [  +0.000002] RDX: ffff9fc28b974048 RSI: 0000000000000010 RDI: 0000000000000000
> [  +0.000001] RBP: ffffada901f4fc10 R08: ffff9fc28b974148 R09: 0000000000000000
> [  +0.000002] R10: ffffada901f4fbf0 R11: 0000000000000001 R12: ffff9fc28ed1ac00
> [  +0.000002] R13: 00000000ffffffff R14: ffffada901f4fce8 R15: ffffada901f4fe00
> [  +0.000002] FS:  00007f3a00a00640(0000) GS:ffff9fc99e800000(0000) knlGS:0000000000000000
> [  +0.000002] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  +0.000002] CR2: 00007f8e92137020 CR3: 000000013380e000 CR4: 0000000000350ef0
> [  +0.000002] DR0: ffffffff90921610 DR1: ffffffff90921611 DR2: ffffffff90921612
> [  +0.000002] DR3: ffffffff90921613 DR6: 00000000ffff0ff0 DR7: 0000000000000600
> [  +0.000002] Call Trace:
> [  +0.000002]  <IRQ>
> [  +0.000003]  ? show_regs+0x69/0x80
> [  +0.000006]  ? watchdog_timer_fn+0x271/0x300
> [  +0.000004]  ? __pfx_watchdog_timer_fn+0x10/0x10
> [  +0.000003]  ? __hrtimer_run_queues+0x114/0x2a0
> [  +0.000006]  ? hrtimer_interrupt+0x110/0x240
> [  +0.000005]  ? __sysvec_apic_timer_interrupt+0x73/0x180
> [  +0.000004]  ? sysvec_apic_timer_interrupt+0xaa/0xd0
> [  +0.000004]  </IRQ>
> [  +0.000002]  <TASK>
> [  +0.000002]  ? asm_sysvec_apic_timer_interrupt+0x1b/0x20
> [  +0.000006]  ? drm_exec_unlock_all+0x76/0xc0 [drm_exec]
> [  +0.000004]  drm_exec_cleanup+0x77/0x90 [drm_exec]
> [  +0.000004]  amdgpu_userq_wait_ioctl+0x3cc/0xfe0 [amdgpu]
> [  +0.000206]  ? finish_task_switch.isra.0+0x94/0x290
> [  +0.000010]  ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
> [  +0.000159]  drm_ioctl_kernel+0xaa/0x100 [drm]
> [  +0.000026]  drm_ioctl+0x29d/0x500 [drm]
> [  +0.000017]  ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu]
> [  +0.000163]  ? srso_return_thunk+0x5/0x5f
> [  +0.000005]  ? srso_return_thunk+0x5/0x5f
> [  +0.000002]  ? _raw_spin_unlock_irqrestore+0x27/0x50
> [  +0.000005]  amdgpu_drm_ioctl+0x4b/0x80 [amdgpu]
> [  +0.000143]  __x64_sys_ioctl+0x95/0xd0
> [  +0.000005]  x64_sys_call+0x1205/0x20d0
> [  +0.000004]  do_syscall_64+0x4d/0x120
> [  +0.000003]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [  +0.000002] RIP: 0033:0x7f3a0bb1a94f
> [  +0.000002] RSP: 002b:00007f3a009ff870 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
> [  +0.000003] RAX: ffffffffffffffda RBX: 00007f3a009ff9f0 RCX: 00007f3a0bb1a94f
> [  +0.000002] RDX: 00007f3a009ff9f0 RSI: 00000000c0406458 RDI: 000000000000000c
> [  +0.000002] RBP: 00007f3a009ff8f0 R08: 0000000000000001 R09: 0000000000000000
> [  +0.000001] R10: 0000000000000002 R11: 0000000000000246 R12: 0000561824bf39e0
> [  +0.000002] R13: 00000000c0406458 R14: 000000000000000c R15: 0000561824a25b60
> [  +0.000006]  </TASK>
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 45 ++++++++++++-------
>   1 file changed, 29 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> index 6876f24e5eda..35d306fc069d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> @@ -463,22 +463,6 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
>   		goto put_gobj_write;
>   	}
>   
> -	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
> -		      (num_read_bo_handles + num_write_bo_handles));
> -
> -	/* Lock all BOs with retry handling */
> -	drm_exec_until_all_locked(&exec) {
> -		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
> -		drm_exec_retry_on_contention(&exec);
> -		if (r)
> -			goto exec_fini;
> -
> -		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
> -		drm_exec_retry_on_contention(&exec);
> -		if (r)
> -			goto exec_fini;
> -	}
> -
>   	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
>   	if (r)
>   		goto exec_fini;
> @@ -495,6 +479,27 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
>   	queue->last_fence = dma_fence_get(fence);
>   	mutex_unlock(&uq_mgr->userq_mutex);
>   
> +	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
> +		      (num_read_bo_handles + num_write_bo_handles));
> +
> +	/* Lock all BOs with retry handling */
> +	drm_exec_until_all_locked(&exec) {
> +		r = amdgpu_vm_lock_pd(queue->vm, &exec, 0);

Well that absolutely makes no sense at all.

First of all we need to lock the BOs *before* we create the fence. Then 
there is no need to lock the VM PD here.

I have no idea what you are trying to do here.

Regards,
Christian.

> +		drm_exec_retry_on_contention(&exec);
> +		if (r)
> +			goto exec_fini;
> +
> +		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
> +		drm_exec_retry_on_contention(&exec);
> +		if (r)
> +			goto exec_fini;
> +
> +		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
> +		drm_exec_retry_on_contention(&exec);
> +		if (r)
> +			goto exec_fini;
> +	}
> +
>   	for (i = 0; i < num_read_bo_handles; i++) {
>   		if (!gobj_read || !gobj_read[i]->resv)
>   			continue;
> @@ -558,6 +563,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>   	u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
>   	struct drm_amdgpu_userq_fence_info *fence_info = NULL;
>   	struct drm_amdgpu_userq_wait *wait_info = data;
> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
>   	struct drm_gem_object **gobj_write;
>   	struct drm_gem_object **gobj_read;
>   	struct dma_fence **fences = NULL;
> @@ -635,6 +641,13 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>   
>   	/* Lock all BOs with retry handling */
>   	drm_exec_until_all_locked(&exec) {
> +		r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0);
> +		drm_exec_retry_on_contention(&exec);
> +		if (r) {
> +			drm_exec_fini(&exec);
> +			goto put_gobj_write;
> +		}
> +
>   		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
>   		drm_exec_retry_on_contention(&exec);
>   		if (r) {


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence
  2024-12-19 10:41 ` [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Christian König
@ 2024-12-20 10:34   ` Paneer Selvam, Arunpravin
  2024-12-20 10:38     ` Christian König
  0 siblings, 1 reply; 8+ messages in thread
From: Paneer Selvam, Arunpravin @ 2024-12-20 10:34 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: alexander.deucher

Hi Christian,


On 12/19/2024 4:11 PM, Christian König wrote:
>
>
> Am 19.12.24 um 11:38 schrieb Arunpravin Paneer Selvam:
>> Fix out-of-bounds issue in userq fence create when
>> accessing the userq xa structure. Added a lock to
>> protect the race condition.
>>
>> v2:(Christian)
>>    - Acquire xa lock only for the xa_for_each blocks and
>>      not for the kvmalloc_array() memory allocation part.
>>
>> v3:
>>    - Replacing the kvmalloc_array() storage with xa_alloc()
>>      solves the problem.
>>
>> BUG: KASAN: slab-out-of-bounds in 
>> amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>> [  +0.000006] Call Trace:
>> [  +0.000005]  <TASK>
>> [  +0.000005]  dump_stack_lvl+0x6c/0x90
>> [  +0.000011]  print_report+0xc4/0x5e0
>> [  +0.000009]  ? srso_return_thunk+0x5/0x5f
>> [  +0.000008]  ? kasan_complete_mode_report_info+0x26/0x1d0
>> [  +0.000007]  ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>> [  +0.000405]  kasan_report+0xdf/0x120
>> [  +0.000009]  ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>> [  +0.000405]  __asan_report_store8_noabort+0x17/0x20
>> [  +0.000007]  amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>> [  +0.000406]  ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu]
>> [  +0.000408]  ? srso_return_thunk+0x5/0x5f
>> [  +0.000008]  ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm]
>> [  +0.000013]  ? srso_return_thunk+0x5/0x5f
>> [  +0.000008]  amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu]
>> [  +0.000412]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>> [  +0.000404]  ? try_to_wake_up+0x165/0x1840
>> [  +0.000010]  ? __pfx_futex_wake_mark+0x10/0x10
>> [  +0.000011]  drm_ioctl_kernel+0x178/0x2f0 [drm]
>> [  +0.000050]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>> [  +0.000404]  ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
>> [  +0.000043]  ? __kasan_check_read+0x11/0x20
>> [  +0.000007]  ? srso_return_thunk+0x5/0x5f
>> [  +0.000007]  ? __kasan_check_write+0x14/0x20
>> [  +0.000008]  drm_ioctl+0x513/0xd20 [drm]
>> [  +0.000040]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>> [  +0.000407]  ? __pfx_drm_ioctl+0x10/0x10 [drm]
>> [  +0.000044]  ? srso_return_thunk+0x5/0x5f
>> [  +0.000007]  ? _raw_spin_lock_irqsave+0x99/0x100
>> [  +0.000007]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
>> [  +0.000006]  ? __rseq_handle_notify_resume+0x188/0xc30
>> [  +0.000008]  ? srso_return_thunk+0x5/0x5f
>> [  +0.000008]  ? srso_return_thunk+0x5/0x5f
>> [  +0.000006]  ? _raw_spin_unlock_irqrestore+0x27/0x50
>> [  +0.000010]  amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu]
>> [  +0.000388]  __x64_sys_ioctl+0x135/0x1b0
>> [  +0.000009]  x64_sys_call+0x1205/0x20d0
>> [  +0.000007]  do_syscall_64+0x4d/0x120
>> [  +0.000008]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
>> [  +0.000007] RIP: 0033:0x7f7c3d31a94f
>>
>> Signed-off-by: Arunpravin Paneer Selvam 
>> <Arunpravin.PaneerSelvam@amd.com>
>> ---
>>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 43 +++++++------------
>>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |  3 +-
>>   2 files changed, 17 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> index 2e7271362ace..4289bed6c1f7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> @@ -122,10 +122,11 @@ int amdgpu_userq_fence_driver_alloc(struct 
>> amdgpu_device *adev,
>>     void amdgpu_userq_fence_driver_process(struct 
>> amdgpu_userq_fence_driver *fence_drv)
>>   {
>> +    struct amdgpu_userq_fence_driver *stored_fence_drv;
>>       struct amdgpu_userq_fence *userq_fence, *tmp;
>>       struct dma_fence *fence;
>> +    unsigned long index;
>>       u64 rptr;
>> -    int i;
>>         if (!fence_drv)
>>           return;
>> @@ -141,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct 
>> amdgpu_userq_fence_driver *fence_d
>>             dma_fence_signal(fence);
>>   -        for (i = 0; i < userq_fence->fence_drv_array_count; i++)
>> - amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
>> +        xa_for_each(&userq_fence->fence_drv_xa, index, 
>> stored_fence_drv)
>> +            amdgpu_userq_fence_driver_put(stored_fence_drv);
>>             list_del(&userq_fence->link);
>>           dma_fence_put(fence);
>> @@ -221,34 +222,24 @@ int amdgpu_userq_fence_create(struct 
>> amdgpu_usermode_queue *userq,
>>       dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
>>                  fence_drv->context, seq);
>>   +    xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
>> +
>>       amdgpu_userq_fence_driver_get(fence_drv);
>>       dma_fence_get(fence);
>>         if (!xa_empty(&userq->fence_drv_xa)) {
>>           struct amdgpu_userq_fence_driver *stored_fence_drv;
>> -        unsigned long index, count = 0;
>> -        int i = 0;
>> -
>> -        xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
>> -            count++;
>> -
>> -        userq_fence->fence_drv_array =
>> -            kvmalloc_array(count,
>> -                       sizeof(struct amdgpu_userq_fence_driver *),
>> -                       GFP_KERNEL);
>> -
>> -        if (userq_fence->fence_drv_array) {
>> -            xa_for_each(&userq->fence_drv_xa, index, 
>> stored_fence_drv) {
>> -                userq_fence->fence_drv_array[i] = stored_fence_drv;
>> -                xa_erase(&userq->fence_drv_xa, index);
>> -                i++;
>> -            }
>> +        unsigned long index_uq;
>> +        u32 index_uf;
>> +        int err;
>> +
>> +        xa_for_each(&userq->fence_drv_xa, index_uq, stored_fence_drv) {
>> +            err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
>> +                       stored_fence_drv, xa_limit_32b, GFP_KERNEL);
>
> That is even worse than what was discussed before. Now you have two 
> XAs and the second is incorrectly using GFP_KERNEL.

I think the problem here is, the WAIT IOCTL updates the 
userq->fence_drv_xa entries between the 2 xa_for_each blocks
exactly at kvmalloc_array memory allocation. Though, we are locking the 
first and second xa_for_each blocks and having the
GFP_ATOMIC in place didnt help to resolve the problem.

For example,
kvmalloc_array() is allocating the memory for the count value(say 5) and 
before we acquire the second
xa_for_each block lock, the count modified to (say 7) by the WAIT IOCTL 
xa_alloc() function (by acquiring the same lock),
and we would be iterating for the new count. But the memory allocated 
would be for 5 entries.

xa_lock()
first xa_for_each block to count the entries
xa_unlock()

kvmalloc_array allocates for count 5

xa_lock()
second xa_for_each block to move the entries to allocated memory
here the count increased to 7
xa_unlock

Thanks,
Arun.
>
> Regards,
> Christian.
>
>> +            if (err)
>> +                return err;
>>           }
>> -
>> -        userq_fence->fence_drv_array_count = i;
>> -    } else {
>> -        userq_fence->fence_drv_array = NULL;
>> -        userq_fence->fence_drv_array_count = 0;
>> +        xa_destroy(&userq->fence_drv_xa);
>>       }
>>         /* Check if hardware has already processed the job */
>> @@ -300,8 +291,6 @@ static void amdgpu_userq_fence_free(struct 
>> rcu_head *rcu)
>>         /* Release the fence driver reference */
>>       amdgpu_userq_fence_driver_put(fence_drv);
>> -
>> -    kvfree(userq_fence->fence_drv_array);
>>       kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> index f1a90840ac1f..3283e5573d10 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> @@ -37,9 +37,8 @@ struct amdgpu_userq_fence {
>>        */
>>       spinlock_t lock;
>>       struct list_head link;
>> -    unsigned long fence_drv_array_count;
>> +    struct xarray fence_drv_xa;
>>       struct amdgpu_userq_fence_driver *fence_drv;
>> -    struct amdgpu_userq_fence_driver **fence_drv_array;
>>   };
>>     struct amdgpu_userq_fence_driver {
>


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence
  2024-12-20 10:34   ` Paneer Selvam, Arunpravin
@ 2024-12-20 10:38     ` Christian König
  2024-12-20 10:45       ` Paneer Selvam, Arunpravin
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2024-12-20 10:38 UTC (permalink / raw)
  To: Paneer Selvam, Arunpravin, amd-gfx; +Cc: alexander.deucher

Hi Arun,

Am 20.12.24 um 11:34 schrieb Paneer Selvam, Arunpravin:
> Hi Christian,
>
>
> On 12/19/2024 4:11 PM, Christian König wrote:
>>
>>
>> Am 19.12.24 um 11:38 schrieb Arunpravin Paneer Selvam:
>>> Fix out-of-bounds issue in userq fence create when
>>> accessing the userq xa structure. Added a lock to
>>> protect the race condition.
>>>
>>> v2:(Christian)
>>>    - Acquire xa lock only for the xa_for_each blocks and
>>>      not for the kvmalloc_array() memory allocation part.
>>>
>>> v3:
>>>    - Replacing the kvmalloc_array() storage with xa_alloc()
>>>      solves the problem.
>>>
>>> BUG: KASAN: slab-out-of-bounds in 
>>> amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>> [  +0.000006] Call Trace:
>>> [  +0.000005]  <TASK>
>>> [  +0.000005]  dump_stack_lvl+0x6c/0x90
>>> [  +0.000011]  print_report+0xc4/0x5e0
>>> [  +0.000009]  ? srso_return_thunk+0x5/0x5f
>>> [  +0.000008]  ? kasan_complete_mode_report_info+0x26/0x1d0
>>> [  +0.000007]  ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>> [  +0.000405]  kasan_report+0xdf/0x120
>>> [  +0.000009]  ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>> [  +0.000405]  __asan_report_store8_noabort+0x17/0x20
>>> [  +0.000007]  amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>> [  +0.000406]  ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu]
>>> [  +0.000408]  ? srso_return_thunk+0x5/0x5f
>>> [  +0.000008]  ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm]
>>> [  +0.000013]  ? srso_return_thunk+0x5/0x5f
>>> [  +0.000008]  amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu]
>>> [  +0.000412]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>> [  +0.000404]  ? try_to_wake_up+0x165/0x1840
>>> [  +0.000010]  ? __pfx_futex_wake_mark+0x10/0x10
>>> [  +0.000011]  drm_ioctl_kernel+0x178/0x2f0 [drm]
>>> [  +0.000050]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>> [  +0.000404]  ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
>>> [  +0.000043]  ? __kasan_check_read+0x11/0x20
>>> [  +0.000007]  ? srso_return_thunk+0x5/0x5f
>>> [  +0.000007]  ? __kasan_check_write+0x14/0x20
>>> [  +0.000008]  drm_ioctl+0x513/0xd20 [drm]
>>> [  +0.000040]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>> [  +0.000407]  ? __pfx_drm_ioctl+0x10/0x10 [drm]
>>> [  +0.000044]  ? srso_return_thunk+0x5/0x5f
>>> [  +0.000007]  ? _raw_spin_lock_irqsave+0x99/0x100
>>> [  +0.000007]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
>>> [  +0.000006]  ? __rseq_handle_notify_resume+0x188/0xc30
>>> [  +0.000008]  ? srso_return_thunk+0x5/0x5f
>>> [  +0.000008]  ? srso_return_thunk+0x5/0x5f
>>> [  +0.000006]  ? _raw_spin_unlock_irqrestore+0x27/0x50
>>> [  +0.000010]  amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu]
>>> [  +0.000388]  __x64_sys_ioctl+0x135/0x1b0
>>> [  +0.000009]  x64_sys_call+0x1205/0x20d0
>>> [  +0.000007]  do_syscall_64+0x4d/0x120
>>> [  +0.000008]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
>>> [  +0.000007] RIP: 0033:0x7f7c3d31a94f
>>>
>>> Signed-off-by: Arunpravin Paneer Selvam 
>>> <Arunpravin.PaneerSelvam@amd.com>
>>> ---
>>>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 43 
>>> +++++++------------
>>>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |  3 +-
>>>   2 files changed, 17 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>> index 2e7271362ace..4289bed6c1f7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>> @@ -122,10 +122,11 @@ int amdgpu_userq_fence_driver_alloc(struct 
>>> amdgpu_device *adev,
>>>     void amdgpu_userq_fence_driver_process(struct 
>>> amdgpu_userq_fence_driver *fence_drv)
>>>   {
>>> +    struct amdgpu_userq_fence_driver *stored_fence_drv;
>>>       struct amdgpu_userq_fence *userq_fence, *tmp;
>>>       struct dma_fence *fence;
>>> +    unsigned long index;
>>>       u64 rptr;
>>> -    int i;
>>>         if (!fence_drv)
>>>           return;
>>> @@ -141,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct 
>>> amdgpu_userq_fence_driver *fence_d
>>>             dma_fence_signal(fence);
>>>   -        for (i = 0; i < userq_fence->fence_drv_array_count; i++)
>>> - amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
>>> +        xa_for_each(&userq_fence->fence_drv_xa, index, 
>>> stored_fence_drv)
>>> +            amdgpu_userq_fence_driver_put(stored_fence_drv);
>>>             list_del(&userq_fence->link);
>>>           dma_fence_put(fence);
>>> @@ -221,34 +222,24 @@ int amdgpu_userq_fence_create(struct 
>>> amdgpu_usermode_queue *userq,
>>>       dma_fence_init(fence, &amdgpu_userq_fence_ops, 
>>> &userq_fence->lock,
>>>                  fence_drv->context, seq);
>>>   +    xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
>>> +
>>>       amdgpu_userq_fence_driver_get(fence_drv);
>>>       dma_fence_get(fence);
>>>         if (!xa_empty(&userq->fence_drv_xa)) {
>>>           struct amdgpu_userq_fence_driver *stored_fence_drv;
>>> -        unsigned long index, count = 0;
>>> -        int i = 0;
>>> -
>>> -        xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
>>> -            count++;
>>> -
>>> -        userq_fence->fence_drv_array =
>>> -            kvmalloc_array(count,
>>> -                       sizeof(struct amdgpu_userq_fence_driver *),
>>> -                       GFP_KERNEL);
>>> -
>>> -        if (userq_fence->fence_drv_array) {
>>> -            xa_for_each(&userq->fence_drv_xa, index, 
>>> stored_fence_drv) {
>>> -                userq_fence->fence_drv_array[i] = stored_fence_drv;
>>> -                xa_erase(&userq->fence_drv_xa, index);
>>> -                i++;
>>> -            }
>>> +        unsigned long index_uq;
>>> +        u32 index_uf;
>>> +        int err;
>>> +
>>> +        xa_for_each(&userq->fence_drv_xa, index_uq, 
>>> stored_fence_drv) {
>>> +            err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
>>> +                       stored_fence_drv, xa_limit_32b, GFP_KERNEL);
>>
>> That is even worse than what was discussed before. Now you have two 
>> XAs and the second is incorrectly using GFP_KERNEL.
>
> I think the problem here is, the WAIT IOCTL updates the 
> userq->fence_drv_xa entries between the 2 xa_for_each blocks
> exactly at kvmalloc_array memory allocation. Though, we are locking 
> the first and second xa_for_each blocks and having the
> GFP_ATOMIC in place didnt help to resolve the problem.

Yeah, I agree on the problem. But I don't understand why using 
GFP_ATOMIC didn't solved the issue.

>
> For example,
> kvmalloc_array() is allocating the memory for the count value(say 5) 
> and before we acquire the second
> xa_for_each block lock, the count modified to (say 7) by the WAIT 
> IOCTL xa_alloc() function (by acquiring the same lock),
> and we would be iterating for the new count. But the memory allocated 
> would be for 5 entries.
>
> xa_lock()
> first xa_for_each block to count the entries
> xa_unlock()

When you use GFP_ATOMIC you can drop this xa_unlock().

>
> kvmalloc_array allocates for count 5
>
> xa_lock()

And that xa_lock() and so make sure that the xa isn't modified in between.

Regards,
Christian.

> second xa_for_each block to move the entries to allocated memory
> here the count increased to 7
> xa_unlock
>
> Thanks,
> Arun.
>>
>> Regards,
>> Christian.
>>
>>> +            if (err)
>>> +                return err;
>>>           }
>>> -
>>> -        userq_fence->fence_drv_array_count = i;
>>> -    } else {
>>> -        userq_fence->fence_drv_array = NULL;
>>> -        userq_fence->fence_drv_array_count = 0;
>>> +        xa_destroy(&userq->fence_drv_xa);
>>>       }
>>>         /* Check if hardware has already processed the job */
>>> @@ -300,8 +291,6 @@ static void amdgpu_userq_fence_free(struct 
>>> rcu_head *rcu)
>>>         /* Release the fence driver reference */
>>>       amdgpu_userq_fence_driver_put(fence_drv);
>>> -
>>> -    kvfree(userq_fence->fence_drv_array);
>>>       kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
>>>   }
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>> index f1a90840ac1f..3283e5573d10 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>> @@ -37,9 +37,8 @@ struct amdgpu_userq_fence {
>>>        */
>>>       spinlock_t lock;
>>>       struct list_head link;
>>> -    unsigned long fence_drv_array_count;
>>> +    struct xarray fence_drv_xa;
>>>       struct amdgpu_userq_fence_driver *fence_drv;
>>> -    struct amdgpu_userq_fence_driver **fence_drv_array;
>>>   };
>>>     struct amdgpu_userq_fence_driver {
>>
>


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence
  2024-12-20 10:38     ` Christian König
@ 2024-12-20 10:45       ` Paneer Selvam, Arunpravin
  0 siblings, 0 replies; 8+ messages in thread
From: Paneer Selvam, Arunpravin @ 2024-12-20 10:45 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: alexander.deucher




On 12/20/2024 4:08 PM, Christian König wrote:
> Hi Arun,
>
> Am 20.12.24 um 11:34 schrieb Paneer Selvam, Arunpravin:
>> Hi Christian,
>>
>>
>> On 12/19/2024 4:11 PM, Christian König wrote:
>>>
>>>
>>> Am 19.12.24 um 11:38 schrieb Arunpravin Paneer Selvam:
>>>> Fix out-of-bounds issue in userq fence create when
>>>> accessing the userq xa structure. Added a lock to
>>>> protect the race condition.
>>>>
>>>> v2:(Christian)
>>>>    - Acquire xa lock only for the xa_for_each blocks and
>>>>      not for the kvmalloc_array() memory allocation part.
>>>>
>>>> v3:
>>>>    - Replacing the kvmalloc_array() storage with xa_alloc()
>>>>      solves the problem.
>>>>
>>>> BUG: KASAN: slab-out-of-bounds in 
>>>> amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>>> [  +0.000006] Call Trace:
>>>> [  +0.000005]  <TASK>
>>>> [  +0.000005]  dump_stack_lvl+0x6c/0x90
>>>> [  +0.000011]  print_report+0xc4/0x5e0
>>>> [  +0.000009]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000008]  ? kasan_complete_mode_report_info+0x26/0x1d0
>>>> [  +0.000007]  ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>>> [  +0.000405]  kasan_report+0xdf/0x120
>>>> [  +0.000009]  ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>>> [  +0.000405]  __asan_report_store8_noabort+0x17/0x20
>>>> [  +0.000007]  amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>>> [  +0.000406]  ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu]
>>>> [  +0.000408]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000008]  ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm]
>>>> [  +0.000013]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000008]  amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu]
>>>> [  +0.000412]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>>> [  +0.000404]  ? try_to_wake_up+0x165/0x1840
>>>> [  +0.000010]  ? __pfx_futex_wake_mark+0x10/0x10
>>>> [  +0.000011]  drm_ioctl_kernel+0x178/0x2f0 [drm]
>>>> [  +0.000050]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>>> [  +0.000404]  ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
>>>> [  +0.000043]  ? __kasan_check_read+0x11/0x20
>>>> [  +0.000007]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000007]  ? __kasan_check_write+0x14/0x20
>>>> [  +0.000008]  drm_ioctl+0x513/0xd20 [drm]
>>>> [  +0.000040]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>>> [  +0.000407]  ? __pfx_drm_ioctl+0x10/0x10 [drm]
>>>> [  +0.000044]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000007]  ? _raw_spin_lock_irqsave+0x99/0x100
>>>> [  +0.000007]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
>>>> [  +0.000006]  ? __rseq_handle_notify_resume+0x188/0xc30
>>>> [  +0.000008]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000008]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000006]  ? _raw_spin_unlock_irqrestore+0x27/0x50
>>>> [  +0.000010]  amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu]
>>>> [  +0.000388]  __x64_sys_ioctl+0x135/0x1b0
>>>> [  +0.000009]  x64_sys_call+0x1205/0x20d0
>>>> [  +0.000007]  do_syscall_64+0x4d/0x120
>>>> [  +0.000008]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
>>>> [  +0.000007] RIP: 0033:0x7f7c3d31a94f
>>>>
>>>> Signed-off-by: Arunpravin Paneer Selvam 
>>>> <Arunpravin.PaneerSelvam@amd.com>
>>>> ---
>>>>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 43 
>>>> +++++++------------
>>>>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |  3 +-
>>>>   2 files changed, 17 insertions(+), 29 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>>> index 2e7271362ace..4289bed6c1f7 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>>> @@ -122,10 +122,11 @@ int amdgpu_userq_fence_driver_alloc(struct 
>>>> amdgpu_device *adev,
>>>>     void amdgpu_userq_fence_driver_process(struct 
>>>> amdgpu_userq_fence_driver *fence_drv)
>>>>   {
>>>> +    struct amdgpu_userq_fence_driver *stored_fence_drv;
>>>>       struct amdgpu_userq_fence *userq_fence, *tmp;
>>>>       struct dma_fence *fence;
>>>> +    unsigned long index;
>>>>       u64 rptr;
>>>> -    int i;
>>>>         if (!fence_drv)
>>>>           return;
>>>> @@ -141,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct 
>>>> amdgpu_userq_fence_driver *fence_d
>>>>             dma_fence_signal(fence);
>>>>   -        for (i = 0; i < userq_fence->fence_drv_array_count; i++)
>>>> - amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
>>>> +        xa_for_each(&userq_fence->fence_drv_xa, index, 
>>>> stored_fence_drv)
>>>> + amdgpu_userq_fence_driver_put(stored_fence_drv);
>>>>             list_del(&userq_fence->link);
>>>>           dma_fence_put(fence);
>>>> @@ -221,34 +222,24 @@ int amdgpu_userq_fence_create(struct 
>>>> amdgpu_usermode_queue *userq,
>>>>       dma_fence_init(fence, &amdgpu_userq_fence_ops, 
>>>> &userq_fence->lock,
>>>>                  fence_drv->context, seq);
>>>>   +    xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
>>>> +
>>>>       amdgpu_userq_fence_driver_get(fence_drv);
>>>>       dma_fence_get(fence);
>>>>         if (!xa_empty(&userq->fence_drv_xa)) {
>>>>           struct amdgpu_userq_fence_driver *stored_fence_drv;
>>>> -        unsigned long index, count = 0;
>>>> -        int i = 0;
>>>> -
>>>> -        xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
>>>> -            count++;
>>>> -
>>>> -        userq_fence->fence_drv_array =
>>>> -            kvmalloc_array(count,
>>>> -                       sizeof(struct amdgpu_userq_fence_driver *),
>>>> -                       GFP_KERNEL);
>>>> -
>>>> -        if (userq_fence->fence_drv_array) {
>>>> -            xa_for_each(&userq->fence_drv_xa, index, 
>>>> stored_fence_drv) {
>>>> -                userq_fence->fence_drv_array[i] = stored_fence_drv;
>>>> -                xa_erase(&userq->fence_drv_xa, index);
>>>> -                i++;
>>>> -            }
>>>> +        unsigned long index_uq;
>>>> +        u32 index_uf;
>>>> +        int err;
>>>> +
>>>> +        xa_for_each(&userq->fence_drv_xa, index_uq, 
>>>> stored_fence_drv) {
>>>> +            err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
>>>> +                       stored_fence_drv, xa_limit_32b, GFP_KERNEL);
>>>
>>> That is even worse than what was discussed before. Now you have two 
>>> XAs and the second is incorrectly using GFP_KERNEL.
>>
>> I think the problem here is, the WAIT IOCTL updates the 
>> userq->fence_drv_xa entries between the 2 xa_for_each blocks
>> exactly at kvmalloc_array memory allocation. Though, we are locking 
>> the first and second xa_for_each blocks and having the
>> GFP_ATOMIC in place didnt help to resolve the problem.
>
> Yeah, I agree on the problem. But I don't understand why using 
> GFP_ATOMIC didn't solved the issue.
>
>>
>> For example,
>> kvmalloc_array() is allocating the memory for the count value(say 5) 
>> and before we acquire the second
>> xa_for_each block lock, the count modified to (say 7) by the WAIT 
>> IOCTL xa_alloc() function (by acquiring the same lock),
>> and we would be iterating for the new count. But the memory allocated 
>> would be for 5 entries.
>>
>> xa_lock()
>> first xa_for_each block to count the entries
>> xa_unlock()
>
> When you use GFP_ATOMIC you can drop this xa_unlock().
>
>>
>> kvmalloc_array allocates for count 5
>>
>> xa_lock()
>
> And that xa_lock() and so make sure that the xa isn't modified in 
> between.
Got it. Thanks!

Regards,
Arun.
>
> Regards,
> Christian.
>
>> second xa_for_each block to move the entries to allocated memory
>> here the count increased to 7
>> xa_unlock
>>
>> Thanks,
>> Arun.
>>>
>>> Regards,
>>> Christian.
>>>
>>>> +            if (err)
>>>> +                return err;
>>>>           }
>>>> -
>>>> -        userq_fence->fence_drv_array_count = i;
>>>> -    } else {
>>>> -        userq_fence->fence_drv_array = NULL;
>>>> -        userq_fence->fence_drv_array_count = 0;
>>>> +        xa_destroy(&userq->fence_drv_xa);
>>>>       }
>>>>         /* Check if hardware has already processed the job */
>>>> @@ -300,8 +291,6 @@ static void amdgpu_userq_fence_free(struct 
>>>> rcu_head *rcu)
>>>>         /* Release the fence driver reference */
>>>>       amdgpu_userq_fence_driver_put(fence_drv);
>>>> -
>>>> -    kvfree(userq_fence->fence_drv_array);
>>>>       kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
>>>>   }
>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>>> index f1a90840ac1f..3283e5573d10 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>>> @@ -37,9 +37,8 @@ struct amdgpu_userq_fence {
>>>>        */
>>>>       spinlock_t lock;
>>>>       struct list_head link;
>>>> -    unsigned long fence_drv_array_count;
>>>> +    struct xarray fence_drv_xa;
>>>>       struct amdgpu_userq_fence_driver *fence_drv;
>>>> -    struct amdgpu_userq_fence_driver **fence_drv_array;
>>>>   };
>>>>     struct amdgpu_userq_fence_driver {
>>>
>>
>


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2024-12-20 10:45 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-19 10:38 [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Arunpravin Paneer Selvam
2024-12-19 10:38 ` [PATCH v3 2/3] drm/amdgpu: Improve the xa field names readability Arunpravin Paneer Selvam
2024-12-19 10:38 ` [PATCH v3 3/3] drm/amdgpu: Fix wait IOCTL lockup issues Arunpravin Paneer Selvam
2024-12-19 10:43   ` Christian König
2024-12-19 10:41 ` [PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence Christian König
2024-12-20 10:34   ` Paneer Selvam, Arunpravin
2024-12-20 10:38     ` Christian König
2024-12-20 10:45       ` Paneer Selvam, Arunpravin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.