AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] drm/amdkfd: Fix sdma queue allocate race condition
@ 2019-05-31 21:19 Zeng, Oak
       [not found] ` <1559337538-14249-1-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Zeng, Oak @ 2019-05-31 21:19 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
  Cc: Zhao, Yong, Kuehling, Felix, Freehill, Chris, Zeng, Oak,
	Liu, Alex

SDMA queue allocation requires the dqm lock as it modify
the global dqm members. Introduce functions to allocate/deallocate
in locked/unlocked circumstance.

Change-Id: Id3084524c5f65d9629b12cf6b4862a7516945cb1
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 46 ++++++++++++++++------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index ece35c7..1f707bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -61,6 +61,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 
 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
 				struct queue *q);
+static void deallocate_sdma_queue_locked(struct device_queue_manager *dqm,
+				struct queue *q);
 
 static void kfd_process_hw_exception(struct work_struct *work);
 
@@ -446,10 +448,10 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
 		deallocate_hqd(dqm, q);
 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 		dqm->sdma_queue_count--;
-		deallocate_sdma_queue(dqm, q);
+		deallocate_sdma_queue_locked(dqm, q);
 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
 		dqm->xgmi_sdma_queue_count--;
-		deallocate_sdma_queue(dqm, q);
+		deallocate_sdma_queue_locked(dqm, q);
 	} else {
 		pr_debug("q->properties.type %d is invalid\n",
 				q->properties.type);
@@ -922,8 +924,10 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 		if (dqm->sdma_bitmap == 0)
 			return -ENOMEM;
+		dqm_lock(dqm);
 		bit = __ffs64(dqm->sdma_bitmap);
 		dqm->sdma_bitmap &= ~(1ULL << bit);
+		dqm_unlock(dqm);
 		q->sdma_id = bit;
 		q->properties.sdma_engine_id = q->sdma_id %
 				get_num_sdma_engines(dqm);
@@ -932,8 +936,10 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
 		if (dqm->xgmi_sdma_bitmap == 0)
 			return -ENOMEM;
+		dqm_lock(dqm);
 		bit = __ffs64(dqm->xgmi_sdma_bitmap);
 		dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
+		dqm_unlock(dqm);
 		q->sdma_id = bit;
 		/* sdma_engine_id is sdma id including
 		 * both PCIe-optimized SDMAs and XGMI-
@@ -953,17 +959,35 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
 	return 0;
 }
 
+static void deallocate_sdma_queue_locked(struct device_queue_manager *dqm,
+				struct queue *q)
+{
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+		if (q->sdma_id >= get_num_sdma_queues(dqm))
+			return;
+		dqm->sdma_bitmap |= (1ULL << q->sdma_id);
+	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
+			return;
+		dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
+	}
+}
+
 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
 				struct queue *q)
 {
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 		if (q->sdma_id >= get_num_sdma_queues(dqm))
 			return;
+		dqm_lock(dqm);
 		dqm->sdma_bitmap |= (1ULL << q->sdma_id);
+		dqm_unlock(dqm);
 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
 		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
 			return;
+		dqm_lock(dqm);
 		dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
+		dqm_unlock(dqm);
 	}
 }
 
@@ -982,7 +1006,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 
 	retval = allocate_doorbell(qpd, q);
 	if (retval)
-		goto out_deallocate_sdma_queue;
+		goto out_deallocate_sdma_queue_locked;
 
 	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
 	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
@@ -1001,8 +1025,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
 out_deallocate_doorbell:
 	deallocate_doorbell(qpd, q);
-out_deallocate_sdma_queue:
-	deallocate_sdma_queue(dqm, q);
+out_deallocate_sdma_queue_locked:
+	deallocate_sdma_queue_locked(dqm, q);
 
 	return retval;
 }
@@ -1194,7 +1218,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
 	retval = allocate_doorbell(qpd, q);
 	if (retval)
-		goto out_deallocate_sdma_queue;
+		goto out_deallocate_sdma_queue_locked;
 
 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
 			q->properties.type)];
@@ -1242,7 +1266,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
 out_deallocate_doorbell:
 	deallocate_doorbell(qpd, q);
-out_deallocate_sdma_queue:
+out_deallocate_sdma_queue_locked:
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
 		deallocate_sdma_queue(dqm, q);
@@ -1396,10 +1420,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 		dqm->sdma_queue_count--;
-		deallocate_sdma_queue(dqm, q);
+		deallocate_sdma_queue_locked(dqm, q);
 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
 		dqm->xgmi_sdma_queue_count--;
-		deallocate_sdma_queue(dqm, q);
+		deallocate_sdma_queue_locked(dqm, q);
 	}
 
 	list_del(&q->list);
@@ -1625,10 +1649,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 	list_for_each_entry(q, &qpd->queues_list, list) {
 		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 			dqm->sdma_queue_count--;
-			deallocate_sdma_queue(dqm, q);
+			deallocate_sdma_queue_locked(dqm, q);
 		} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
 			dqm->xgmi_sdma_queue_count--;
-			deallocate_sdma_queue(dqm, q);
+			deallocate_sdma_queue_locked(dqm, q);
 		}
 
 		if (q->properties.is_active)
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-06-03 16:05 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-05-31 21:19 [PATCH 1/4] drm/amdkfd: Fix sdma queue allocate race condition Zeng, Oak
     [not found] ` <1559337538-14249-1-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
2019-05-31 21:19   ` [PATCH 2/4] drm/amdkfd: Only initialize sdma vm for sdma queues Zeng, Oak
2019-05-31 21:19   ` [PATCH 3/4] drm/amdkfd: Refactor create_queue_nocpsch Zeng, Oak
     [not found]     ` <1559337538-14249-3-git-send-email-Oak.Zeng-5C7GfCeVMHo@public.gmane.org>
2019-05-31 21:40       ` Kuehling, Felix
2019-05-31 21:19   ` [PATCH 4/4] drm/amdkfd: Fix a circular lock dependency Zeng, Oak
2019-05-31 21:31   ` [PATCH 1/4] drm/amdkfd: Fix sdma queue allocate race condition Kuehling, Felix
     [not found]     ` <f0e92b32-bd0a-aca4-c7dd-83f8e97b9a6f-5C7GfCeVMHo@public.gmane.org>
2019-05-31 21:50       ` Kuehling, Felix
     [not found]         ` <d00bae2c-8c2f-b8b7-a69a-7205fd01a55a-5C7GfCeVMHo@public.gmane.org>
2019-06-03 16:05           ` Zeng, Oak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox