All of lore.kernel.org
 help / color / mirror / Atom feed
From: Oded Gabbay <oded.gabbay@gmail.com>
To: linux-kernel@vger.kernel.org, SW_Drivers@habana.ai
Cc: Ofir Bitton <obitton@habana.ai>
Subject: [PATCH 2/4] habanalabs: Assign each CQ with its own work queue
Date: Fri, 10 Jul 2020 20:36:50 +0300	[thread overview]
Message-ID: <20200710173652.31039-2-oded.gabbay@gmail.com> (raw)
In-Reply-To: <20200710173652.31039-1-oded.gabbay@gmail.com>

From: Ofir Bitton <obitton@habana.ai>

We identified a possible race during job completion when working
with a single multi-threaded work queue. In order to overcome this
race we suggest using a single threaded work queue per completion
queue, hence we guarantee jobs completion in order.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/command_submission.c |  4 +-
 drivers/misc/habanalabs/device.c             | 39 ++++++++++++++++----
 drivers/misc/habanalabs/habanalabs.h         |  7 +++-
 drivers/misc/habanalabs/irq.c                |  2 +-
 4 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 1ba937b9a22e..54f2f5afdd2a 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -487,10 +487,12 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
 
 void hl_cs_rollback_all(struct hl_device *hdev)
 {
+	int i;
 	struct hl_cs *cs, *tmp;
 
 	/* flush all completions */
-	flush_workqueue(hdev->cq_wq);
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+		flush_workqueue(hdev->cq_wq[i]);
 
 	/* Make sure we don't have leftovers in the H/W queues mirror list */
 	list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index df709767c7ea..84800efec10d 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -249,7 +249,8 @@ static void device_cdev_sysfs_del(struct hl_device *hdev)
  */
 static int device_early_init(struct hl_device *hdev)
 {
-	int rc;
+	int i, rc;
+	char workq_name[32];
 
 	switch (hdev->asic_type) {
 	case ASIC_GOYA:
@@ -274,11 +275,24 @@ static int device_early_init(struct hl_device *hdev)
 	if (rc)
 		goto early_fini;
 
-	hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0);
-	if (hdev->cq_wq == NULL) {
-		dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
-		rc = -ENOMEM;
-		goto asid_fini;
+	if (hdev->asic_prop.completion_queues_count) {
+		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
+				sizeof(*hdev->cq_wq),
+				GFP_ATOMIC);
+		if (!hdev->cq_wq) {
+			rc = -ENOMEM;
+			goto asid_fini;
+		}
+	}
+
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
+		snprintf(workq_name, 32, "hl-free-jobs-%u", i);
+		hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
+		if (hdev->cq_wq == NULL) {
+			dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
+			rc = -ENOMEM;
+			goto free_cq_wq;
+		}
 	}
 
 	hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
@@ -321,7 +335,10 @@ static int device_early_init(struct hl_device *hdev)
 free_eq_wq:
 	destroy_workqueue(hdev->eq_wq);
 free_cq_wq:
-	destroy_workqueue(hdev->cq_wq);
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+		if (hdev->cq_wq[i])
+			destroy_workqueue(hdev->cq_wq[i]);
+	kfree(hdev->cq_wq);
 asid_fini:
 	hl_asid_fini(hdev);
 early_fini:
@@ -339,6 +356,8 @@ static int device_early_init(struct hl_device *hdev)
  */
 static void device_early_fini(struct hl_device *hdev)
 {
+	int i;
+
 	mutex_destroy(&hdev->mmu_cache_lock);
 	mutex_destroy(&hdev->debug_lock);
 	mutex_destroy(&hdev->send_cpu_message_lock);
@@ -351,7 +370,10 @@ static void device_early_fini(struct hl_device *hdev)
 	kfree(hdev->hl_chip_info);
 
 	destroy_workqueue(hdev->eq_wq);
-	destroy_workqueue(hdev->cq_wq);
+
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+		destroy_workqueue(hdev->cq_wq[i]);
+	kfree(hdev->cq_wq);
 
 	hl_asid_fini(hdev);
 
@@ -1181,6 +1203,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 				"failed to initialize completion queue\n");
 			goto cq_fini;
 		}
+		hdev->completion_queue[i].cq_idx = i;
 	}
 
 	/*
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index ea0fd178accb..01fb45887a5a 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -479,6 +479,7 @@ struct hl_hw_queue {
  * @hdev: pointer to the device structure
  * @kernel_address: holds the queue's kernel virtual address
  * @bus_address: holds the queue's DMA address
+ * @cq_idx: completion queue index in array
  * @hw_queue_id: the id of the matching H/W queue
  * @ci: ci inside the queue
  * @pi: pi inside the queue
@@ -488,6 +489,7 @@ struct hl_cq {
 	struct hl_device	*hdev;
 	u64			kernel_address;
 	dma_addr_t		bus_address;
+	u32			cq_idx;
 	u32			hw_queue_id;
 	u32			ci;
 	u32			pi;
@@ -1396,7 +1398,8 @@ struct hl_device_idle_busy_ts {
  * @asic_name: ASIC specific nmae.
  * @asic_type: ASIC specific type.
  * @completion_queue: array of hl_cq.
- * @cq_wq: work queue of completion queues for executing work in process context
+ * @cq_wq: work queues of completion queues for executing work in process
+ *         context.
  * @eq_wq: work queue of event queue for executing work in process context.
  * @kernel_ctx: Kernel driver context structure.
  * @kernel_queues: array of hl_hw_queue.
@@ -1492,7 +1495,7 @@ struct hl_device {
 	char				asic_name[16];
 	enum hl_asic_type		asic_type;
 	struct hl_cq			*completion_queue;
-	struct workqueue_struct		*cq_wq;
+	struct workqueue_struct		**cq_wq;
 	struct workqueue_struct		*eq_wq;
 	struct hl_ctx			*kernel_ctx;
 	struct hl_hw_queue		*kernel_queues;
diff --git a/drivers/misc/habanalabs/irq.c b/drivers/misc/habanalabs/irq.c
index 195a5ecba0e8..c8db717023f5 100644
--- a/drivers/misc/habanalabs/irq.c
+++ b/drivers/misc/habanalabs/irq.c
@@ -119,7 +119,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 
 		if ((shadow_index_valid) && (!hdev->disabled)) {
 			job = queue->shadow_queue[hl_pi_2_offset(shadow_index)];
-			queue_work(hdev->cq_wq, &job->finish_work);
+			queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
 		}
 
 		atomic_inc(&queue->ci);
-- 
2.17.1


  reply	other threads:[~2020-07-10 17:36 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-10 17:36 [PATCH 1/4] habanalabs: halt device CPU only upon certain reset Oded Gabbay
2020-07-10 17:36 ` Oded Gabbay [this message]
2020-07-10 17:36 ` [PATCH 3/4] habanalabs: verify queue can contain all cs jobs Oded Gabbay
2020-07-10 17:36 ` [PATCH 4/4] habanalabs: check for DMA errors when clearing memory Oded Gabbay
2020-07-13  7:24 ` [PATCH 1/4] habanalabs: halt device CPU only upon certain reset Tomer Tayar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200710173652.31039-2-oded.gabbay@gmail.com \
    --to=oded.gabbay@gmail.com \
    --cc=SW_Drivers@habana.ai \
    --cc=linux-kernel@vger.kernel.org \
    --cc=obitton@habana.ai \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.