public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Oded Gabbay <ogabbay@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: SW_Drivers@habana.ai, Tomer Tayar <ttayar@habana.ai>
Subject: [PATCH] habanalabs: Separate CS job completion from its deallocation
Date: Wed,  4 Nov 2020 16:09:06 +0200	[thread overview]
Message-ID: <20201104140908.10178-10-ogabbay@kernel.org> (raw)
In-Reply-To: <20201104140908.10178-1-ogabbay@kernel.org>

From: Tomer Tayar <ttayar@habana.ai>

Current CS jobs are no longer needed after their completion.
However, jobs of future workloads might still be in use even after they
are completed. To allow that, this patch adds a refcount to the job
object and decouples the job's completion handling from its deallocation.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    | 25 ++++++++++++++-----
 drivers/misc/habanalabs/common/debugfs.c      | 13 +++++-----
 drivers/misc/habanalabs/common/habanalabs.h   |  2 ++
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 6de4b161856c..662b19663839 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -155,6 +155,18 @@ static void cs_put(struct hl_cs *cs)
 	kref_put(&cs->refcount, cs_do_release);
 }
 
+static void cs_job_do_release(struct kref *ref)
+{
+	struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);
+
+	kfree(job);
+}
+
+static void cs_job_put(struct hl_cs_job *job)
+{
+	kref_put(&job->refcount, cs_job_do_release);
+}
+
 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
 {
 	/*
@@ -227,7 +239,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
 	return rc;
 }
 
-static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
+static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
 {
 	struct hl_cs *cs = job->cs;
 
@@ -276,7 +288,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
 			job->queue_type == QUEUE_TYPE_HW)
 		cs_put(cs);
 
-	kfree(job);
+	cs_job_put(job);
 }
 
 static void cs_do_release(struct kref *ref)
@@ -290,13 +302,13 @@ static void cs_do_release(struct kref *ref)
 	/*
 	 * Although if we reached here it means that all external jobs have
 	 * finished, because each one of them took refcnt to CS, we still
-	 * need to go over the internal jobs and free them. Otherwise, we
+	 * need to go over the internal jobs and complete them. Otherwise, we
 	 * will have leaked memory and what's worse, the CS object (and
 	 * potentially the CTX object) could be released, while the JOB
 	 * still holds a pointer to them (but no reference).
 	 */
 	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
-		free_job(hdev, job);
+		complete_job(hdev, job);
 
 	if (!cs->submitted) {
 		/* In case the wait for signal CS was submitted, the put occurs
@@ -507,7 +519,7 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
 	struct hl_cs_job *job, *tmp;
 
 	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
-		free_job(hdev, job);
+		complete_job(hdev, job);
 }
 
 void hl_cs_rollback_all(struct hl_device *hdev)
@@ -539,7 +551,7 @@ static void job_wq_completion(struct work_struct *work)
 	struct hl_device *hdev = cs->ctx->hdev;
 
 	/* job is no longer needed */
-	free_job(hdev, job);
+	complete_job(hdev, job);
 }
 
 static int validate_queue_index(struct hl_device *hdev,
@@ -647,6 +659,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 	if (!job)
 		return NULL;
 
+	kref_init(&job->refcount);
 	job->queue_type = queue_type;
 	job->is_kernel_allocated_cb = is_kernel_allocated_cb;
 
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 912ddfa360b1..b44193ec3d12 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -168,18 +168,19 @@ static int command_submission_jobs_show(struct seq_file *s, void *data)
 		if (first) {
 			first = false;
 			seq_puts(s, "\n");
-			seq_puts(s, " JOB ID   CS ID    CTX ASID   H/W Queue\n");
-			seq_puts(s, "---------------------------------------\n");
+			seq_puts(s, " JOB ID   CS ID    CTX ASID   JOB RefCnt   H/W Queue\n");
+			seq_puts(s, "----------------------------------------------------\n");
 		}
 		if (job->cs)
 			seq_printf(s,
-				"    %02d       %llu         %d         %d\n",
+				"   %02d      %llu        %d          %d           %d\n",
 				job->id, job->cs->sequence, job->cs->ctx->asid,
-				job->hw_queue_id);
+				kref_read(&job->refcount), job->hw_queue_id);
 		else
 			seq_printf(s,
-				"    %02d       0         %d         %d\n",
-				job->id, HL_KERNEL_ASID_ID, job->hw_queue_id);
+				"   %02d      0        %d          %d           %d\n",
+				job->id, HL_KERNEL_ASID_ID,
+				kref_read(&job->refcount), job->hw_queue_id);
 	}
 
 	spin_unlock(&dev_entry->cs_job_spinlock);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 227f1a9552a6..2e3021adc824 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1142,6 +1142,7 @@ struct hl_cs {
  * @userptr_list: linked-list of userptr mappings that belong to this job and
  *			wait for completion.
  * @debugfs_list: node in debugfs list of command submission jobs.
+ * @refcount: reference counter for usage of the CS job.
  * @queue_type: the type of the H/W queue this job is submitted to.
  * @id: the id of this job inside a CS.
  * @hw_queue_id: the id of the H/W queue this job is submitted to.
@@ -1165,6 +1166,7 @@ struct hl_cs_job {
 	struct work_struct	finish_work;
 	struct list_head	userptr_list;
 	struct list_head	debugfs_list;
+	struct kref		refcount;
 	enum hl_queue_type	queue_type;
 	u32			id;
 	u32			hw_queue_id;
-- 
2.17.1


  parent reply	other threads:[~2020-11-04 14:10 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-04 14:08 [PATCH] habanalabs: add 'needs reset' state in driver Oded Gabbay
2020-11-04 14:08 ` [PATCH] habanalabs: fix hard reset print and comment Oded Gabbay
2020-11-04 14:08 ` [PATCH] habanalabs/gaudi: increase MAX CS to 16K Oded Gabbay
2020-11-04 14:09 ` [PATCH] habanalabs/gaudi: remove pcie_en strap toggle Oded Gabbay
2020-11-04 14:09 ` [PATCH] habanalabs: Move repeatedly included headers to habanalabs.h Oded Gabbay
2020-11-04 14:09 ` [PATCH] habanalabs: release signal if collective wait was dropped Oded Gabbay
2020-11-04 14:09 ` [PATCH] habanalabs: remove duplicate print Oded Gabbay
2020-11-04 14:09 ` [PATCH] habanalabs: reset device upon fw read failure Oded Gabbay
2020-11-04 14:09 ` [PATCH] habanalabs: restore vm_pgoff after mmap Oded Gabbay
2020-11-04 14:09 ` Oded Gabbay [this message]
2020-11-04 14:09 ` [PATCH] habanalabs: Skip updating CI of internal queues if not in use Oded Gabbay
2020-11-04 14:09 ` [PATCH] habanalabs: Small refactoring of cs_do_release() Oded Gabbay

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201104140908.10178-10-ogabbay@kernel.org \
    --to=ogabbay@kernel.org \
    --cc=SW_Drivers@habana.ai \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ttayar@habana.ai \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox