[PATCH 1/2] habanalabs: support device memory memset > 4GB

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/2] habanalabs: support device memory memset > 4GB
@ 2019-05-19 11:12 Oded Gabbay
  2019-05-19 11:12 ` [PATCH 2/2] habanalabs: don't limit packet size for device CPU Oded Gabbay
  0 siblings, 1 reply; 2+ messages in thread
From: Oded Gabbay @ 2019-05-19 11:12 UTC (permalink / raw)
  To: linux-kernel; +Cc: gregkh

This patch adds support to the goya memset function to perform memset to
device memory with size larger than 4GB. In this case, we need to use
multiple LIN_DMA packets because a single packet supports up to 4GB.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/goya/goya.c | 49 ++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 19 deletions(-)

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index be27ec6cf5fd..6ee5db697ca5 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4478,36 +4478,47 @@ void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
 	return goya->events_stat;
 }
 
-static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,
+static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
 				u64 val, bool is_dram)
 {
 	struct packet_lin_dma *lin_dma_pkt;
 	struct hl_cs_job *job;
 	u32 cb_size, ctl;
 	struct hl_cb *cb;
-	int rc;
+	int rc, lin_dma_pkts_cnt;
 
-	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
+	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
+						sizeof(struct packet_msg_prot);
+	cb = hl_cb_kernel_create(hdev, cb_size);
 	if (!cb)
-		return -EFAULT;
+		return -ENOMEM;
 
 	lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
 
-	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
-	cb_size = sizeof(*lin_dma_pkt);
-
-	ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
-			(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
-			(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
-			(1 << GOYA_PKT_CTL_RB_SHIFT) |
-			(1 << GOYA_PKT_CTL_MB_SHIFT));
-	ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
-			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
-	lin_dma_pkt->ctl = cpu_to_le32(ctl);
+	do {
+		memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
+
+		ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
+				(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
+				(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
+				(1 << GOYA_PKT_CTL_RB_SHIFT) |
+				(1 << GOYA_PKT_CTL_MB_SHIFT));
+		ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
+				GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
+		lin_dma_pkt->ctl = cpu_to_le32(ctl);
+
+		lin_dma_pkt->src_addr = cpu_to_le64(val);
+		lin_dma_pkt->dst_addr = cpu_to_le64(addr);
+		if (lin_dma_pkts_cnt > 1)
+			lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
+		else
+			lin_dma_pkt->tsize = cpu_to_le32(size);
 
-	lin_dma_pkt->src_addr = cpu_to_le64(val);
-	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
-	lin_dma_pkt->tsize = cpu_to_le32(size);
+		size -= SZ_2G;
+		addr += SZ_2G;
+		lin_dma_pkt++;
+	} while (--lin_dma_pkts_cnt);
 
 	job = hl_cs_allocate_job(hdev, true);
 	if (!job) {
@@ -4522,7 +4533,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,
 	job->user_cb_size = cb_size;
 	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
 	job->patched_cb = job->user_cb;
-	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
+	job->job_cb_size = job->user_cb_size;
 
 	hl_debugfs_add_job(hdev, job);
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH 2/2] habanalabs: don't limit packet size for device CPU
  2019-05-19 11:12 [PATCH 1/2] habanalabs: support device memory memset > 4GB Oded Gabbay
@ 2019-05-19 11:12 ` Oded Gabbay
  0 siblings, 0 replies; 2+ messages in thread
From: Oded Gabbay @ 2019-05-19 11:12 UTC (permalink / raw)
  To: linux-kernel; +Cc: gregkh

This patch removes a limitation on the maximum packet size that is read by
the device CPU as that limitation is not needed.

Therefore, the patch also removes an elaborate calculation that is based
on this limitation which is also not needed now. Instead, use a fixed
value for the memory pool size of the packets.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/firmware_if.c | 12 ------------
 drivers/misc/habanalabs/goya/goya.c   |  2 +-
 drivers/misc/habanalabs/habanalabs.h  | 12 ++----------
 3 files changed, 3 insertions(+), 23 deletions(-)

diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c
index 0cbdfa0d7fba..cc8168bacb24 100644
--- a/drivers/misc/habanalabs/firmware_if.c
+++ b/drivers/misc/habanalabs/firmware_if.c
@@ -85,12 +85,6 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 	u32 tmp;
 	int rc = 0;
 
-	if (len > HL_CPU_CB_SIZE) {
-		dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n",
-			len);
-		return -ENOMEM;
-	}
-
 	pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
 								&pkt_dma_addr);
 	if (!pkt) {
@@ -181,9 +175,6 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
 {
 	u64 kernel_addr;
 
-	/* roundup to HL_CPU_PKT_SIZE */
-	size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK;
-
 	kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
 
 	*dma_handle = hdev->cpu_accessible_dma_address +
@@ -195,9 +186,6 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
 void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 					void *vaddr)
 {
-	/* roundup to HL_CPU_PKT_SIZE */
-	size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK;
-
 	gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
 			size);
 }
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 6ee5db697ca5..e0fc511acaec 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -655,7 +655,7 @@ static int goya_sw_init(struct hl_device *hdev)
 		goto free_dma_pool;
 	}
 
-	hdev->cpu_accessible_dma_pool = gen_pool_create(HL_CPU_PKT_SHIFT, -1);
+	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
 	if (!hdev->cpu_accessible_dma_pool) {
 		dev_err(hdev->dev,
 			"Failed to create CPU accessible DMA pool\n");
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 9b1c03f1ab32..0462b7727da7 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -320,18 +320,10 @@ struct hl_cs_job;
 #define HL_EQ_LENGTH			64
 #define HL_EQ_SIZE_IN_BYTES		(HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
 
-#define HL_CPU_PKT_SHIFT		5
-#define HL_CPU_PKT_SIZE			(1 << HL_CPU_PKT_SHIFT)
-#define HL_CPU_PKT_MASK			(~((1 << HL_CPU_PKT_SHIFT) - 1))
-#define HL_CPU_MAX_PKTS_IN_CB		32
-#define HL_CPU_CB_SIZE			(HL_CPU_PKT_SIZE * \
-					 HL_CPU_MAX_PKTS_IN_CB)
-#define HL_CPU_CB_QUEUE_SIZE		(HL_QUEUE_LENGTH * HL_CPU_CB_SIZE)
-
-/* KMD <-> ArmCP shared memory size (EQ + PQ + CPU CB queue) */
+/* KMD <-> ArmCP shared memory size (EQ + PQ + 2MB for packets) */
 #define HL_CPU_ACCESSIBLE_MEM_SIZE	(HL_EQ_SIZE_IN_BYTES + \
 					 HL_QUEUE_SIZE_IN_BYTES + \
-					 HL_CPU_CB_QUEUE_SIZE)
+					 SZ_2M)
 
 /**
  * struct hl_hw_queue - describes a H/W transport queue.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-05-19 18:19 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-19 11:12 [PATCH 1/2] habanalabs: support device memory memset > 4GB Oded Gabbay
2019-05-19 11:12 ` [PATCH 2/2] habanalabs: don't limit packet size for device CPU Oded Gabbay

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox