All of lore.kernel.org
 help / color / mirror / Atom feed
From: Oded Gabbay <ogabbay@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: SW_Drivers@habana.ai, Ofir Bitton <obitton@habana.ai>
Subject: [PATCH 2/4] habanalabs: add support for multiple SOBs per monitor
Date: Mon,  2 Nov 2020 21:58:00 +0200	[thread overview]
Message-ID: <20201102195802.10608-4-ogabbay@kernel.org> (raw)
In-Reply-To: <20201102195802.10608-1-ogabbay@kernel.org>

From: Ofir Bitton <obitton@habana.ai>

Support advanced monitor functionality so that a single monitor can
watch more than one SOB. In addition, extend all CB generation functions
with a buffer offset, so that packets generated by different functions
can be placed in the same CB.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    |  32 ++++
 drivers/misc/habanalabs/common/habanalabs.h   |  16 +-
 drivers/misc/habanalabs/common/hw_queue.c     |   6 +-
 drivers/misc/habanalabs/gaudi/gaudi.c         | 137 ++++++++++++------
 drivers/misc/habanalabs/goya/goya.c           |   9 +-
 5 files changed, 143 insertions(+), 57 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 9d49dd1558af..0d82c7dd93d0 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -38,6 +38,38 @@ void hl_sob_reset_error(struct kref *ref)
 			hw_sob->q_idx, hw_sob->sob_id);
 }
 
+/**
+ * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
+ * @sob_base: sob base id
+ * @sob_mask: sob user mask, each bit represents a sob offset from sob base
+ * @mask: generated mask (selected bits cleared), written only on success
+ *
+ * Return: 0 if given parameters are valid, -EINVAL otherwise
+ */
+int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
+{
+	int i;
+
+	if (sob_mask == 0)
+		return -EINVAL;
+
+	if (sob_mask == 0x1) {
+		*mask = ~(1 << (sob_base & 0x7)); /* clear only this sob's bit */
+	} else {
+		/* find msb in order to verify sob range is valid */
+		for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
+			if (BIT(i) & sob_mask)
+				break;
+
+		if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
+			return -EINVAL;
+
+		*mask = ~sob_mask; /* NOTE(review): no base shift - confirm */
+	}
+
+	return 0;
+}
+
 static void hl_fence_release(struct kref *kref)
 {
 	struct hl_fence *fence =
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 58b4097235d9..7307e0b88b44 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -77,20 +77,26 @@
 
 #define HL_MAX_DCORES			4
 
+#define HL_MAX_SOBS_PER_MONITOR	8
+
 /**
  * struct hl_gen_wait_properties - properties for generating a wait CB
  * @data: command buffer
  * @q_idx: queue id is used to extract fence register address
- * @sob_id: SOB id to use in this wait CB
+ * @size: offset in command buffer at which the wait packets are written
+ * @sob_base: SOB base to use in this wait CB
  * @sob_val: SOB value to wait for
  * @mon_id: monitor to use in this wait CB
+ * @sob_mask: each bit represents a SOB offset from sob_base to be used
  */
 struct hl_gen_wait_properties {
 	void	*data;
 	u32	q_idx;
-	u16	sob_id;
+	u32	size;
+	u16	sob_base;
 	u16	sob_val;
 	u16	mon_id;
+	u8	sob_mask;
 };
 
 /**
@@ -844,8 +850,9 @@ struct hl_asic_funcs {
 	int (*load_boot_fit_to_device)(struct hl_device *hdev);
 	u32 (*get_signal_cb_size)(struct hl_device *hdev);
 	u32 (*get_wait_cb_size)(struct hl_device *hdev);
-	void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
-	void (*gen_wait_cb)(struct hl_device *hdev,
+	u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id,
+			u32 size);
+	u32 (*gen_wait_cb)(struct hl_device *hdev,
 			struct hl_gen_wait_properties *prop);
 	void (*reset_sob)(struct hl_device *hdev, void *data);
 	void (*set_dma_mask_from_fw)(struct hl_device *hdev);
@@ -1927,6 +1934,7 @@ void hl_cs_rollback_all(struct hl_device *hdev);
 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 		enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
 void hl_sob_reset_error(struct kref *ref);
+int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
 void hl_fence_put(struct hl_fence *fence);
 void hl_fence_get(struct hl_fence *fence);
 
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 613681c2cdcc..ca625789d78d 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -408,7 +408,7 @@ static void init_signal_cs(struct hl_device *hdev,
 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
 
 	hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
-				cs_cmpl->hw_sob->sob_id);
+				cs_cmpl->hw_sob->sob_id, 0);
 
 	kref_get(&hw_sob->kref);
 
@@ -455,10 +455,12 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
 		prop->base_mon_id, q_idx);
 
 	wait_prop.data = (void *) job->patched_cb;
-	wait_prop.sob_id = cs_cmpl->hw_sob->sob_id;
+	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
+	wait_prop.sob_mask = 0x1;
 	wait_prop.sob_val = cs_cmpl->sob_val;
 	wait_prop.mon_id = prop->base_mon_id;
 	wait_prop.q_idx = q_idx;
+	wait_prop.size = 0;
 	hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);
 
 	kref_get(&cs_cmpl->hw_sob->kref);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 46dced9d1eec..930b26b1f445 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -6380,14 +6380,16 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
 			sizeof(struct packet_msg_prot) * 2;
 }
 
-static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
+static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
+		u32 size)
 {
 	struct hl_cb *cb = (struct hl_cb *) data;
 	struct packet_msg_short *pkt;
-	u32 value, ctl;
+	u32 value, ctl, pkt_size = sizeof(*pkt);
 
-	pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
-	memset(pkt, 0, sizeof(*pkt));
+	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address +
+									size);
+	memset(pkt, 0, pkt_size);
 
 	/* Inc by 1, Mode ADD */
 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
@@ -6403,6 +6405,8 @@ static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
 
 	pkt->value = cpu_to_le32(value);
 	pkt->ctl = cpu_to_le32(ctl);
+
+	return size + pkt_size;
 }
 
 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
@@ -6425,21 +6429,42 @@ static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
 	return pkt_size;
 }
 
-static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
-					u16 sob_val, u16 addr)
+static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
+		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
+		u16 sob_val, u16 mon_id)
 {
+	u64 monitor_base;
 	u32 ctl, value, pkt_size = sizeof(*pkt);
-	u8 mask = ~(1 << (sob_id & 0x7));
+	u16 msg_addr_offset;
+	u8 mask;
+
+	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
+		dev_err(hdev->dev,
+			"sob_base %u (mask %#x) is not valid\n",
+			sob_base, sob_mask);
+		return 0;
+	}
+
+	/*
+	 * monitor_base should be the content of the base0 address registers,
+	 * so it will be added to the msg short offsets
+	 */
+	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
+
+	msg_addr_offset =
+		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
+				monitor_base;
 
 	memset(pkt, 0, pkt_size);
 
-	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
+	/* Monitor config packet: bind the monitor to a sync object */
+	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
 			0); /* GREATER OR EQUAL*/
 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
 
-	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
+	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
@@ -6474,60 +6499,61 @@ static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
 	return pkt_size;
 }
 
-static void gaudi_gen_wait_cb(struct hl_device *hdev,
-		struct hl_gen_wait_properties *prop)
+static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
 {
-	struct hl_cb *cb = (struct hl_cb *) prop->data;
-	void *buf = (void *) (uintptr_t) cb->kernel_address;
-	u64 monitor_base, fence_addr = 0;
-	u32 size = 0;
-	u16 msg_addr_offset;
+	u32 offset;
 
-	switch (prop->q_idx) {
+	switch (queue_id) {
 	case GAUDI_QUEUE_ID_DMA_0_0:
-		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
+		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
 		break;
 	case GAUDI_QUEUE_ID_DMA_0_1:
-		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
+		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
 		break;
 	case GAUDI_QUEUE_ID_DMA_0_2:
-		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
+		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
 		break;
 	case GAUDI_QUEUE_ID_DMA_0_3:
-		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
+		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
 		break;
 	case GAUDI_QUEUE_ID_DMA_1_0:
-		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
+		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
 		break;
 	case GAUDI_QUEUE_ID_DMA_1_1:
-		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
+		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
 		break;
 	case GAUDI_QUEUE_ID_DMA_1_2:
-		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
+		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
 		break;
 	case GAUDI_QUEUE_ID_DMA_1_3:
-		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
+		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
 		break;
 	case GAUDI_QUEUE_ID_DMA_5_0:
-		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
+		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
 		break;
 	case GAUDI_QUEUE_ID_DMA_5_1:
-		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
+		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
 		break;
 	case GAUDI_QUEUE_ID_DMA_5_2:
-		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
+		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
 		break;
 	case GAUDI_QUEUE_ID_DMA_5_3:
-		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
+		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
 		break;
 	default:
-		/* queue index should be valid here */
-		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
-				prop->q_idx);
-		return;
+		return -EINVAL;
 	}
 
-	fence_addr += CFG_BASE;
+	*addr = CFG_BASE + offset;
+
+	return 0;
+}
+
+static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
+{
+	u64 monitor_base;
+	u32 size = 0;
+	u16 msg_addr_offset;
 
 	/*
 	 * monitor_base should be the content of the base0 address registers,
@@ -6536,15 +6562,17 @@ static void gaudi_gen_wait_cb(struct hl_device *hdev,
 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
 
 	/* First monitor config packet: low address of the sync */
-	msg_addr_offset = (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 +
-			prop->mon_id * 4) - monitor_base;
+	msg_addr_offset =
+		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
+				monitor_base;
 
 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
 					msg_addr_offset);
 
 	/* Second monitor config packet: high address of the sync */
-	msg_addr_offset = (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 +
-			prop->mon_id * 4) - monitor_base;
+	msg_addr_offset =
+		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
+				monitor_base;
 
 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
 					msg_addr_offset);
@@ -6553,20 +6581,35 @@ static void gaudi_gen_wait_cb(struct hl_device *hdev,
 	 * Third monitor config packet: the payload, i.e. what to write when the
 	 * sync triggers
 	 */
-	msg_addr_offset = (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 +
-			prop->mon_id * 4) - monitor_base;
+	msg_addr_offset =
+		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
+				monitor_base;
 
 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
 
-	/* Fourth monitor config packet: bind the monitor to a sync object */
-	msg_addr_offset =
-		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) -
-				monitor_base;
-	size += gaudi_add_arm_monitor_pkt(buf + size, prop->sob_id,
-			prop->sob_val, msg_addr_offset);
+	return size;
+}
+
+static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
+		struct hl_gen_wait_properties *prop)
+{
+	struct hl_cb *cb = (struct hl_cb *) prop->data;
+	void *buf = (void *) (uintptr_t) cb->kernel_address;
+	u64 fence_addr = 0;
+	u32 size = prop->size; /* start writing at the caller's CB offset */
 
-	/* Fence packet */
+	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
+		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
+				prop->q_idx);
+		return 0; /* nothing was written to the CB */
+	}
+
+	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
+	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
+			prop->sob_mask, prop->sob_val, prop->mon_id);
 	size += gaudi_add_fence_pkt(buf + size);
+
+	return size; /* CB offset right after the last packet */
 }
 
 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index cd1366f10fbe..e8bf0b79cd67 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5292,15 +5292,16 @@ static u32 goya_get_wait_cb_size(struct hl_device *hdev)
 	return 0;
 }
 
-static void goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
+static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
+		u32 size)
 {
-
+	return 0;
 }
 
-static void goya_gen_wait_cb(struct hl_device *hdev,
+static u32 goya_gen_wait_cb(struct hl_device *hdev,
 		struct hl_gen_wait_properties *prop)
 {
-
+	return 0;
 }
 
 static void goya_reset_sob(struct hl_device *hdev, void *data)
-- 
2.17.1


  parent reply	other threads:[~2020-11-02 19:58 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-02 19:57 [PATCH] habanalabs: don't init vm module if no MMU Oded Gabbay
2020-11-02 19:57 ` [PATCH] habanalabs: minimize prints when everything is fine Oded Gabbay
2020-11-02 19:57 ` [PATCH 1/4] habanalabs: sync stream structures refactor Oded Gabbay
2020-11-02 19:58 ` Oded Gabbay [this message]
2020-11-02 19:58 ` [PATCH 3/4] habanalabs: sync stream refactor functions Oded Gabbay
2020-11-02 19:58 ` [PATCH 4/4] habanalabs: remove duplicate check Oded Gabbay

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201102195802.10608-4-ogabbay@kernel.org \
    --to=ogabbay@kernel.org \
    --cc=SW_Drivers@habana.ai \
    --cc=linux-kernel@vger.kernel.org \
    --cc=obitton@habana.ai \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.