public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Oded Gabbay <ogabbay@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: Ohad Sharabi <osharabi@habana.ai>
Subject: [PATCH 01/12] habanalabs: fixes to the poll-timeout macros
Date: Mon, 11 Jul 2022 09:29:52 +0300	[thread overview]
Message-ID: <20220711063003.3182795-1-ogabbay@kernel.org> (raw)

From: Ohad Sharabi <osharabi@habana.ai>

- use conventional internal macro variables (double underscore prefix)
- adjust address casting
- when polling a register via ELBI, also use an ELBI read (rather than a
  BAR read) for the final read on timeout
- remove unused macro

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h | 119 +++++++++++++++-----
 1 file changed, 90 insertions(+), 29 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 72cb12f2068a..3c51eaca521c 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2473,9 +2473,11 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 /* Timeout should be longer when working with simulator but cap the
  * increased timeout to some maximum
  */
-#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
+#define hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, elbi) \
 ({ \
 	ktime_t __timeout; \
+	u32 __elbi_read; \
+	int __rc = 0; \
 	if (hdev->pdev) \
 		__timeout = ktime_add_us(ktime_get(), timeout_us); \
 	else \
@@ -2484,19 +2486,103 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 					(u64) HL_SIM_MAX_TIMEOUT_US)); \
 	might_sleep_if(sleep_us); \
 	for (;;) { \
-		(val) = RREG32(addr); \
+		if (elbi) { \
+			__rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \
+			if (__rc) \
+				break; \
+			(val) = __elbi_read; \
+		} else {\
+			(val) = RREG32((u32)addr); \
+		} \
 		if (cond) \
 			break; \
 		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
-			(val) = RREG32(addr); \
+			if (elbi) { \
+				__rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \
+				if (__rc) \
+					break; \
+				(val) = __elbi_read; \
+			} else {\
+				(val) = RREG32((u32)addr); \
+			} \
 			break; \
 		} \
 		if (sleep_us) \
 			usleep_range((sleep_us >> 2) + 1, sleep_us); \
 	} \
-	(cond) ? 0 : -ETIMEDOUT; \
+	__rc ? __rc : ((cond) ? 0 : -ETIMEDOUT); \
 })
 
+#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
+		hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, false)
+
+#define hl_poll_timeout_elbi(hdev, addr, val, cond, sleep_us, timeout_us) \
+		hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, true)
+
+/*
+ * Poll an array of register addresses.
+ * The condition is satisfied when all register values match the expected value.
+ * Once a register in the array satisfies the condition it is not polled again;
+ * this is done both for efficiency and because some registers are "clear on read".
+ * TODO: use read from PCI bar in other places in the code (SW-91406)
+ */
+#define hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+						timeout_us, elbi) \
+({ \
+	ktime_t __timeout; \
+	u64 __elem_bitmask; \
+	u32 __read_val;	\
+	u8 __arr_idx;	\
+	int __rc = 0; \
+	\
+	if (hdev->pdev) \
+		__timeout = ktime_add_us(ktime_get(), timeout_us); \
+	else \
+		__timeout = ktime_add_us(ktime_get(),\
+				min(((u64)timeout_us * 10), \
+					(u64) HL_SIM_MAX_TIMEOUT_US)); \
+	\
+	might_sleep_if(sleep_us); \
+	if (arr_size >= 64) \
+		__rc = -EINVAL; \
+	else \
+		__elem_bitmask = BIT_ULL(arr_size) - 1; \
+	for (;;) { \
+		if (__rc) \
+			break; \
+		for (__arr_idx = 0; __arr_idx < (arr_size); __arr_idx++) {	\
+			if (!(__elem_bitmask & BIT_ULL(__arr_idx)))	\
+				continue;	\
+			if (elbi) { \
+				__rc = hl_pci_elbi_read(hdev, (addr_arr)[__arr_idx], &__read_val); \
+				if (__rc) \
+					break; \
+			} else { \
+				__read_val = RREG32((u32)(addr_arr)[__arr_idx]); \
+			} \
+			if (__read_val == (expected_val))	\
+				__elem_bitmask &= ~BIT_ULL(__arr_idx);	\
+		}	\
+		if (__rc || (__elem_bitmask == 0)) \
+			break; \
+		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) \
+			break; \
+		if (sleep_us) \
+			usleep_range((sleep_us >> 2) + 1, sleep_us); \
+	} \
+	__rc ? __rc : ((__elem_bitmask == 0) ? 0 : -ETIMEDOUT); \
+})
+
+#define hl_poll_reg_array_timeout(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+					timeout_us) \
+	hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+						timeout_us, false)
+
+#define hl_poll_reg_array_timeout_elbi(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+					timeout_us) \
+	hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+						timeout_us, true)
+
 /*
  * address in this macro points always to a memory location in the
  * host's (server's) memory. That location is updated asynchronously
@@ -2540,31 +2626,6 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 	(cond) ? 0 : -ETIMEDOUT; \
 })
 
-#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
-					timeout_us) \
-({ \
-	ktime_t __timeout; \
-	if (hdev->pdev) \
-		__timeout = ktime_add_us(ktime_get(), timeout_us); \
-	else \
-		__timeout = ktime_add_us(ktime_get(),\
-				min((u64)(timeout_us * 10), \
-					(u64) HL_SIM_MAX_TIMEOUT_US)); \
-	might_sleep_if(sleep_us); \
-	for (;;) { \
-		(val) = readl(addr); \
-		if (cond) \
-			break; \
-		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
-			(val) = readl(addr); \
-			break; \
-		} \
-		if (sleep_us) \
-			usleep_range((sleep_us >> 2) + 1, sleep_us); \
-	} \
-	(cond) ? 0 : -ETIMEDOUT; \
-})
-
 #define HL_USR_MAPPED_BLK_INIT(blk, base, sz) \
 ({ \
 	struct user_mapped_block *p = blk; \
-- 
2.25.1


             reply	other threads:[~2022-07-11  6:30 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-11  6:29 Oded Gabbay [this message]
2022-07-11  6:29 ` [PATCH 02/12] habanalabs: add a value field to hl_fw_send_pci_access_msg() Oded Gabbay
2022-07-11  6:29 ` [PATCH 03/12] habanalabs/gaudi2: configure virtual MSI-X doorbell interface Oded Gabbay
2022-07-11  6:29 ` [PATCH 04/12] habanalabs/gaudi2: replace defines for reserved sob/mob with enums Oded Gabbay
2022-07-11  6:29 ` [PATCH 05/12] habanalabs/gaudi2: modify CS completion CQ to use virtual MSI-X doorbell Oded Gabbay
2022-07-11  6:29 ` [PATCH 06/12] habanalabs/gaudi2: modify decoder " Oded Gabbay
2022-07-11  6:29 ` [PATCH 07/12] habanalabs/gaudi2: map virtual MSI-X doorbell memory for user Oded Gabbay
2022-07-11  6:29 ` [PATCH 08/12] habanalabs: expose only valid debugfs nodes Oded Gabbay
2022-07-11  6:30 ` [PATCH 09/12] habanalabs: fix update of is_in_soft_reset Oded Gabbay
2022-07-11  6:30 ` [PATCH 10/12] habanalabs: add status of reset after device release Oded Gabbay
2022-07-11  6:30 ` [PATCH 11/12] habanalabs: rename soft reset to compute reset Oded Gabbay
2022-07-11  6:30 ` [PATCH 12/12] habanalabs: move h/w dirty message to debug Oded Gabbay

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220711063003.3182795-1-ogabbay@kernel.org \
    --to=ogabbay@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=osharabi@habana.ai \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox