From: Oded Gabbay <ogabbay@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: Ofir Bitton <obitton@habana.ai>
Subject: [PATCH 3/6] habanalabs/gaudi: set arbitration timeout to a high value
Date: Mon, 2 May 2022 11:05:46 +0300 [thread overview]
Message-ID: <20220502080549.230411-3-ogabbay@kernel.org> (raw)
In-Reply-To: <20220502080549.230411-1-ogabbay@kernel.org>
From: Ofir Bitton <obitton@habana.ai>
In certain workloads, arbitration timeout might expire although
no actual issue present. Hence, we set timeout to a very high value.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/gaudi/gaudi.c | 27 +++++++++++----------------
1 file changed, 11 insertions(+), 16 deletions(-)
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 7828337eedce..e74ea4c2d008 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -95,7 +95,7 @@
#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
-#define GAUDI_ARB_WDT_TIMEOUT 0x1000000
+#define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */
#define GAUDI_CLK_GATE_DEBUGFS_MASK (\
BIT(GAUDI_ENGINE_ID_MME_0) |\
@@ -2811,9 +2811,8 @@ static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
QMAN_EXTERNAL_MAKE_TRUSTED);
@@ -2990,9 +2989,8 @@ static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
@@ -3127,9 +3125,8 @@ static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
@@ -3261,9 +3258,8 @@ static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
@@ -3412,9 +3408,8 @@ static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
--
2.25.1
next prev parent reply other threads:[~2022-05-02 8:06 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-05-02 8:05 [PATCH 1/6] habanalabs: hide memory manager page shift Oded Gabbay
2022-05-02 8:05 ` [PATCH 2/6] habanalabs: add put by handle method to memory manager Oded Gabbay
2022-05-02 8:05 ` Oded Gabbay [this message]
2022-05-02 8:05 ` [PATCH 4/6] habanalabs: unified memory manager new code for CB flow Oded Gabbay
2022-05-02 8:05 ` [PATCH 5/6] habanalabs: use unified memory manager " Oded Gabbay
2022-05-02 8:05 ` [PATCH 6/6] habanalabs: add device memory scrub ability through debugfs Oded Gabbay
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220502080549.230411-3-ogabbay@kernel.org \
--to=ogabbay@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=obitton@habana.ai \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.