From: Selvin Xavier <selvin.xavier@broadcom.com>
To: jgg@ziepe.ca, leon@kernel.org
Cc: linux-rdma@vger.kernel.org, andrew.gospodarek@broadcom.com,
Kashyap Desai <kashyap.desai@broadcom.com>,
Selvin Xavier <selvin.xavier@broadcom.com>
Subject: [PATCH for-next 15/17] RDMA/bnxt_re: use firmware provided max request timeout
Date: Thu, 8 Jun 2023 03:25:06 -0700 [thread overview]
Message-ID: <1686219908-11181-16-git-send-email-selvin.xavier@broadcom.com> (raw)
In-Reply-To: <1686219908-11181-1-git-send-email-selvin.xavier@broadcom.com>
[-- Attachment #1: Type: text/plain, Size: 6287 bytes --]
From: Kashyap Desai <kashyap.desai@broadcom.com>
Firmware provides the max request timeout value as part of the
hwrm_ver_get API. The driver gets the timeout from firmware and, if that
interface is not available, falls back to a hardcoded timeout value.
Also, add a helper function to check the FW status.
Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
drivers/infiniband/hw/bnxt_re/main.c | 8 +++++
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 57 ++++++++++++++++++++++++------
drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 4 ++-
drivers/infiniband/hw/bnxt_re/qplib_res.h | 1 +
4 files changed, 58 insertions(+), 12 deletions(-)
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 8241154..a2c7d3f 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1041,6 +1041,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
struct bnxt_en_dev *en_dev = rdev->en_dev;
struct hwrm_ver_get_output resp = {0};
struct hwrm_ver_get_input req = {0};
+ struct bnxt_qplib_chip_ctx *cctx;
struct bnxt_fw_msg fw_msg;
int rc = 0;
@@ -1058,11 +1059,18 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
rc);
return;
}
+
+ cctx = rdev->chip_ctx;
rdev->qplib_ctx.hwrm_intf_ver =
(u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
(u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
(u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
le16_to_cpu(resp.hwrm_intf_patch);
+
+ cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout);
+
+ if (!cctx->hwrm_cmd_max_timeout)
+ cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT;
}
static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 4f36644..8d08715 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -90,6 +90,41 @@ static int bnxt_qplib_map_rc(u8 opcode)
}
/**
+ * bnxt_re_is_fw_stalled - Check firmware health
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command (reported in the stall warning)
+ * @opcode: opcode of the submitted rcfw command (reported in the warning)
+ * @cbit: bitmap entry of cookie
+ *
+ * If more than rcfw->max_timeout seconds have elapsed since
+ * cmdq->last_seen, consider the firmware stalled and log a warning.
+ *
+ * Returns:
+ * 0 if firmware is responding
+ * -ENODEV if firmware is not responding
+ */
+static int bnxt_re_is_fw_stalled(struct bnxt_qplib_rcfw *rcfw,
+ u16 cookie, u8 opcode, u16 cbit)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+
+ cmdq = &rcfw->cmdq;
+
+ if (time_after(jiffies, cmdq->last_seen +
+ (rcfw->max_timeout * HZ))) {
+ dev_warn_ratelimited(&rcfw->pdev->dev,
+ "%s: FW STALL Detected. cmdq[%#x]=%#x waited (%d > %d) msec active %d ",
+ __func__, cookie, opcode,
+ jiffies_to_msecs(jiffies - cmdq->last_seen),
+ rcfw->max_timeout * 1000,
+ test_bit(cbit, cmdq->cmdq_bitmap));
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/**
* __wait_for_resp - Don't hold the cpu context and wait for response
* @rcfw - rcfw channel instance of rdev
* @cookie - cookie to track the command
@@ -120,8 +155,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
ret = wait_event_timeout(cmdq->waitq,
!test_bit(cbit, cmdq->cmdq_bitmap) ||
test_bit(ERR_DEVICE_DETACHED, &cmdq->flags),
- msecs_to_jiffies(RCFW_FW_STALL_TIMEOUT_SEC
- * 1000));
+ msecs_to_jiffies(rcfw->max_timeout * 1000));
if (!test_bit(cbit, cmdq->cmdq_bitmap))
return 0;
@@ -131,10 +165,9 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
if (!test_bit(cbit, cmdq->cmdq_bitmap))
return 0;
- /* Firmware stall is detected */
- if (time_after(jiffies, cmdq->last_seen +
- (RCFW_FW_STALL_TIMEOUT_SEC * HZ)))
- return -ENODEV;
+ ret = bnxt_re_is_fw_stalled(rcfw, cookie, opcode, cbit);
+ if (ret)
+ return ret;
} while (true);
};
@@ -357,6 +390,7 @@ static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie,
struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
unsigned long issue_time;
u16 cbit;
+ int ret;
cbit = cookie % rcfw->cmdq_depth;
issue_time = jiffies;
@@ -373,11 +407,10 @@ static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie,
if (!test_bit(cbit, cmdq->cmdq_bitmap))
return 0;
if (jiffies_to_msecs(jiffies - issue_time) >
- (RCFW_FW_STALL_TIMEOUT_SEC * 1000)) {
- /* Firmware stall is detected */
- if (time_after(jiffies, cmdq->last_seen +
- (RCFW_FW_STALL_TIMEOUT_SEC * HZ)))
- return -ENODEV;
+ (rcfw->max_timeout * 1000)) {
+ ret = bnxt_re_is_fw_stalled(rcfw, cookie, opcode, cbit);
+ if (ret)
+ return ret;
}
} while (true);
};
@@ -957,6 +990,8 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
if (!rcfw->qp_tbl)
goto fail;
+ rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;
+
return 0;
fail:
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 338bf6a..b644dcc 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -51,7 +51,7 @@
#define RCFW_DBR_PCI_BAR_REGION 2
#define RCFW_DBR_BASE_PAGE_SHIFT 12
-#define RCFW_FW_STALL_TIMEOUT_SEC 40
+#define RCFW_FW_STALL_MAX_TIMEOUT 40
/* Cmdq contains a fix number of a 16-Byte slots */
struct bnxt_qplib_cmdqe {
@@ -227,6 +227,8 @@ struct bnxt_qplib_rcfw {
atomic_t rcfw_intr_enabled;
struct semaphore rcfw_inflight;
atomic_t timeout_send;
+ /* cached from chip cctx for quick reference in slow path */
+ u16 max_timeout;
};
struct bnxt_qplib_cmdqmsg {
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 982e2c9..77f0b84 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -55,6 +55,7 @@ struct bnxt_qplib_chip_ctx {
u8 chip_rev;
u8 chip_metal;
u16 hw_stats_size;
+ u16 hwrm_cmd_max_timeout;
struct bnxt_qplib_drv_modes modes;
};
--
2.5.5
[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4224 bytes --]
next prev parent reply other threads:[~2023-06-08 10:37 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-08 10:24 [PATCH for-next 00/17] RDMA/bnxt_re: Control path updates Selvin Xavier
2023-06-08 10:24 ` [PATCH for-next 01/17] RDMA/bnxt_re: wraparound mbox producer index Selvin Xavier
2023-06-08 10:24 ` [PATCH for-next 02/17] RDMA/bnxt_re: Avoid calling wake_up threads from spin_lock context Selvin Xavier
2023-06-08 10:24 ` [PATCH for-next 03/17] RDMA/bnxt_re: remove virt_func check while creating RoCE FW channel Selvin Xavier
2023-06-08 10:24 ` [PATCH for-next 04/17] RDMA/bnxt_re: set fixed command queue depth Selvin Xavier
2023-06-08 10:24 ` [PATCH for-next 05/17] RDMA/bnxt_re: Enhance the existing functions that wait for FW responses Selvin Xavier
2023-06-08 10:24 ` [PATCH for-next 06/17] RDMA/bnxt_re: Avoid the command wait if firmware is inactive Selvin Xavier
2023-06-08 10:24 ` [PATCH for-next 07/17] RDMA/bnxt_re: use shadow qd while posting non blocking rcfw command Selvin Xavier
2023-06-08 10:24 ` [PATCH for-next 08/17] RDMA/bnxt_re: Simplify the function that sends the FW commands Selvin Xavier
2023-06-08 10:25 ` [PATCH for-next 09/17] RDMA/bnxt_re: add helper function __poll_for_resp Selvin Xavier
2023-06-08 10:25 ` [PATCH for-next 10/17] RDMA/bnxt_re: handle command completions after driver detect a timedout Selvin Xavier
2023-06-08 12:53 ` kernel test robot
2023-06-08 10:25 ` [PATCH for-next 11/17] RDMA/bnxt_re: Add firmware stall check detection Selvin Xavier
2023-06-08 10:25 ` [PATCH for-next 12/17] RDMA/bnxt_re: post destroy_ah for delayed completion of AH creation Selvin Xavier
2023-06-08 10:25 ` [PATCH for-next 13/17] RDMA/bnxt_re: consider timeout of destroy ah as success Selvin Xavier
2023-06-08 10:25 ` [PATCH for-next 14/17] RDMA/bnxt_re: cancel all control path command waiters upon error Selvin Xavier
2023-06-08 10:25 ` Selvin Xavier [this message]
2023-06-08 10:25 ` [PATCH for-next 16/17] RDMA/bnxt_re: remove redundant cmdq_bitmap Selvin Xavier
2023-06-08 10:25 ` [PATCH for-next 17/17] RDMA/bnxt_re: optimize the parameters passed to helper functions Selvin Xavier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1686219908-11181-16-git-send-email-selvin.xavier@broadcom.com \
--to=selvin.xavier@broadcom.com \
--cc=andrew.gospodarek@broadcom.com \
--cc=jgg@ziepe.ca \
--cc=kashyap.desai@broadcom.com \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox