From: Leon Romanovsky <leon@kernel.org>
To: Junxian Huang <huangjunxian6@hisilicon.com>
Cc: jgg@ziepe.ca, linux-rdma@vger.kernel.org, linuxarm@huawei.com,
tangchengchang@huawei.com
Subject: Re: [PATCH for-next] RDMA/hns: Fix mbox timing out by adding retry mechanism
Date: Mon, 3 Feb 2025 13:46:59 +0200 [thread overview]
Message-ID: <20250203114659.GE74886@unreal> (raw)
In-Reply-To: <20250123012930.2049043-1-huangjunxian6@hisilicon.com>
On Thu, Jan 23, 2025 at 09:29:30AM +0800, Junxian Huang wrote:
> If a QP is modified to error state and a flush CQE process is triggered,
> the subsequent QP destruction mbox can still be successfully posted but
> will be blocked in HW until the flush CQE process finishes. This causes
> further mbox posting timeouts in driver. The blocking time is related
> to QP depth. Considering an extreme case where SQ depth and RQ depth
> are both 32K, the blocking time can reach about 135ms.
>
> This patch adds a retry mechanism for mbox posting. For each try, FW
> waits 15ms for HW to complete the previous mbox, otherwise return a
> timeout error code to driver. Counting other time consumption in FW,
> set 8 tries for mbox posting and a 5ms time gap before each retry to
> increase to a sufficient timeout limit.
>
> Fixes: 0425e3e6e0c7 ("RDMA/hns: Support flush cqe for hip08 in kernel space")
> Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
> ---
> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 93 ++++++++++++++++------
> drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 6 +-
> 2 files changed, 74 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index 5c911d1def03..512866324f59 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -1268,24 +1268,27 @@ static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
> return -EIO;
> }
>
> -static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout)
> +static void hns_roce_get_cmdq_param(u16 opcode, u32 *tx_timeout, u8 *try_cnt,
> + u8 *retry_gap_msec)
> {
> - static const struct hns_roce_cmdq_tx_timeout_map cmdq_tx_timeout[] = {
> - {HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT},
> + static const struct hns_roce_cmdq_param_map param[] = {
> + {HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT,
> + HNS_ROCE_OPC_POST_MB_TRY_CNT,
> + HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC},
> };
> int i;
>
> - for (i = 0; i < ARRAY_SIZE(cmdq_tx_timeout); i++)
> - if (cmdq_tx_timeout[i].opcode == opcode)
> - return cmdq_tx_timeout[i].tx_timeout;
> -
> - return tx_timeout;
> + for (i = 0; i < ARRAY_SIZE(param); i++)
> + if (param[i].opcode == opcode) {
> + *tx_timeout = param[i].tx_timeout;
> + *try_cnt = param[i].try_cnt;
> + *retry_gap_msec = param[i].retry_gap_msec;
> + return;
> + }
The param is a one-element array with constant values. There is no need for
any loop or pointer complexity here.
> }
>
> -static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode)
> +static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout)
> {
> - struct hns_roce_v2_priv *priv = hr_dev->priv;
> - u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout);
> u32 timeout = 0;
>
> do {
> @@ -1295,8 +1298,9 @@ static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode)
> } while (++timeout < tx_timeout);
> }
>
> -static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
> - struct hns_roce_cmq_desc *desc, int num)
> +static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev,
> + struct hns_roce_cmq_desc *desc,
> + int num, u32 tx_timeout)
> {
> struct hns_roce_v2_priv *priv = hr_dev->priv;
> struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
> @@ -1305,8 +1309,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
> int ret;
> int i;
>
> - spin_lock_bh(&csq->lock);
> -
> tail = csq->head;
>
> for (i = 0; i < num; i++) {
> @@ -1320,22 +1322,17 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
>
> atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);
>
> - hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode));
> + hns_roce_wait_csq_done(hr_dev, tx_timeout);
> if (hns_roce_cmq_csq_done(hr_dev)) {
> ret = 0;
> for (i = 0; i < num; i++) {
> /* check the result of hardware write back */
> - desc[i] = csq->desc[tail++];
> + desc_ret = le16_to_cpu(csq->desc[tail++].retval);
> if (tail == csq->desc_num)
> tail = 0;
> -
> - desc_ret = le16_to_cpu(desc[i].retval);
> if (likely(desc_ret == CMD_EXEC_SUCCESS))
> continue;
>
> - dev_err_ratelimited(hr_dev->dev,
> - "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n",
> - desc->opcode, desc_ret);
> ret = hns_roce_cmd_err_convert_errno(desc_ret);
> }
> } else {
> @@ -1350,14 +1347,62 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
> ret = -EAGAIN;
> }
>
> - spin_unlock_bh(&csq->lock);
> -
> if (ret)
> atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]);
>
> return ret;
> }
>
> +static bool check_cmq_retry(u16 opcode, int ret)
> +{
> + return opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME;
> +}
> +
> +static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
> + struct hns_roce_cmq_desc *desc, int num)
> +{
> + struct hns_roce_v2_priv *priv = hr_dev->priv;
> + struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
> + u16 opcode = le16_to_cpu(desc->opcode);
> + u32 tx_timeout = priv->cmq.tx_timeout;
> + u8 retry_gap_msec = 0;
> + u8 try_cnt = 1;
> + u32 rsv_tail;
> + int ret;
> + int i;
> +
> + hns_roce_get_cmdq_param(opcode, &tx_timeout,
> + &try_cnt, &retry_gap_msec);
You are using constant values; please simplify this patch.
Thanks
> +
> + while (try_cnt) {
> + try_cnt--;
> +
> + spin_lock_bh(&csq->lock);
> + rsv_tail = csq->head;
> + ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout);
> + if (check_cmq_retry(opcode, ret) && try_cnt) {
> + spin_unlock_bh(&csq->lock);
> + mdelay(retry_gap_msec);
> + continue;
> + }
> +
> + for (i = 0; i < num; i++) {
> + desc[i] = csq->desc[rsv_tail++];
> + if (rsv_tail == csq->desc_num)
> + rsv_tail = 0;
> + }
> + spin_unlock_bh(&csq->lock);
> + break;
> + }
> +
> + if (ret)
> + dev_err_ratelimited(hr_dev->dev,
> + "Cmdq IO error, opcode = 0x%x, return = %d.\n",
> + opcode, ret);
> +
> + return ret;
> +}
> +
> static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
> struct hns_roce_cmq_desc *desc, int num)
> {
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> index cbdbc9edbce6..2e91babf333c 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> @@ -230,9 +230,13 @@ enum hns_roce_opcode_type {
> };
>
> #define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000
> -struct hns_roce_cmdq_tx_timeout_map {
> +#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8
> +#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5
> +struct hns_roce_cmdq_param_map {
> u16 opcode;
> u32 tx_timeout;
> + u8 try_cnt;
> + u8 retry_gap_msec;
> };
>
> enum {
> --
> 2.33.0
>
>
prev parent reply other threads:[~2025-02-03 11:47 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-23 1:29 [PATCH for-next] RDMA/hns: Fix mbox timing out by adding retry mechanism Junxian Huang
2025-02-03 11:46 ` Leon Romanovsky [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250203114659.GE74886@unreal \
--to=leon@kernel.org \
--cc=huangjunxian6@hisilicon.com \
--cc=jgg@ziepe.ca \
--cc=linux-rdma@vger.kernel.org \
--cc=linuxarm@huawei.com \
--cc=tangchengchang@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox