Linux RDMA and InfiniBand development
 help / color / mirror / Atom feed
From: Leon Romanovsky <leon@kernel.org>
To: Junxian Huang <huangjunxian6@hisilicon.com>
Cc: jgg@ziepe.ca, linux-rdma@vger.kernel.org, linuxarm@huawei.com,
	tangchengchang@huawei.com
Subject: Re: [PATCH for-next] RDMA/hns: Fix mbox timing out by adding retry mechanism
Date: Mon, 3 Feb 2025 13:46:59 +0200	[thread overview]
Message-ID: <20250203114659.GE74886@unreal> (raw)
In-Reply-To: <20250123012930.2049043-1-huangjunxian6@hisilicon.com>

On Thu, Jan 23, 2025 at 09:29:30AM +0800, Junxian Huang wrote:
> If a QP is modified to error state and a flush CQE process is triggered,
> the subsequent QP destruction mbox can still be successfully posted but
> will be blocked in HW until the flush CQE process finishes. This causes
> further mbox posting timeouts in driver. The blocking time is related
> to QP depth. Considering an extreme case where SQ depth and RQ depth
> are both 32K, the blocking time can reach about 135ms.
> 
> This patch adds a retry mechanism for mbox posting. For each try, FW
> waits 15ms for HW to complete the previous mbox, otherwise return a
> timeout error code to driver. Counting other time consumption in FW,
> set 8 tries for mbox posting and a 5ms time gap before each retry to
> increase to a sufficient timeout limit.
> 
> Fixes: 0425e3e6e0c7 ("RDMA/hns: Support flush cqe for hip08 in kernel space")
> Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 93 ++++++++++++++++------
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h |  6 +-
>  2 files changed, 74 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index 5c911d1def03..512866324f59 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -1268,24 +1268,27 @@ static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
>  	return -EIO;
>  }
>  
> -static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout)
> +static void hns_roce_get_cmdq_param(u16 opcode, u32 *tx_timeout, u8 *try_cnt,
> +				    u8 *retry_gap_msec)
>  {
> -	static const struct hns_roce_cmdq_tx_timeout_map cmdq_tx_timeout[] = {
> -		{HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT},
> +	static const struct hns_roce_cmdq_param_map param[] = {
> +		{HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT,
> +		 HNS_ROCE_OPC_POST_MB_TRY_CNT,
> +		 HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC},
>  	};
>  	int i;
>  
> -	for (i = 0; i < ARRAY_SIZE(cmdq_tx_timeout); i++)
> -		if (cmdq_tx_timeout[i].opcode == opcode)
> -			return cmdq_tx_timeout[i].tx_timeout;
> -
> -	return tx_timeout;
> +	for (i = 0; i < ARRAY_SIZE(param); i++)
> +		if (param[i].opcode == opcode) {
> +			*tx_timeout = param[i].tx_timeout;
> +			*try_cnt = param[i].try_cnt;
> +			*retry_gap_msec = param[i].retry_gap_msec;
> +			return;
> +		}

The param is one size array with constant values. There is no need in
any loop and pointer complexity here.

>  }
>  
> -static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode)
> +static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout)
>  {
> -	struct hns_roce_v2_priv *priv = hr_dev->priv;
> -	u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout);
>  	u32 timeout = 0;
>  
>  	do {
> @@ -1295,8 +1298,9 @@ static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode)
>  	} while (++timeout < tx_timeout);
>  }
>  
> -static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
> -			       struct hns_roce_cmq_desc *desc, int num)
> +static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev,
> +				   struct hns_roce_cmq_desc *desc,
> +				   int num, u32 tx_timeout)
>  {
>  	struct hns_roce_v2_priv *priv = hr_dev->priv;
>  	struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
> @@ -1305,8 +1309,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
>  	int ret;
>  	int i;
>  
> -	spin_lock_bh(&csq->lock);
> -
>  	tail = csq->head;
>  
>  	for (i = 0; i < num; i++) {
> @@ -1320,22 +1322,17 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
>  
>  	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);
>  
> -	hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode));
> +	hns_roce_wait_csq_done(hr_dev, tx_timeout);
>  	if (hns_roce_cmq_csq_done(hr_dev)) {
>  		ret = 0;
>  		for (i = 0; i < num; i++) {
>  			/* check the result of hardware write back */
> -			desc[i] = csq->desc[tail++];
> +			desc_ret = le16_to_cpu(csq->desc[tail++].retval);
>  			if (tail == csq->desc_num)
>  				tail = 0;
> -
> -			desc_ret = le16_to_cpu(desc[i].retval);
>  			if (likely(desc_ret == CMD_EXEC_SUCCESS))
>  				continue;
>  
> -			dev_err_ratelimited(hr_dev->dev,
> -					    "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n",
> -					    desc->opcode, desc_ret);
>  			ret = hns_roce_cmd_err_convert_errno(desc_ret);
>  		}
>  	} else {
> @@ -1350,14 +1347,62 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
>  		ret = -EAGAIN;
>  	}
>  
> -	spin_unlock_bh(&csq->lock);
> -
>  	if (ret)
>  		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]);
>  
>  	return ret;
>  }
>  
> +static bool check_cmq_retry(u16 opcode, int ret)
> +{
> +	return opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME;
> +}
> +
> +static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
> +			       struct hns_roce_cmq_desc *desc, int num)
> +{
> +	struct hns_roce_v2_priv *priv = hr_dev->priv;
> +	struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
> +	u16 opcode = le16_to_cpu(desc->opcode);
> +	u32 tx_timeout = priv->cmq.tx_timeout;
> +	u8 retry_gap_msec = 0;
> +	u8 try_cnt = 1;
> +	u32 rsv_tail;
> +	int ret;
> +	int i;
> +
> +	hns_roce_get_cmdq_param(opcode, &tx_timeout,
> +				&try_cnt, &retry_gap_msec);

You are using constant values, please simplify this patch.

Thanks

> +
> +	while (try_cnt) {
> +		try_cnt--;
> +
> +		spin_lock_bh(&csq->lock);
> +		rsv_tail = csq->head;
> +		ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout);
> +		if (check_cmq_retry(opcode, ret) && try_cnt) {
> +			spin_unlock_bh(&csq->lock);
> +			mdelay(retry_gap_msec);
> +			continue;
> +		}
> +
> +		for (i = 0; i < num; i++) {
> +			desc[i] = csq->desc[rsv_tail++];
> +			if (rsv_tail == csq->desc_num)
> +				rsv_tail = 0;
> +		}
> +		spin_unlock_bh(&csq->lock);
> +		break;
> +	}
> +
> +	if (ret)
> +		dev_err_ratelimited(hr_dev->dev,
> +				    "Cmdq IO error, opcode = 0x%x, return = %d.\n",
> +				    opcode, ret);
> +
> +	return ret;
> +}
> +
>  static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
>  			     struct hns_roce_cmq_desc *desc, int num)
>  {
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> index cbdbc9edbce6..2e91babf333c 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> @@ -230,9 +230,13 @@ enum hns_roce_opcode_type {
>  };
>  
>  #define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000
> -struct hns_roce_cmdq_tx_timeout_map {
> +#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8
> +#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5
> +struct hns_roce_cmdq_param_map {
>  	u16 opcode;
>  	u32 tx_timeout;
> +	u8 try_cnt;
> +	u8 retry_gap_msec;
>  };
>  
>  enum {
> -- 
> 2.33.0
> 
> 

      reply	other threads:[~2025-02-03 11:47 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-23  1:29 [PATCH for-next] RDMA/hns: Fix mbox timing out by adding retry mechanism Junxian Huang
2025-02-03 11:46 ` Leon Romanovsky [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250203114659.GE74886@unreal \
    --to=leon@kernel.org \
    --cc=huangjunxian6@hisilicon.com \
    --cc=jgg@ziepe.ca \
    --cc=linux-rdma@vger.kernel.org \
    --cc=linuxarm@huawei.com \
    --cc=tangchengchang@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox