public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
From: Leon Romanovsky <leon@kernel.org>
To: Cheng Xu <chengyou@linux.alibaba.com>
Cc: jgg@ziepe.ca, linux-rdma@vger.kernel.org, KaiShen@linux.alibaba.com
Subject: Re: [PATCH for-next v2 1/4] RDMA/erdma: Make the device probe process more robust
Date: Thu, 29 Aug 2024 13:09:55 +0300	[thread overview]
Message-ID: <20240829100955.GB26654@unreal> (raw)
In-Reply-To: <20240828060944.77829-2-chengyou@linux.alibaba.com>

On Wed, Aug 28, 2024 at 02:09:41PM +0800, Cheng Xu wrote:
> The driver may probe again while the hardware is destroying the internal
> resources allocated for the previous probe

How is it possible?


> which will fail the device probe. To make it more robust, we always issue a reset at the
> beginning of the device probe process.
> 
> Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
> ---
>  drivers/infiniband/hw/erdma/erdma.h      |  1 +
>  drivers/infiniband/hw/erdma/erdma_main.c | 44 +++++++++++++++++++-----
>  2 files changed, 36 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
> index c8bd698e21b0..b5c258f77ca0 100644
> --- a/drivers/infiniband/hw/erdma/erdma.h
> +++ b/drivers/infiniband/hw/erdma/erdma.h
> @@ -94,6 +94,7 @@ enum {
>  
>  #define ERDMA_CMDQ_TIMEOUT_MS 15000
>  #define ERDMA_REG_ACCESS_WAIT_MS 20
> +#define ERDMA_WAIT_DEV_REST_CNT 50
>  #define ERDMA_WAIT_DEV_DONE_CNT 500
>  
>  struct erdma_cmdq {
> diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
> index 7080f8a71ec4..9199058a0b29 100644
> --- a/drivers/infiniband/hw/erdma/erdma_main.c
> +++ b/drivers/infiniband/hw/erdma/erdma_main.c
> @@ -209,11 +209,30 @@ static void erdma_device_uninit(struct erdma_dev *dev)
>  	dma_pool_destroy(dev->resp_pool);
>  }
>  
> -static void erdma_hw_reset(struct erdma_dev *dev)
> +static int erdma_hw_reset(struct erdma_dev *dev, bool wait)
>  {
>  	u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
> +	int i;
>  
>  	erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
> +
> +	if (!wait)
> +		return 0;
> +
> +	for (i = 0; i < ERDMA_WAIT_DEV_REST_CNT; i++) {
> +		if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
> +					   ERDMA_REG_DEV_ST_RESET_DONE_MASK))
> +			break;
> +
> +		msleep(ERDMA_REG_ACCESS_WAIT_MS);
> +	}
> +
> +	if (i == ERDMA_WAIT_DEV_REST_CNT) {
> +		dev_err(&dev->pdev->dev, "wait reset done timeout.\n");
> +		return -ETIME;
> +	}
> +
> +	return 0;
>  }
>  
>  static int erdma_wait_hw_init_done(struct erdma_dev *dev)
> @@ -239,6 +258,17 @@ static int erdma_wait_hw_init_done(struct erdma_dev *dev)
>  	return 0;
>  }
>  
> +static int erdma_preinit_check(struct erdma_dev *dev)
> +{
> +	u32 version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
> +
> +	/* We know that it is a non-functional function. */
> +	if (version == 0)
> +		return -ENODEV;
> +
> +	return erdma_hw_reset(dev, true);
> +}
> +
>  static const struct pci_device_id erdma_pci_tbl[] = {
>  	{ PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
>  	{}
> @@ -248,7 +278,6 @@ static int erdma_probe_dev(struct pci_dev *pdev)
>  {
>  	struct erdma_dev *dev;
>  	int bars, err;
> -	u32 version;
>  
>  	err = pci_enable_device(pdev);
>  	if (err) {
> @@ -287,12 +316,9 @@ static int erdma_probe_dev(struct pci_dev *pdev)
>  		goto err_release_bars;
>  	}
>  
> -	version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
> -	if (version == 0) {
> -		/* we knows that it is a non-functional function. */
> -		err = -ENODEV;
> +	err = erdma_preinit_check(dev);
> +	if (err)
>  		goto err_iounmap_func_bar;
> -	}
>  
>  	err = erdma_device_init(dev, pdev);
>  	if (err)
> @@ -327,7 +353,7 @@ static int erdma_probe_dev(struct pci_dev *pdev)
>  	return 0;
>  
>  err_reset_hw:
> -	erdma_hw_reset(dev);
> +	erdma_hw_reset(dev, false);
>  
>  err_uninit_cmdq:
>  	erdma_cmdq_destroy(dev);
> @@ -364,7 +390,7 @@ static void erdma_remove_dev(struct pci_dev *pdev)
>  	struct erdma_dev *dev = pci_get_drvdata(pdev);
>  
>  	erdma_ceqs_uninit(dev);
> -	erdma_hw_reset(dev);
> +	erdma_hw_reset(dev, false);
>  	erdma_cmdq_destroy(dev);
>  	erdma_aeq_destroy(dev);
>  	erdma_comm_irq_uninit(dev);
> -- 
> 2.31.1
> 
> 

  reply	other threads:[~2024-08-29 10:10 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-28  6:09 [PATCH for-next v2 0/4] RDMA/erdma: erdma updates Cheng Xu
2024-08-28  6:09 ` [PATCH for-next v2 1/4] RDMA/erdma: Make the device probe process more robust Cheng Xu
2024-08-29 10:09   ` Leon Romanovsky [this message]
2024-08-30  2:34     ` Cheng Xu
2024-09-02  7:21       ` Leon Romanovsky
2024-09-02  9:09         ` Cheng Xu
2024-09-04 16:06           ` Jason Gunthorpe
2024-09-05  3:39             ` Cheng Xu
2024-08-28  6:09 ` [PATCH for-next v2 2/4] RDMA/erdma: Refactor the initialization and destruction of EQ Cheng Xu
2024-08-28  6:09 ` [PATCH for-next v2 3/4] RDMA/erdma: Add disassociate ucontext support Cheng Xu
2024-08-28  6:09 ` [PATCH for-next v2 4/4] RDMA/erdma: Return QP state in erdma_query_qp Cheng Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240829100955.GB26654@unreal \
    --to=leon@kernel.org \
    --cc=KaiShen@linux.alibaba.com \
    --cc=chengyou@linux.alibaba.com \
    --cc=jgg@ziepe.ca \
    --cc=linux-rdma@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox