From: Cheng Xu <chengyou@linux.alibaba.com>
To: jgg@ziepe.ca, leon@kernel.org
Cc: linux-rdma@vger.kernel.org, KaiShen@linux.alibaba.com
Subject: [PATCH for-next v2 1/4] RDMA/erdma: Make the device probe process more robust
Date: Wed, 28 Aug 2024 14:09:41 +0800 [thread overview]
Message-ID: <20240828060944.77829-2-chengyou@linux.alibaba.com> (raw)
In-Reply-To: <20240828060944.77829-1-chengyou@linux.alibaba.com>
The driver may be probed again while the hardware is still destroying
the internal resources allocated for the previous probe, which causes
the new probe to fail. To make probing more robust, always issue a
reset, and wait for it to complete, at the beginning of the device
probe process.
Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
---
drivers/infiniband/hw/erdma/erdma.h | 1 +
drivers/infiniband/hw/erdma/erdma_main.c | 44 +++++++++++++++++++-----
2 files changed, 36 insertions(+), 9 deletions(-)
diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
index c8bd698e21b0..b5c258f77ca0 100644
--- a/drivers/infiniband/hw/erdma/erdma.h
+++ b/drivers/infiniband/hw/erdma/erdma.h
@@ -94,6 +94,7 @@ enum {
#define ERDMA_CMDQ_TIMEOUT_MS 15000
#define ERDMA_REG_ACCESS_WAIT_MS 20
+#define ERDMA_WAIT_DEV_REST_CNT 50
#define ERDMA_WAIT_DEV_DONE_CNT 500
struct erdma_cmdq {
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index 7080f8a71ec4..9199058a0b29 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -209,11 +209,30 @@ static void erdma_device_uninit(struct erdma_dev *dev)
dma_pool_destroy(dev->resp_pool);
}
-static void erdma_hw_reset(struct erdma_dev *dev)
+static int erdma_hw_reset(struct erdma_dev *dev, bool wait)
{
u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
+ int i;
erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
+
+ if (!wait)
+ return 0;
+
+ for (i = 0; i < ERDMA_WAIT_DEV_REST_CNT; i++) {
+ if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
+ ERDMA_REG_DEV_ST_RESET_DONE_MASK))
+ break;
+
+ msleep(ERDMA_REG_ACCESS_WAIT_MS);
+ }
+
+ if (i == ERDMA_WAIT_DEV_REST_CNT) {
+ dev_err(&dev->pdev->dev, "wait reset done timeout.\n");
+ return -ETIME;
+ }
+
+ return 0;
}
static int erdma_wait_hw_init_done(struct erdma_dev *dev)
@@ -239,6 +258,17 @@ static int erdma_wait_hw_init_done(struct erdma_dev *dev)
return 0;
}
+static int erdma_preinit_check(struct erdma_dev *dev)
+{
+ u32 version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
+
+ /* A version of 0 means this is a non-functional function. */
+ if (version == 0)
+ return -ENODEV;
+
+ return erdma_hw_reset(dev, true);
+}
+
static const struct pci_device_id erdma_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
{}
@@ -248,7 +278,6 @@ static int erdma_probe_dev(struct pci_dev *pdev)
{
struct erdma_dev *dev;
int bars, err;
- u32 version;
err = pci_enable_device(pdev);
if (err) {
@@ -287,12 +316,9 @@ static int erdma_probe_dev(struct pci_dev *pdev)
goto err_release_bars;
}
- version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
- if (version == 0) {
- /* we knows that it is a non-functional function. */
- err = -ENODEV;
+ err = erdma_preinit_check(dev);
+ if (err)
goto err_iounmap_func_bar;
- }
err = erdma_device_init(dev, pdev);
if (err)
@@ -327,7 +353,7 @@ static int erdma_probe_dev(struct pci_dev *pdev)
return 0;
err_reset_hw:
- erdma_hw_reset(dev);
+ erdma_hw_reset(dev, false);
err_uninit_cmdq:
erdma_cmdq_destroy(dev);
@@ -364,7 +390,7 @@ static void erdma_remove_dev(struct pci_dev *pdev)
struct erdma_dev *dev = pci_get_drvdata(pdev);
erdma_ceqs_uninit(dev);
- erdma_hw_reset(dev);
+ erdma_hw_reset(dev, false);
erdma_cmdq_destroy(dev);
erdma_aeq_destroy(dev);
erdma_comm_irq_uninit(dev);
--
2.31.1
next prev parent reply other threads:[~2024-08-28 6:09 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-28 6:09 [PATCH for-next v2 0/4] RDMA/erdma: erdma updates Cheng Xu
2024-08-28 6:09 ` Cheng Xu [this message]
2024-08-29 10:09 ` [PATCH for-next v2 1/4] RDMA/erdma: Make the device probe process more robust Leon Romanovsky
2024-08-30 2:34 ` Cheng Xu
2024-09-02 7:21 ` Leon Romanovsky
2024-09-02 9:09 ` Cheng Xu
2024-09-04 16:06 ` Jason Gunthorpe
2024-09-05 3:39 ` Cheng Xu
2024-08-28 6:09 ` [PATCH for-next v2 2/4] RDMA/erdma: Refactor the initialization and destruction of EQ Cheng Xu
2024-08-28 6:09 ` [PATCH for-next v2 3/4] RDMA/erdma: Add disassociate ucontext support Cheng Xu
2024-08-28 6:09 ` [PATCH for-next v2 4/4] RDMA/erdma: Return QP state in erdma_query_qp Cheng Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240828060944.77829-2-chengyou@linux.alibaba.com \
--to=chengyou@linux.alibaba.com \
--cc=KaiShen@linux.alibaba.com \
--cc=jgg@ziepe.ca \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox