From: Cheng Xu <chengyou@linux.alibaba.com>
To: jgg@ziepe.ca, leon@kernel.org
Cc: linux-rdma@vger.kernel.org, KaiShen@linux.alibaba.com
Subject: [PATCH for-next v2 1/4] RDMA/erdma: Make the device probe process more robust
Date: Wed, 28 Aug 2024 14:09:41 +0800 [thread overview]
Message-ID: <20240828060944.77829-2-chengyou@linux.alibaba.com> (raw)
In-Reply-To: <20240828060944.77829-1-chengyou@linux.alibaba.com>
The driver may be probed again while the hardware is still destroying
the internal resources allocated by the previous probe, which causes
the new probe to fail. To make probing more robust, always issue a
reset, and wait for it to complete, at the beginning of the device
probe process.
Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
---
drivers/infiniband/hw/erdma/erdma.h | 1 +
drivers/infiniband/hw/erdma/erdma_main.c | 44 +++++++++++++++++++-----
2 files changed, 36 insertions(+), 9 deletions(-)
diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
index c8bd698e21b0..b5c258f77ca0 100644
--- a/drivers/infiniband/hw/erdma/erdma.h
+++ b/drivers/infiniband/hw/erdma/erdma.h
@@ -94,6 +94,7 @@ enum {
#define ERDMA_CMDQ_TIMEOUT_MS 15000
#define ERDMA_REG_ACCESS_WAIT_MS 20
+#define ERDMA_WAIT_DEV_REST_CNT 50
#define ERDMA_WAIT_DEV_DONE_CNT 500
struct erdma_cmdq {
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index 7080f8a71ec4..9199058a0b29 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -209,11 +209,30 @@ static void erdma_device_uninit(struct erdma_dev *dev)
dma_pool_destroy(dev->resp_pool);
}
-static void erdma_hw_reset(struct erdma_dev *dev)
+static int erdma_hw_reset(struct erdma_dev *dev, bool wait)
{
u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
+ int i;
erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
+
+ if (!wait)
+ return 0;
+
+ for (i = 0; i < ERDMA_WAIT_DEV_REST_CNT; i++) {
+ if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
+ ERDMA_REG_DEV_ST_RESET_DONE_MASK))
+ break;
+
+ msleep(ERDMA_REG_ACCESS_WAIT_MS);
+ }
+
+ if (i == ERDMA_WAIT_DEV_REST_CNT) {
+ dev_err(&dev->pdev->dev, "wait reset done timeout.\n");
+ return -ETIME;
+ }
+
+ return 0;
}
static int erdma_wait_hw_init_done(struct erdma_dev *dev)
@@ -239,6 +258,17 @@ static int erdma_wait_hw_init_done(struct erdma_dev *dev)
return 0;
}
+static int erdma_preinit_check(struct erdma_dev *dev)
+{
+ u32 version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
+
+ /* we knows that it is a non-functional function. */
+ if (version == 0)
+ return -ENODEV;
+
+ return erdma_hw_reset(dev, true);
+}
+
static const struct pci_device_id erdma_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
{}
@@ -248,7 +278,6 @@ static int erdma_probe_dev(struct pci_dev *pdev)
{
struct erdma_dev *dev;
int bars, err;
- u32 version;
err = pci_enable_device(pdev);
if (err) {
@@ -287,12 +316,9 @@ static int erdma_probe_dev(struct pci_dev *pdev)
goto err_release_bars;
}
- version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
- if (version == 0) {
- /* we knows that it is a non-functional function. */
- err = -ENODEV;
+ err = erdma_preinit_check(dev);
+ if (err)
goto err_iounmap_func_bar;
- }
err = erdma_device_init(dev, pdev);
if (err)
@@ -327,7 +353,7 @@ static int erdma_probe_dev(struct pci_dev *pdev)
return 0;
err_reset_hw:
- erdma_hw_reset(dev);
+ erdma_hw_reset(dev, false);
err_uninit_cmdq:
erdma_cmdq_destroy(dev);
@@ -364,7 +390,7 @@ static void erdma_remove_dev(struct pci_dev *pdev)
struct erdma_dev *dev = pci_get_drvdata(pdev);
erdma_ceqs_uninit(dev);
- erdma_hw_reset(dev);
+ erdma_hw_reset(dev, false);
erdma_cmdq_destroy(dev);
erdma_aeq_destroy(dev);
erdma_comm_irq_uninit(dev);
--
2.31.1
next prev parent reply other threads:[~2024-08-28 6:09 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-28 6:09 [PATCH for-next v2 0/4] RDMA/erdma: erdma updates Cheng Xu
2024-08-28 6:09 ` Cheng Xu [this message]
2024-08-29 10:09 ` [PATCH for-next v2 1/4] RDMA/erdma: Make the device probe process more robust Leon Romanovsky
2024-08-30 2:34 ` Cheng Xu
2024-09-02 7:21 ` Leon Romanovsky
2024-09-02 9:09 ` Cheng Xu
2024-09-04 16:06 ` Jason Gunthorpe
2024-09-05 3:39 ` Cheng Xu
2024-08-28 6:09 ` [PATCH for-next v2 2/4] RDMA/erdma: Refactor the initialization and destruction of EQ Cheng Xu
2024-08-28 6:09 ` [PATCH for-next v2 3/4] RDMA/erdma: Add disassociate ucontext support Cheng Xu
2024-08-28 6:09 ` [PATCH for-next v2 4/4] RDMA/erdma: Return QP state in erdma_query_qp Cheng Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240828060944.77829-2-chengyou@linux.alibaba.com \
--to=chengyou@linux.alibaba.com \
--cc=KaiShen@linux.alibaba.com \
--cc=jgg@ziepe.ca \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.