From: longli@linux.microsoft.com
To: "K. Y. Srinivasan" <kys@microsoft.com>,
Haiyang Zhang <haiyangz@microsoft.com>,
Wei Liu <wei.liu@kernel.org>, Dexuan Cui <decui@microsoft.com>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Shradha Gupta <shradhagupta@linux.microsoft.com>,
Simon Horman <horms@kernel.org>,
Konstantin Taranov <kotaranov@microsoft.com>,
Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>,
Erick Archer <erick.archer@outlook.com>,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org
Cc: Long Li <longli@microsoft.com>
Subject: [patch net-next] net: mana: Handle hardware reset events when probing the device
Date: Fri, 14 Nov 2025 18:28:49 -0800 [thread overview]
Message-ID: <1763173729-28430-1-git-send-email-longli@linux.microsoft.com> (raw)
From: Long Li <longli@microsoft.com>
When MANA is being probed, the hardware may be in recovery mode, and
the device may receive GDMA_EQE_HWC_RESET_REQUEST over HWC in the
middle of the probe. Detect this condition and go through the recovery
service procedure.
Fixes: fbe346ce9d62 ("net: mana: Handle Reset Request from MANA NIC")
Signed-off-by: Long Li <longli@microsoft.com>
---
.../net/ethernet/microsoft/mana/gdma_main.c | 131 +++++++++++++++---
include/net/mana/gdma.h | 9 +-
2 files changed, 122 insertions(+), 18 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index effe0a2f207a..1d9c2beb22b2 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -15,6 +15,12 @@
struct dentry *mana_debugfs_root;
+static struct mana_serv_delayed_work {
+ struct delayed_work work;
+ struct pci_dev *pdev;
+ enum gdma_eqe_type type;
+} mns_delayed_wk;
+
static u32 mana_gd_r32(struct gdma_context *g, u64 offset)
{
return readl(g->bar0_va + offset);
@@ -387,6 +393,25 @@ EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA");
#define MANA_SERVICE_PERIOD 10
+static void mana_serv_rescan(struct pci_dev *pdev)
+{
+ struct pci_bus *parent;
+
+ pci_lock_rescan_remove();
+
+ parent = pdev->bus;
+ if (!parent) {
+ dev_err(&pdev->dev, "MANA service: no parent bus\n");
+ goto out;
+ }
+
+ pci_stop_and_remove_bus_device(pdev);
+ pci_rescan_bus(parent);
+
+out:
+ pci_unlock_rescan_remove();
+}
+
static void mana_serv_fpga(struct pci_dev *pdev)
{
struct pci_bus *bus, *parent;
@@ -419,9 +444,12 @@ static void mana_serv_reset(struct pci_dev *pdev)
{
struct gdma_context *gc = pci_get_drvdata(pdev);
struct hw_channel_context *hwc;
+ int ret;
if (!gc) {
- dev_err(&pdev->dev, "MANA service: no GC\n");
+ /* Perform PCI rescan on device if GC is not set up */
+ dev_err(&pdev->dev, "MANA service: GC not setup, rescanning\n");
+ mana_serv_rescan(pdev);
return;
}
@@ -440,9 +468,18 @@ static void mana_serv_reset(struct pci_dev *pdev)
msleep(MANA_SERVICE_PERIOD * 1000);
- mana_gd_resume(pdev);
+ ret = mana_gd_resume(pdev);
+ if (ret == -ETIMEDOUT || ret == -EPROTO) {
+ /* Perform PCI rescan on device if we failed on HWC */
+ dev_err(&pdev->dev, "MANA service: resume failed, rescanning\n");
+ mana_serv_rescan(pdev);
+ goto out;
+ }
- dev_info(&pdev->dev, "MANA reset cycle completed\n");
+ if (ret)
+ dev_info(&pdev->dev, "MANA reset cycle failed err %d\n", ret);
+ else
+ dev_info(&pdev->dev, "MANA reset cycle completed\n");
out:
gc->in_service = false;
@@ -454,18 +491,9 @@ struct mana_serv_work {
enum gdma_eqe_type type;
};
-static void mana_serv_func(struct work_struct *w)
+static void mana_do_service(enum gdma_eqe_type type, struct pci_dev *pdev)
{
- struct mana_serv_work *mns_wk;
- struct pci_dev *pdev;
-
- mns_wk = container_of(w, struct mana_serv_work, serv_work);
- pdev = mns_wk->pdev;
-
- if (!pdev)
- goto out;
-
- switch (mns_wk->type) {
+ switch (type) {
case GDMA_EQE_HWC_FPGA_RECONFIG:
mana_serv_fpga(pdev);
break;
@@ -475,12 +503,36 @@ static void mana_serv_func(struct work_struct *w)
break;
default:
- dev_err(&pdev->dev, "MANA service: unknown type %d\n",
- mns_wk->type);
+ dev_err(&pdev->dev, "MANA service: unknown type %d\n", type);
break;
}
+}
+
+static void mana_serv_delayed_func(struct work_struct *w)
+{
+ struct mana_serv_delayed_work *dwork;
+ struct pci_dev *pdev;
+
+ dwork = container_of(w, struct mana_serv_delayed_work, work.work);
+ pdev = dwork->pdev;
+
+ if (pdev)
+ mana_do_service(dwork->type, pdev);
+
+ pci_dev_put(pdev);
+}
+
+static void mana_serv_func(struct work_struct *w)
+{
+ struct mana_serv_work *mns_wk;
+ struct pci_dev *pdev;
+
+ mns_wk = container_of(w, struct mana_serv_work, serv_work);
+ pdev = mns_wk->pdev;
+
+ if (pdev)
+ mana_do_service(mns_wk->type, pdev);
-out:
pci_dev_put(pdev);
kfree(mns_wk);
module_put(THIS_MODULE);
@@ -541,6 +593,17 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
case GDMA_EQE_HWC_RESET_REQUEST:
dev_info(gc->dev, "Recv MANA service type:%d\n", type);
+ if (atomic_inc_return(&gc->in_probe) == 1) {
+ /*
+ * Device is in probe and we received a hardware reset
+ * event; probe() will detect that "in_probe" has
+ * changed and perform the service procedure.
+ */
+ dev_info(gc->dev,
+ "Service is to be processed in probe\n");
+ break;
+ }
+
if (gc->in_service) {
dev_info(gc->dev, "Already in service\n");
break;
@@ -1930,6 +1993,8 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
gc->mana_pci_debugfs = debugfs_create_dir(pci_slot_name(pdev->slot),
mana_debugfs_root);
+ atomic_set(&gc->in_probe, 0);
+
err = mana_gd_setup(pdev);
if (err)
goto unmap_bar;
@@ -1942,8 +2007,19 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto cleanup_mana;
+ /*
+ * If a hardware reset event has occurred over HWC during probe,
+ * roll back and perform the hardware reset procedure.
+ */
+ if (atomic_inc_return(&gc->in_probe) > 1) {
+ err = -EPROTO;
+ goto cleanup_mana_rdma;
+ }
+
return 0;
+cleanup_mana_rdma:
+ mana_rdma_remove(&gc->mana_ib);
cleanup_mana:
mana_remove(&gc->mana, false);
cleanup_gd:
@@ -1967,6 +2043,25 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
disable_dev:
pci_disable_device(pdev);
dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err);
+
+ /*
+ * Hardware could be in recovery mode, in which case the HWC returns
+ * -ETIMEDOUT or -EPROTO from mana_gd_setup(), mana_probe() or
+ * mana_rdma_probe(), or we received a hardware reset event over the
+ * HWC interrupt. In either case, perform the device recovery procedure
+ * after MANA_SERVICE_PERIOD seconds.
+ */
+ if (err == -ETIMEDOUT || err == -EPROTO) {
+ dev_info(&pdev->dev, "Start MANA recovery mode\n");
+
+ mns_delayed_wk.pdev = pci_dev_get(pdev);
+ mns_delayed_wk.type = GDMA_EQE_HWC_RESET_REQUEST;
+
+ INIT_DELAYED_WORK(&mns_delayed_wk.work, mana_serv_delayed_func);
+ schedule_delayed_work(&mns_delayed_wk.work,
+ secs_to_jiffies(MANA_SERVICE_PERIOD));
+ }
+
return err;
}
@@ -2084,6 +2179,8 @@ static int __init mana_driver_init(void)
static void __exit mana_driver_exit(void)
{
+ cancel_delayed_work_sync(&mns_delayed_wk.work);
+
pci_unregister_driver(&mana_driver);
debugfs_remove(mana_debugfs_root);
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 637f42485dba..1bb4c6ada2b6 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -430,6 +430,9 @@ struct gdma_context {
u64 pf_cap_flags1;
struct workqueue_struct *service_wq;
+
+ /* Incremented when probe finishes and on each HWC reset event */
+ atomic_t in_probe;
};
static inline bool mana_gd_is_mana(struct gdma_dev *gd)
@@ -592,6 +595,9 @@ enum {
#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
#define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
+/* Driver can handle hardware reset events during probe */
+#define GDMA_DRV_CAP_FLAG_1_RECOVER_PROBE BIT(22)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -601,7 +607,8 @@ enum {
GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
- GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE)
+ GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
+ GDMA_DRV_CAP_FLAG_1_RECOVER_PROBE)
#define GDMA_DRV_CAP_FLAGS2 0
--
2.43.0
next reply other threads:[~2025-11-15 2:28 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-15 2:28 longli [this message]
2025-11-16 16:36 ` [patch net-next] net: mana: Handle hardware reset events when probing the device Haiyang Zhang
2025-11-17 20:39 ` Long Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1763173729-28430-1-git-send-email-longli@linux.microsoft.com \
--to=longli@linux.microsoft.com \
--cc=davem@davemloft.net \
--cc=decui@microsoft.com \
--cc=edumazet@google.com \
--cc=erick.archer@outlook.com \
--cc=haiyangz@microsoft.com \
--cc=horms@kernel.org \
--cc=kotaranov@microsoft.com \
--cc=kuba@kernel.org \
--cc=kys@microsoft.com \
--cc=linux-hyperv@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=longli@microsoft.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=schakrabarti@linux.microsoft.com \
--cc=shradhagupta@linux.microsoft.com \
--cc=wei.liu@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).