linux-hyperv.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch net-next] net: mana: Handle hardware reset events when probing the device
@ 2025-11-15  2:28 longli
  2025-11-16 16:36 ` Haiyang Zhang
  0 siblings, 1 reply; 3+ messages in thread
From: longli @ 2025-11-15  2:28 UTC (permalink / raw)
  To: K. Y. Srinivasan, Haiyang Zhang, Wei Liu, Dexuan Cui,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Shradha Gupta, Simon Horman, Konstantin Taranov,
	Souradeep Chakrabarti, Erick Archer, linux-hyperv, netdev,
	linux-kernel, linux-rdma
  Cc: Long Li

From: Long Li <longli@microsoft.com>

When MANA is being probed, it's possible that the hardware is in recovery
mode and the device may get GDMA_EQE_HWC_RESET_REQUEST over HWC in the
middle of the probe. Detect this condition and go through the recovery
service procedure.

Fixes: fbe346ce9d62 ("net: mana: Handle Reset Request from MANA NIC")
Signed-off-by: Long Li <longli@microsoft.com>
---
 .../net/ethernet/microsoft/mana/gdma_main.c   | 131 +++++++++++++++---
 include/net/mana/gdma.h                       |   9 +-
 2 files changed, 122 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index effe0a2f207a..1d9c2beb22b2 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -15,6 +15,12 @@
 
 struct dentry *mana_debugfs_root;
 
+static struct mana_serv_delayed_work {
+	struct delayed_work work;
+	struct pci_dev *pdev;
+	enum gdma_eqe_type type;
+} mns_delayed_wk;
+
 static u32 mana_gd_r32(struct gdma_context *g, u64 offset)
 {
 	return readl(g->bar0_va + offset);
@@ -387,6 +393,25 @@ EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA");
 
 #define MANA_SERVICE_PERIOD 10
 
+static void mana_serv_rescan(struct pci_dev *pdev)
+{
+	struct pci_bus *parent;
+
+	pci_lock_rescan_remove();
+
+	parent = pdev->bus;
+	if (!parent) {
+		dev_err(&pdev->dev, "MANA service: no parent bus\n");
+		goto out;
+	}
+
+	pci_stop_and_remove_bus_device(pdev);
+	pci_rescan_bus(parent);
+
+out:
+	pci_unlock_rescan_remove();
+}
+
 static void mana_serv_fpga(struct pci_dev *pdev)
 {
 	struct pci_bus *bus, *parent;
@@ -419,9 +444,12 @@ static void mana_serv_reset(struct pci_dev *pdev)
 {
 	struct gdma_context *gc = pci_get_drvdata(pdev);
 	struct hw_channel_context *hwc;
+	int ret;
 
 	if (!gc) {
-		dev_err(&pdev->dev, "MANA service: no GC\n");
+		/* Perform PCI rescan on device if GC is not set up */
+		dev_err(&pdev->dev, "MANA service: GC not setup, rescanning\n");
+		mana_serv_rescan(pdev);
 		return;
 	}
 
@@ -440,9 +468,18 @@ static void mana_serv_reset(struct pci_dev *pdev)
 
 	msleep(MANA_SERVICE_PERIOD * 1000);
 
-	mana_gd_resume(pdev);
+	ret = mana_gd_resume(pdev);
+	if (ret == -ETIMEDOUT || ret == -EPROTO) {
+		/* Perform PCI rescan on device if we failed on HWC */
+		dev_err(&pdev->dev, "MANA service: resume failed, rescanning\n");
+		mana_serv_rescan(pdev);
+		goto out;
+	}
 
-	dev_info(&pdev->dev, "MANA reset cycle completed\n");
+	if (ret)
+		dev_info(&pdev->dev, "MANA reset cycle failed err %d\n", ret);
+	else
+		dev_info(&pdev->dev, "MANA reset cycle completed\n");
 
 out:
 	gc->in_service = false;
@@ -454,18 +491,9 @@ struct mana_serv_work {
 	enum gdma_eqe_type type;
 };
 
-static void mana_serv_func(struct work_struct *w)
+static void mana_do_service(enum gdma_eqe_type type, struct pci_dev *pdev)
 {
-	struct mana_serv_work *mns_wk;
-	struct pci_dev *pdev;
-
-	mns_wk = container_of(w, struct mana_serv_work, serv_work);
-	pdev = mns_wk->pdev;
-
-	if (!pdev)
-		goto out;
-
-	switch (mns_wk->type) {
+	switch (type) {
 	case GDMA_EQE_HWC_FPGA_RECONFIG:
 		mana_serv_fpga(pdev);
 		break;
@@ -475,12 +503,36 @@ static void mana_serv_func(struct work_struct *w)
 		break;
 
 	default:
-		dev_err(&pdev->dev, "MANA service: unknown type %d\n",
-			mns_wk->type);
+		dev_err(&pdev->dev, "MANA service: unknown type %d\n", type);
 		break;
 	}
+}
+
+static void mana_serv_delayed_func(struct work_struct *w)
+{
+	struct mana_serv_delayed_work *dwork;
+	struct pci_dev *pdev;
+
+	dwork = container_of(w, struct mana_serv_delayed_work, work.work);
+	pdev = dwork->pdev;
+
+	if (pdev)
+		mana_do_service(dwork->type, pdev);
+
+	pci_dev_put(pdev);
+}
+
+static void mana_serv_func(struct work_struct *w)
+{
+	struct mana_serv_work *mns_wk;
+	struct pci_dev *pdev;
+
+	mns_wk = container_of(w, struct mana_serv_work, serv_work);
+	pdev = mns_wk->pdev;
+
+	if (pdev)
+		mana_do_service(mns_wk->type, pdev);
 
-out:
 	pci_dev_put(pdev);
 	kfree(mns_wk);
 	module_put(THIS_MODULE);
@@ -541,6 +593,17 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
 	case GDMA_EQE_HWC_RESET_REQUEST:
 		dev_info(gc->dev, "Recv MANA service type:%d\n", type);
 
+		if (atomic_inc_return(&gc->in_probe) == 1) {
+			/*
+			 * Device is in probe and we received a hardware reset
+			 * event, probe() will detect that "in_probe" has
+			 * changed and perform service procedure.
+			 */
+			dev_info(gc->dev,
+				 "Service is to be processed in probe\n");
+			break;
+		}
+
 		if (gc->in_service) {
 			dev_info(gc->dev, "Already in service\n");
 			break;
@@ -1930,6 +1993,8 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		gc->mana_pci_debugfs = debugfs_create_dir(pci_slot_name(pdev->slot),
 							  mana_debugfs_root);
 
+	atomic_set(&gc->in_probe, 0);
+
 	err = mana_gd_setup(pdev);
 	if (err)
 		goto unmap_bar;
@@ -1942,8 +2007,19 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto cleanup_mana;
 
+	/*
+	 * If a hardware reset event has occurred over HWC during probe,
+	 * rollback and perform hardware reset procedure.
+	 */
+	if (atomic_inc_return(&gc->in_probe) > 1) {
+		err = -EPROTO;
+		goto cleanup_mana_rdma;
+	}
+
 	return 0;
 
+cleanup_mana_rdma:
+	mana_rdma_remove(&gc->mana_ib);
 cleanup_mana:
 	mana_remove(&gc->mana, false);
 cleanup_gd:
@@ -1967,6 +2043,25 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 disable_dev:
 	pci_disable_device(pdev);
 	dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err);
+
+	/*
+	 * Hardware could be in recovery mode and the HWC returns TIMEDOUT or
+	 * EPROTO from mana_gd_setup(), mana_probe() or mana_rdma_probe(), or
+	 * we received a hardware reset event over HWC interrupt. In this case,
+	 * perform the device recovery procedure after MANA_SERVICE_PERIOD
+	 * seconds.
+	 */
+	if (err == -ETIMEDOUT || err == -EPROTO) {
+		dev_info(&pdev->dev, "Start MANA recovery mode\n");
+
+		mns_delayed_wk.pdev = pci_dev_get(pdev);
+		mns_delayed_wk.type = GDMA_EQE_HWC_RESET_REQUEST;
+
+		INIT_DELAYED_WORK(&mns_delayed_wk.work, mana_serv_delayed_func);
+		schedule_delayed_work(&mns_delayed_wk.work,
+				      secs_to_jiffies(MANA_SERVICE_PERIOD));
+	}
+
 	return err;
 }
 
@@ -2084,6 +2179,8 @@ static int __init mana_driver_init(void)
 
 static void __exit mana_driver_exit(void)
 {
+	cancel_delayed_work_sync(&mns_delayed_wk.work);
+
 	pci_unregister_driver(&mana_driver);
 
 	debugfs_remove(mana_debugfs_root);
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 637f42485dba..1bb4c6ada2b6 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -430,6 +430,9 @@ struct gdma_context {
 	u64 pf_cap_flags1;
 
 	struct workqueue_struct *service_wq;
+
+	/* Count how many times we have finished probe or HWC events */
+	atomic_t		in_probe;
 };
 
 static inline bool mana_gd_is_mana(struct gdma_dev *gd)
@@ -592,6 +595,9 @@ enum {
 #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
 #define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
 
+/* Driver can handle hardware reset events during probe */
+#define GDMA_DRV_CAP_FLAG_1_RECOVER_PROBE BIT(22)
+
 #define GDMA_DRV_CAP_FLAGS1 \
 	(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
 	 GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -601,7 +607,8 @@ enum {
 	 GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
 	 GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
 	 GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
-	 GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE)
+	 GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
+	 GDMA_DRV_CAP_FLAG_1_RECOVER_PROBE)
 
 #define GDMA_DRV_CAP_FLAGS2 0
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* RE: [patch net-next] net: mana: Handle hardware reset events when probing the device
  2025-11-15  2:28 [patch net-next] net: mana: Handle hardware reset events when probing the device longli
@ 2025-11-16 16:36 ` Haiyang Zhang
  2025-11-17 20:39   ` Long Li
  0 siblings, 1 reply; 3+ messages in thread
From: Haiyang Zhang @ 2025-11-16 16:36 UTC (permalink / raw)
  To: longli@linux.microsoft.com, KY Srinivasan, Wei Liu, Dexuan Cui,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Shradha Gupta, Simon Horman, Konstantin Taranov,
	Souradeep Chakrabarti, Erick Archer, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-rdma@vger.kernel.org
  Cc: Long Li



> -----Original Message-----
> From: longli@linux.microsoft.com <longli@linux.microsoft.com>
> Sent: Friday, November 14, 2025 9:29 PM
> To: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> <haiyangz@microsoft.com>; Wei Liu <wei.liu@kernel.org>; Dexuan Cui
> <DECUI@microsoft.com>; David S. Miller <davem@davemloft.net>; Eric Dumazet
> <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>; Paolo Abeni
> <pabeni@redhat.com>; Shradha Gupta <shradhagupta@linux.microsoft.com>;
> Simon Horman <horms@kernel.org>; Konstantin Taranov
> <kotaranov@microsoft.com>; Souradeep Chakrabarti
> <schakrabarti@linux.microsoft.com>; Erick Archer
> <erick.archer@outlook.com>; linux-hyperv@vger.kernel.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org; linux-
> rdma@vger.kernel.org
> Cc: Long Li <longli@microsoft.com>
> Subject: [patch net-next] net: mana: Handle hardware reset events when
> probing the device
> 
> From: Long Li <longli@microsoft.com>
> 
> When MANA is being probed, it's possible that hardware is in recovery
> mode and the device may get GDMA_EQE_HWC_RESET_REQUEST over HWC in the
> middle of the probe. Detect such condition and go through the recovery
> service procedure.
> 
> Fixes: fbe346ce9d62 ("net: mana: Handle Reset Request from MANA NIC")
> Signed-off-by: Long Li <longli@microsoft.com>
> ---
>  .../net/ethernet/microsoft/mana/gdma_main.c   | 131 +++++++++++++++---
>  include/net/mana/gdma.h                       |   9 +-
>  2 files changed, 122 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> index effe0a2f207a..1d9c2beb22b2 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> @@ -15,6 +15,12 @@
> 
>  struct dentry *mana_debugfs_root;
> 
> +static struct mana_serv_delayed_work {
> +	struct delayed_work work;
> +	struct pci_dev *pdev;
> +	enum gdma_eqe_type type;
> +} mns_delayed_wk;
> +
>  static u32 mana_gd_r32(struct gdma_context *g, u64 offset)
>  {
>  	return readl(g->bar0_va + offset);
> @@ -387,6 +393,25 @@ EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA");
> 
>  #define MANA_SERVICE_PERIOD 10
> 
> +static void mana_serv_rescan(struct pci_dev *pdev)
> +{
> +	struct pci_bus *parent;
> +
> +	pci_lock_rescan_remove();
> +
> +	parent = pdev->bus;
> +	if (!parent) {
> +		dev_err(&pdev->dev, "MANA service: no parent bus\n");
> +		goto out;
> +	}
> +
> +	pci_stop_and_remove_bus_device(pdev);
> +	pci_rescan_bus(parent);
> +
> +out:
> +	pci_unlock_rescan_remove();
> +}
> +
>  static void mana_serv_fpga(struct pci_dev *pdev)
>  {
>  	struct pci_bus *bus, *parent;
> @@ -419,9 +444,12 @@ static void mana_serv_reset(struct pci_dev *pdev)
>  {
>  	struct gdma_context *gc = pci_get_drvdata(pdev);
>  	struct hw_channel_context *hwc;
> +	int ret;
> 
>  	if (!gc) {
> -		dev_err(&pdev->dev, "MANA service: no GC\n");
> +		/* Perform PCI rescan on device if GC is not set up */
> +		dev_err(&pdev->dev, "MANA service: GC not setup,
> rescanning\n");
> +		mana_serv_rescan(pdev);
>  		return;
>  	}
> 
> @@ -440,9 +468,18 @@ static void mana_serv_reset(struct pci_dev *pdev)
> 
>  	msleep(MANA_SERVICE_PERIOD * 1000);
> 
> -	mana_gd_resume(pdev);
> +	ret = mana_gd_resume(pdev);
> +	if (ret == -ETIMEDOUT || ret == -EPROTO) {
> +		/* Perform PCI rescan on device if we failed on HWC */
> +		dev_err(&pdev->dev, "MANA service: resume failed,
> rescanning\n");
> +		mana_serv_rescan(pdev);
> +		goto out;
> +	}
> 
> -	dev_info(&pdev->dev, "MANA reset cycle completed\n");
> +	if (ret)
> +		dev_info(&pdev->dev, "MANA reset cycle failed err %d\n", ret);
> +	else
> +		dev_info(&pdev->dev, "MANA reset cycle completed\n");
> 
>  out:
>  	gc->in_service = false;
> @@ -454,18 +491,9 @@ struct mana_serv_work {
>  	enum gdma_eqe_type type;
>  };
> 
> -static void mana_serv_func(struct work_struct *w)
> +static void mana_do_service(enum gdma_eqe_type type, struct pci_dev
> *pdev)
>  {
> -	struct mana_serv_work *mns_wk;
> -	struct pci_dev *pdev;
> -
> -	mns_wk = container_of(w, struct mana_serv_work, serv_work);
> -	pdev = mns_wk->pdev;
> -
> -	if (!pdev)
> -		goto out;
> -
> -	switch (mns_wk->type) {
> +	switch (type) {
>  	case GDMA_EQE_HWC_FPGA_RECONFIG:
>  		mana_serv_fpga(pdev);
>  		break;
> @@ -475,12 +503,36 @@ static void mana_serv_func(struct work_struct *w)
>  		break;
> 
>  	default:
> -		dev_err(&pdev->dev, "MANA service: unknown type %d\n",
> -			mns_wk->type);
> +		dev_err(&pdev->dev, "MANA service: unknown type %d\n", type);
>  		break;
>  	}
> +}
> +
> +static void mana_serv_delayed_func(struct work_struct *w)
> +{
> +	struct mana_serv_delayed_work *dwork;
> +	struct pci_dev *pdev;
> +
> +	dwork = container_of(w, struct mana_serv_delayed_work, work.work);
> +	pdev = dwork->pdev;
> +
> +	if (pdev)
> +		mana_do_service(dwork->type, pdev);
> +
> +	pci_dev_put(pdev);
> +}
> +
> +static void mana_serv_func(struct work_struct *w)
> +{
> +	struct mana_serv_work *mns_wk;
> +	struct pci_dev *pdev;
> +
> +	mns_wk = container_of(w, struct mana_serv_work, serv_work);
> +	pdev = mns_wk->pdev;
> +
> +	if (pdev)
> +		mana_do_service(mns_wk->type, pdev);
> 
> -out:
>  	pci_dev_put(pdev);
>  	kfree(mns_wk);
>  	module_put(THIS_MODULE);
> @@ -541,6 +593,17 @@ static void mana_gd_process_eqe(struct gdma_queue
> *eq)
>  	case GDMA_EQE_HWC_RESET_REQUEST:
>  		dev_info(gc->dev, "Recv MANA service type:%d\n", type);
> 
> +		if (atomic_inc_return(&gc->in_probe) == 1) {

Since we don't care about how many times it entered probe/service,
test_and_set_bit() should be sufficient here.

> +			/*
> +			 * Device is in probe and we received an hardware reset
> +			 * event, probe() will detect that "in_probe" has
> +			 * changed and perform service procedure.
> +			 */
> +			dev_info(gc->dev,
> +				 "Service is to be processed in probe\n");
> +			break;
> +		}
> +
>  		if (gc->in_service) {
>  			dev_info(gc->dev, "Already in service\n");
>  			break;
> @@ -1930,6 +1993,8 @@ static int mana_gd_probe(struct pci_dev *pdev, const
> struct pci_device_id *ent)
>  		gc->mana_pci_debugfs = debugfs_create_dir(pci_slot_name(pdev-
> >slot),
>  							  mana_debugfs_root);
> 
> +	atomic_set(&gc->in_probe, 0);
> +
>  	err = mana_gd_setup(pdev);
>  	if (err)
>  		goto unmap_bar;
> @@ -1942,8 +2007,19 @@ static int mana_gd_probe(struct pci_dev *pdev,
> const struct pci_device_id *ent)
>  	if (err)
>  		goto cleanup_mana;
> 
> +	/*
> +	 * If a hardware reset event has occurred over HWC during probe,
> +	 * rollback and perform hardware reset procedure.
> +	 */
> +	if (atomic_inc_return(&gc->in_probe) > 1) {
> +		err = -EPROTO;
> +		goto cleanup_mana_rdma;
> +	}
> +
>  	return 0;
> 
> +cleanup_mana_rdma:
> +	mana_rdma_remove(&gc->mana_ib);
>  cleanup_mana:
>  	mana_remove(&gc->mana, false);
>  cleanup_gd:
> @@ -1967,6 +2043,25 @@ static int mana_gd_probe(struct pci_dev *pdev,
> const struct pci_device_id *ent)
>  disable_dev:
>  	pci_disable_device(pdev);
>  	dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err);
> +
> +	/*
> +	 * Hardware could be in recovery mode and the HWC returns TIMEDOUT
> or
> +	 * EPROTO from mana_gd_setup(), mana_probe() or mana_rdma_probe(),
> or
> +	 * we received a hardware reset event over HWC interrupt. In this
> case,
> +	 * perform the device recovery procedure after MANA_SERVICE_PERIOD
> +	 * seconds.
> +	 */
> +	if (err == -ETIMEDOUT || err == -EPROTO) {
> +		dev_info(&pdev->dev, "Start MANA recovery mode\n");
> +
> +		mns_delayed_wk.pdev = pci_dev_get(pdev);
> +		mns_delayed_wk.type = GDMA_EQE_HWC_RESET_REQUEST;
> +
> +		INIT_DELAYED_WORK(&mns_delayed_wk.work,
> mana_serv_delayed_func);

To avoid potentially calling INIT_DELAYED_WORK multiple times, this should
be done in mana_driver_init().

> +		schedule_delayed_work(&mns_delayed_wk.work,
> +				      secs_to_jiffies(MANA_SERVICE_PERIOD));
> +	}
> +
>  	return err;
>  }
> 
> @@ -2084,6 +2179,8 @@ static int __init mana_driver_init(void)
> 
>  static void __exit mana_driver_exit(void)
>  {
> +	cancel_delayed_work_sync(&mns_delayed_wk.work);

I think we should call disable_delayed_work_sync() to prevent the work
from being scheduled again after this line.

> +
>  	pci_unregister_driver(&mana_driver);
> 
>  	debugfs_remove(mana_debugfs_root);
> diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
> index 637f42485dba..1bb4c6ada2b6 100644
> --- a/include/net/mana/gdma.h
> +++ b/include/net/mana/gdma.h
> @@ -430,6 +430,9 @@ struct gdma_context {
>  	u64 pf_cap_flags1;
> 
>  	struct workqueue_struct *service_wq;
> +
> +	/* Count how many times we have finished probe or HWC events */
> +	atomic_t		in_probe;
>  };
> 
>  static inline bool mana_gd_is_mana(struct gdma_dev *gd)
> @@ -592,6 +595,9 @@ enum {
>  #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
>  #define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
> 
> +/* Driver can handle hardware reset events during probe */
> +#define GDMA_DRV_CAP_FLAG_1_RECOVER_PROBE BIT(22)
> +
>  #define GDMA_DRV_CAP_FLAGS1 \
>  	(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
>  	 GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
> @@ -601,7 +607,8 @@ enum {
>  	 GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
>  	 GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
>  	 GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
> -	 GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE)
> +	 GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
> +	 GDMA_DRV_CAP_FLAG_1_RECOVER_PROBE)
> 
>  #define GDMA_DRV_CAP_FLAGS2 0
> 
> --
> 2.43.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [patch net-next] net: mana: Handle hardware reset events when probing the device
  2025-11-16 16:36 ` Haiyang Zhang
@ 2025-11-17 20:39   ` Long Li
  0 siblings, 0 replies; 3+ messages in thread
From: Long Li @ 2025-11-17 20:39 UTC (permalink / raw)
  To: Haiyang Zhang, longli@linux.microsoft.com, KY Srinivasan, Wei Liu,
	Dexuan Cui, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Shradha Gupta, Simon Horman, Konstantin Taranov,
	Souradeep Chakrabarti, Erick Archer, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-rdma@vger.kernel.org

> Subject: RE: [patch net-next] net: mana: Handle hardware reset events when
> probing the device
> 
> 
> 
> > -----Original Message-----
> > From: longli@linux.microsoft.com <longli@linux.microsoft.com>
> > Sent: Friday, November 14, 2025 9:29 PM
> > To: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> > <haiyangz@microsoft.com>; Wei Liu <wei.liu@kernel.org>; Dexuan Cui
> > <DECUI@microsoft.com>; David S. Miller <davem@davemloft.net>; Eric
> > Dumazet <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>;
> Paolo
> > Abeni <pabeni@redhat.com>; Shradha Gupta
> > <shradhagupta@linux.microsoft.com>;
> > Simon Horman <horms@kernel.org>; Konstantin Taranov
> > <kotaranov@microsoft.com>; Souradeep Chakrabarti
> > <schakrabarti@linux.microsoft.com>; Erick Archer
> > <erick.archer@outlook.com>; linux-hyperv@vger.kernel.org;
> > netdev@vger.kernel.org; linux-kernel@vger.kernel.org; linux-
> > rdma@vger.kernel.org
> > Cc: Long Li <longli@microsoft.com>
> > Subject: [patch net-next] net: mana: Handle hardware reset events when
> > probing the device
> >
> > From: Long Li <longli@microsoft.com>
> >
> > When MANA is being probed, it's possible that hardware is in recovery
> > mode and the device may get GDMA_EQE_HWC_RESET_REQUEST over HWC
> in the
> > middle of the probe. Detect such condition and go through the recovery
> > service procedure.
> >
> > Fixes: fbe346ce9d62 ("net: mana: Handle Reset Request from MANA NIC")
> > Signed-off-by: Long Li <longli@microsoft.com>
> > ---
> >  .../net/ethernet/microsoft/mana/gdma_main.c   | 131 +++++++++++++++-
> --
> >  include/net/mana/gdma.h                       |   9 +-
> >  2 files changed, 122 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > index effe0a2f207a..1d9c2beb22b2 100644
> > --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > @@ -15,6 +15,12 @@
> >
> >  struct dentry *mana_debugfs_root;
> >
> > +static struct mana_serv_delayed_work {
> > +	struct delayed_work work;
> > +	struct pci_dev *pdev;
> > +	enum gdma_eqe_type type;
> > +} mns_delayed_wk;
> > +
> >  static u32 mana_gd_r32(struct gdma_context *g, u64 offset)  {
> >  	return readl(g->bar0_va + offset);
> > @@ -387,6 +393,25 @@ EXPORT_SYMBOL_NS(mana_gd_ring_cq,
> "NET_MANA");
> >
> >  #define MANA_SERVICE_PERIOD 10
> >
> > +static void mana_serv_rescan(struct pci_dev *pdev) {
> > +	struct pci_bus *parent;
> > +
> > +	pci_lock_rescan_remove();
> > +
> > +	parent = pdev->bus;
> > +	if (!parent) {
> > +		dev_err(&pdev->dev, "MANA service: no parent bus\n");
> > +		goto out;
> > +	}
> > +
> > +	pci_stop_and_remove_bus_device(pdev);
> > +	pci_rescan_bus(parent);
> > +
> > +out:
> > +	pci_unlock_rescan_remove();
> > +}
> > +
> >  static void mana_serv_fpga(struct pci_dev *pdev)  {
> >  	struct pci_bus *bus, *parent;
> > @@ -419,9 +444,12 @@ static void mana_serv_reset(struct pci_dev *pdev)
> > {
> >  	struct gdma_context *gc = pci_get_drvdata(pdev);
> >  	struct hw_channel_context *hwc;
> > +	int ret;
> >
> >  	if (!gc) {
> > -		dev_err(&pdev->dev, "MANA service: no GC\n");
> > +		/* Perform PCI rescan on device if GC is not set up */
> > +		dev_err(&pdev->dev, "MANA service: GC not setup,
> > rescanning\n");
> > +		mana_serv_rescan(pdev);
> >  		return;
> >  	}
> >
> > @@ -440,9 +468,18 @@ static void mana_serv_reset(struct pci_dev *pdev)
> >
> >  	msleep(MANA_SERVICE_PERIOD * 1000);
> >
> > -	mana_gd_resume(pdev);
> > +	ret = mana_gd_resume(pdev);
> > +	if (ret == -ETIMEDOUT || ret == -EPROTO) {
> > +		/* Perform PCI rescan on device if we failed on HWC */
> > +		dev_err(&pdev->dev, "MANA service: resume failed,
> > rescanning\n");
> > +		mana_serv_rescan(pdev);
> > +		goto out;
> > +	}
> >
> > -	dev_info(&pdev->dev, "MANA reset cycle completed\n");
> > +	if (ret)
> > +		dev_info(&pdev->dev, "MANA reset cycle failed err %d\n",
> ret);
> > +	else
> > +		dev_info(&pdev->dev, "MANA reset cycle completed\n");
> >
> >  out:
> >  	gc->in_service = false;
> > @@ -454,18 +491,9 @@ struct mana_serv_work {
> >  	enum gdma_eqe_type type;
> >  };
> >
> > -static void mana_serv_func(struct work_struct *w)
> > +static void mana_do_service(enum gdma_eqe_type type, struct pci_dev
> > *pdev)
> >  {
> > -	struct mana_serv_work *mns_wk;
> > -	struct pci_dev *pdev;
> > -
> > -	mns_wk = container_of(w, struct mana_serv_work, serv_work);
> > -	pdev = mns_wk->pdev;
> > -
> > -	if (!pdev)
> > -		goto out;
> > -
> > -	switch (mns_wk->type) {
> > +	switch (type) {
> >  	case GDMA_EQE_HWC_FPGA_RECONFIG:
> >  		mana_serv_fpga(pdev);
> >  		break;
> > @@ -475,12 +503,36 @@ static void mana_serv_func(struct work_struct
> *w)
> >  		break;
> >
> >  	default:
> > -		dev_err(&pdev->dev, "MANA service: unknown type %d\n",
> > -			mns_wk->type);
> > +		dev_err(&pdev->dev, "MANA service: unknown type %d\n",
> type);
> >  		break;
> >  	}
> > +}
> > +
> > +static void mana_serv_delayed_func(struct work_struct *w) {
> > +	struct mana_serv_delayed_work *dwork;
> > +	struct pci_dev *pdev;
> > +
> > +	dwork = container_of(w, struct mana_serv_delayed_work,
> work.work);
> > +	pdev = dwork->pdev;
> > +
> > +	if (pdev)
> > +		mana_do_service(dwork->type, pdev);
> > +
> > +	pci_dev_put(pdev);
> > +}
> > +
> > +static void mana_serv_func(struct work_struct *w) {
> > +	struct mana_serv_work *mns_wk;
> > +	struct pci_dev *pdev;
> > +
> > +	mns_wk = container_of(w, struct mana_serv_work, serv_work);
> > +	pdev = mns_wk->pdev;
> > +
> > +	if (pdev)
> > +		mana_do_service(mns_wk->type, pdev);
> >
> > -out:
> >  	pci_dev_put(pdev);
> >  	kfree(mns_wk);
> >  	module_put(THIS_MODULE);
> > @@ -541,6 +593,17 @@ static void mana_gd_process_eqe(struct
> gdma_queue
> > *eq)
> >  	case GDMA_EQE_HWC_RESET_REQUEST:
> >  		dev_info(gc->dev, "Recv MANA service type:%d\n", type);
> >
> > +		if (atomic_inc_return(&gc->in_probe) == 1) {
> 
> Since we don't care about how many times it entered probe/service,
> test_and_set_bit() should be sufficient here.
> 
> > +			/*
> > +			 * Device is in probe and we received an hardware
> reset
> > +			 * event, probe() will detect that "in_probe" has
> > +			 * changed and perform service procedure.
> > +			 */
> > +			dev_info(gc->dev,
> > +				 "Service is to be processed in probe\n");
> > +			break;
> > +		}
> > +
> >  		if (gc->in_service) {
> >  			dev_info(gc->dev, "Already in service\n");
> >  			break;
> > @@ -1930,6 +1993,8 @@ static int mana_gd_probe(struct pci_dev *pdev,
> > const struct pci_device_id *ent)
> >  		gc->mana_pci_debugfs =
> debugfs_create_dir(pci_slot_name(pdev-
> > >slot),
> >
> mana_debugfs_root);
> >
> > +	atomic_set(&gc->in_probe, 0);
> > +
> >  	err = mana_gd_setup(pdev);
> >  	if (err)
> >  		goto unmap_bar;
> > @@ -1942,8 +2007,19 @@ static int mana_gd_probe(struct pci_dev *pdev,
> > const struct pci_device_id *ent)
> >  	if (err)
> >  		goto cleanup_mana;
> >
> > +	/*
> > +	 * If a hardware reset event has occurred over HWC during probe,
> > +	 * rollback and perform hardware reset procedure.
> > +	 */
> > +	if (atomic_inc_return(&gc->in_probe) > 1) {
> > +		err = -EPROTO;
> > +		goto cleanup_mana_rdma;
> > +	}
> > +
> >  	return 0;
> >
> > +cleanup_mana_rdma:
> > +	mana_rdma_remove(&gc->mana_ib);
> >  cleanup_mana:
> >  	mana_remove(&gc->mana, false);
> >  cleanup_gd:
> > @@ -1967,6 +2043,25 @@ static int mana_gd_probe(struct pci_dev *pdev,
> > const struct pci_device_id *ent)
> >  disable_dev:
> >  	pci_disable_device(pdev);
> >  	dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err);
> > +
> > +	/*
> > +	 * Hardware could be in recovery mode and the HWC returns
> TIMEDOUT
> > or
> > +	 * EPROTO from mana_gd_setup(), mana_probe() or
> mana_rdma_probe(),
> > or
> > +	 * we received a hardware reset event over HWC interrupt. In this
> > case,
> > +	 * perform the device recovery procedure after
> MANA_SERVICE_PERIOD
> > +	 * seconds.
> > +	 */
> > +	if (err == -ETIMEDOUT || err == -EPROTO) {
> > +		dev_info(&pdev->dev, "Start MANA recovery mode\n");
> > +
> > +		mns_delayed_wk.pdev = pci_dev_get(pdev);
> > +		mns_delayed_wk.type = GDMA_EQE_HWC_RESET_REQUEST;
> > +
> > +		INIT_DELAYED_WORK(&mns_delayed_wk.work,
> > mana_serv_delayed_func);
> 
> To avoid INIT_DELAYED_WORK potentially multiple times this should be in the
> mana_driver_init()
> 
> > +		schedule_delayed_work(&mns_delayed_wk.work,
> > +				      secs_to_jiffies(MANA_SERVICE_PERIOD));
> > +	}
> > +
> >  	return err;
> >  }
> >
> > @@ -2084,6 +2179,8 @@ static int __init mana_driver_init(void)
> >
> >  static void __exit mana_driver_exit(void)  {
> > +	cancel_delayed_work_sync(&mns_delayed_wk.work);
> 
> I think we should call disable_delayed_work_sync() to prevent the work
> scheduled again after this line.

Thank you. I will send v2 to address all the comments and support multiple PCI devices in BM mode.

Long

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-11-17 20:39 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-15  2:28 [patch net-next] net: mana: Handle hardware reset events when probing the device longli
2025-11-16 16:36 ` Haiyang Zhang
2025-11-17 20:39   ` Long Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).