* [PATCH, net-next] net: mana: Trigger VF reset/recovery on health check failure due to HWC timeout
@ 2026-02-23 8:47 Dipayaan Roy
2026-02-24 8:26 ` [net-next] " Simon Horman
2026-02-26 19:48 ` [PATCH, net-next] " Long Li
0 siblings, 2 replies; 5+ messages in thread
From: Dipayaan Roy @ 2026-02-23 8:47 UTC (permalink / raw)
To: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
kuba, pabeni, longli, kotaranov, horms, shradhagupta, ssengar,
ernis, shirazsaleem, linux-hyperv, netdev, linux-kernel,
linux-rdma, dipayanroy
The GF stats periodic query is used as a mechanism to monitor HWC
health. If this HWC command times out, it is a strong indication that
the device/SoC is in a faulty state and requires recovery.
Today, when a timeout is detected, the driver marks
hwc_timeout_occurred, clears cached stats, and stops rescheduling the
periodic work. However, the device itself is left in the same failing
state.
Extend the timeout handling path to trigger the existing MANA VF
recovery service by queueing a GDMA_EQE_HWC_RESET_REQUEST work item.
This is expected to initiate the appropriate recovery flow: a
suspend/resume cycle first and, if that fails, a bus rescan.
This change is intentionally limited to HWC command timeouts and does
not trigger recovery for errors reported by the SoC as a normal command
response.
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
---
.../net/ethernet/microsoft/mana/gdma_main.c | 14 +++-------
drivers/net/ethernet/microsoft/mana/mana_en.c | 28 ++++++++++++++++++-
include/net/mana/gdma.h | 16 +++++++++--
3 files changed, 45 insertions(+), 13 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 0055c231acf6..16c438d2aaa3 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -490,15 +490,9 @@ static void mana_serv_reset(struct pci_dev *pdev)
dev_info(&pdev->dev, "MANA reset cycle completed\n");
out:
- gc->in_service = false;
+ clear_bit(GC_IN_SERVICE, &gc->flags);
}
-struct mana_serv_work {
- struct work_struct serv_work;
- struct pci_dev *pdev;
- enum gdma_eqe_type type;
-};
-
static void mana_do_service(enum gdma_eqe_type type, struct pci_dev *pdev)
{
switch (type) {
@@ -542,7 +536,7 @@ static void mana_recovery_delayed_func(struct work_struct *w)
spin_unlock_irqrestore(&work->lock, flags);
}
-static void mana_serv_func(struct work_struct *w)
+void mana_serv_func(struct work_struct *w)
{
struct mana_serv_work *mns_wk;
struct pci_dev *pdev;
@@ -624,7 +618,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
break;
}
- if (gc->in_service) {
+ if (test_bit(GC_IN_SERVICE, &gc->flags)) {
dev_info(gc->dev, "Already in service\n");
break;
}
@@ -641,7 +635,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
}
dev_info(gc->dev, "Start MANA service type:%d\n", type);
- gc->in_service = true;
+ set_bit(GC_IN_SERVICE, &gc->flags);
mns_wk->pdev = to_pci_dev(gc->dev);
mns_wk->type = type;
pci_dev_get(mns_wk->pdev);
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 91c418097284..8da574cf06f2 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -879,7 +879,7 @@ static void mana_tx_timeout(struct net_device *netdev, unsigned int txqueue)
struct gdma_context *gc = ac->gdma_dev->gdma_context;
/* Already in service, hence tx queue reset is not required.*/
- if (gc->in_service)
+ if (test_bit(GC_IN_SERVICE, &gc->flags))
return;
/* Note: If there are pending queue reset work for this port(apc),
@@ -3533,6 +3533,8 @@ static void mana_gf_stats_work_handler(struct work_struct *work)
{
struct mana_context *ac =
container_of(to_delayed_work(work), struct mana_context, gf_stats_work);
+ struct gdma_context *gc = ac->gdma_dev->gdma_context;
+ struct mana_serv_work *mns_wk;
int err;
err = mana_query_gf_stats(ac);
@@ -3540,6 +3542,30 @@ static void mana_gf_stats_work_handler(struct work_struct *work)
/* HWC timeout detected - reset stats and stop rescheduling */
ac->hwc_timeout_occurred = true;
memset(&ac->hc_stats, 0, sizeof(ac->hc_stats));
+ dev_warn(gc->dev,
+ "Gf stats wk handler: gf stats query timed out.\n");
+
+ /* As HWC timed out, indicating a faulty HW state and needs a
+ * reset.
+ */
+ if (!test_and_set_bit(GC_IN_SERVICE, &gc->flags)) {
+ if (!try_module_get(THIS_MODULE)) {
+ dev_info(gc->dev, "Module is unloading\n");
+ return;
+ }
+
+ mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
+ if (!mns_wk) {
+ module_put(THIS_MODULE);
+ return;
+ }
+
+ mns_wk->pdev = to_pci_dev(gc->dev);
+ mns_wk->type = GDMA_EQE_HWC_RESET_REQUEST;
+ pci_dev_get(mns_wk->pdev);
+ INIT_WORK(&mns_wk->serv_work, mana_serv_func);
+ schedule_work(&mns_wk->serv_work);
+ }
return;
}
schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD);
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index a59bd4035a99..fb946389d593 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -213,6 +213,12 @@ enum gdma_page_type {
#define GDMA_INVALID_DMA_REGION 0
+struct mana_serv_work {
+ struct work_struct serv_work;
+ struct pci_dev *pdev;
+ enum gdma_eqe_type type;
+};
+
struct gdma_mem_info {
struct device *dev;
@@ -384,6 +390,7 @@ struct gdma_irq_context {
enum gdma_context_flags {
GC_PROBE_SUCCEEDED = 0,
+ GC_IN_SERVICE = 1,
};
struct gdma_context {
@@ -409,7 +416,6 @@ struct gdma_context {
u32 test_event_eq_id;
bool is_pf;
- bool in_service;
phys_addr_t bar0_pa;
void __iomem *bar0_va;
@@ -471,6 +477,8 @@ int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe);
void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit);
+void mana_serv_func(struct work_struct *w);
+
struct gdma_wqe {
u32 reserved :24;
u32 last_vbytes :8;
@@ -613,6 +621,9 @@ enum {
/* Driver can handle hardware recovery events during probe */
#define GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY BIT(22)
+/* Driver supports self recovery on Hardware Channel timeouts */
+#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECOVERY BIT(25)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -626,7 +637,8 @@ enum {
GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \
GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE | \
GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY | \
- GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY)
+ GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY | \
+ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECOVERY)
#define GDMA_DRV_CAP_FLAGS2 0
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [net-next] net: mana: Trigger VF reset/recovery on health check failure due to HWC timeout
2026-02-23 8:47 [PATCH, net-next] net: mana: Trigger VF reset/recovery on health check failure due to HWC timeout Dipayaan Roy
@ 2026-02-24 8:26 ` Simon Horman
2026-02-27 8:07 ` Dipayaan Roy
2026-02-26 19:48 ` [PATCH, net-next] " Long Li
1 sibling, 1 reply; 5+ messages in thread
From: Simon Horman @ 2026-02-24 8:26 UTC (permalink / raw)
To: dipayanroy
Cc: Simon Horman, kuba, decui, pabeni, dipayanroy, kotaranov,
linux-kernel, edumazet, davem, linux-rdma, shradhagupta, kys,
ssengar, haiyangz, linux-hyperv, wei.liu, netdev, longli, ernis,
shirazsaleem, andrew+netdev
This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
---
net: mana: Trigger VF reset/recovery on health check failure due to HWC timeout
The GF stats periodic query is used as mechanism to monitor HWC health
check. If this HWC command times out, it is a strong indication that
the device/SoC is in a faulty state and requires recovery.
Today, when a timeout is detected, the driver marks
hwc_timeout_occurred, clears cached stats, and stops rescheduling the
periodic work. However, the device itself is left in the same failing
state.
Extend the timeout handling path to trigger the existing MANA VF
recovery service by queueing a GDMA_EQE_HWC_RESET_REQUEST work item.
This is expected to initiate the appropriate recovery flow by suspende
resume first and if it fails then trigger a bus rescan.
This change is intentionally limited to HWC command timeouts and does
not trigger recovery for errors reported by the SoC as a normal command
response.
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> index 0055c231acf6..16c438d2aaa3 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
[ ... ]
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 9b5a72ada5c4..e6a4034b40f9 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
[ ... ]
> @@ -3530,6 +3530,8 @@ static void mana_gf_stats_work_handler(struct work_struct *work)
> {
> struct mana_context *ac =
> container_of(to_delayed_work(work), struct mana_context, gf_stats_work);
> + struct gdma_context *gc = ac->gdma_dev->gdma_context;
> + struct mana_serv_work *mns_wk;
> int err;
>
> err = mana_query_gf_stats(ac);
> @@ -3537,6 +3539,30 @@ static void mana_gf_stats_work_handler(struct work_struct *work)
> /* HWC timeout detected - reset stats and stop rescheduling */
> ac->hwc_timeout_occurred = true;
> memset(&ac->hc_stats, 0, sizeof(ac->hc_stats));
> + dev_warn(gc->dev,
> + "Gf stats wk handler: gf stats query timed out.\n");
> +
> + /* As HWC timed out, indicating a faulty HW state and needs a
> + * reset.
> + */
> + if (!test_and_set_bit(GC_IN_SERVICE, &gc->flags)) {
> + if (!try_module_get(THIS_MODULE)) {
> + dev_info(gc->dev, "Module is unloading\n");
> + return;
> + }
> +
> + mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
> + if (!mns_wk) {
> + module_put(THIS_MODULE);
> + return;
> + }
Does this leave the GC_IN_SERVICE flag permanently set? When
test_and_set_bit() atomically sets the bit and then try_module_get()
fails or kzalloc() fails, the function returns without clearing
GC_IN_SERVICE.
This would permanently block all future recovery attempts. Looking at
mana_gd_process_eqe() in gdma_main.c:
if (test_bit(GC_IN_SERVICE, &gc->flags)) {
dev_info(gc->dev, "Already in service\n");
break;
}
It would detect the set bit and skip the reset. Similarly, mana_tx_timeout()
in mana_en.c would also short-circuit:
if (test_bit(GC_IN_SERVICE, &gc->flags))
return;
The device cannot recover without manual intervention since the flag is only
cleared by mana_serv_reset() at the end of the service work:
drivers/net/ethernet/microsoft/mana/gdma_main.c:mana_serv_reset()
{
...
out:
clear_bit(GC_IN_SERVICE, &gc->flags);
}
But since the work is never scheduled in these error paths, clear_bit() is
never called.
Note that the existing mana_gd_process_eqe() path avoids this problem by
calling set_bit(GC_IN_SERVICE) only AFTER all allocations succeed:
drivers/net/ethernet/microsoft/mana/gdma_main.c:mana_gd_process_eqe()
{
...
if (test_bit(GC_IN_SERVICE, &gc->flags)) {
dev_info(gc->dev, "Already in service\n");
break;
}
if (!try_module_get(THIS_MODULE)) {
...
break;
}
mns_wk = kzalloc(sizeof(*mns_wk), GFP_KERNEL);
if (!mns_wk) {
module_put(THIS_MODULE);
break;
}
dev_info(gc->dev, "Start MANA service type:%d\n", type);
set_bit(GC_IN_SERVICE, &gc->flags); // Set only after success
...
}
Should clear_bit(GC_IN_SERVICE, &gc->flags) be added before each error
return?
> +
> + mns_wk->pdev = to_pci_dev(gc->dev);
> + mns_wk->type = GDMA_EQE_HWC_RESET_REQUEST;
> + pci_dev_get(mns_wk->pdev);
> + INIT_WORK(&mns_wk->serv_work, mana_serv_func);
> + schedule_work(&mns_wk->serv_work);
> + }
This isn't a bug, but the recovery initiation sequence here is a
near-duplicate of the pattern in mana_gd_process_eqe() (gdma_main.c).
Both perform the identical 7-step sequence: try_module_get, kzalloc
mana_serv_work, set pdev, set type, pci_dev_get, INIT_WORK, schedule_work.
The duplication already contributed to the GC_IN_SERVICE error path issue
above, where this new copy diverged from the original in its error handling.
Consider extracting a common helper to avoid divergence.
> return;
> }
> schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD);
[ ... ]
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH, net-next] net: mana: Trigger VF reset/recovery on health check failure due to HWC timeout
2026-02-23 8:47 [PATCH, net-next] net: mana: Trigger VF reset/recovery on health check failure due to HWC timeout Dipayaan Roy
2026-02-24 8:26 ` [net-next] " Simon Horman
@ 2026-02-26 19:48 ` Long Li
2026-02-27 8:10 ` Dipayaan Roy
1 sibling, 1 reply; 5+ messages in thread
From: Long Li @ 2026-02-26 19:48 UTC (permalink / raw)
To: Dipayaan Roy, KY Srinivasan, Haiyang Zhang, wei.liu@kernel.org,
Dexuan Cui, andrew+netdev@lunn.ch, davem@davemloft.net,
edumazet@google.com, kuba@kernel.org, pabeni@redhat.com,
Konstantin Taranov, horms@kernel.org,
shradhagupta@linux.microsoft.com, ssengar@linux.microsoft.com,
ernis@linux.microsoft.com, Shiraz Saleem,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org,
Dipayaan Roy
> The GF stats periodic query is used as mechanism to monitor HWC health check.
> If this HWC command times out, it is a strong indication that the device/SoC is in a
> faulty state and requires recovery.
>
> Today, when a timeout is detected, the driver marks hwc_timeout_occurred,
> clears cached stats, and stops rescheduling the periodic work. However, the
> device itself is left in the same failing state.
>
> Extend the timeout handling path to trigger the existing MANA VF recovery
> service by queueing a GDMA_EQE_HWC_RESET_REQUEST work item.
> This is expected to initiate the appropriate recovery flow by suspende resume
> first and if it fails then trigger a bus rescan.
>
> This change is intentionally limited to HWC command timeouts and does not
> trigger recovery for errors reported by the SoC as a normal command response.
>
> Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> ---
> .../net/ethernet/microsoft/mana/gdma_main.c | 14 +++-------
> drivers/net/ethernet/microsoft/mana/mana_en.c | 28 ++++++++++++++++++-
> include/net/mana/gdma.h | 16 +++++++++--
> 3 files changed, 45 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> index 0055c231acf6..16c438d2aaa3 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> @@ -490,15 +490,9 @@ static void mana_serv_reset(struct pci_dev *pdev)
> dev_info(&pdev->dev, "MANA reset cycle completed\n");
>
> out:
> - gc->in_service = false;
> + clear_bit(GC_IN_SERVICE, &gc->flags);
> }
>
> -struct mana_serv_work {
> - struct work_struct serv_work;
> - struct pci_dev *pdev;
> - enum gdma_eqe_type type;
> -};
> -
> static void mana_do_service(enum gdma_eqe_type type, struct pci_dev *pdev)
> {
> switch (type) {
> @@ -542,7 +536,7 @@ static void mana_recovery_delayed_func(struct
> work_struct *w)
> spin_unlock_irqrestore(&work->lock, flags); }
>
> -static void mana_serv_func(struct work_struct *w)
> +void mana_serv_func(struct work_struct *w)
> {
> struct mana_serv_work *mns_wk;
> struct pci_dev *pdev;
> @@ -624,7 +618,7 @@ static void mana_gd_process_eqe(struct gdma_queue
> *eq)
> break;
> }
>
> - if (gc->in_service) {
> + if (test_bit(GC_IN_SERVICE, &gc->flags)) {
> dev_info(gc->dev, "Already in service\n");
> break;
> }
> @@ -641,7 +635,7 @@ static void mana_gd_process_eqe(struct gdma_queue
> *eq)
> }
>
> dev_info(gc->dev, "Start MANA service type:%d\n", type);
> - gc->in_service = true;
> + set_bit(GC_IN_SERVICE, &gc->flags);
> mns_wk->pdev = to_pci_dev(gc->dev);
> mns_wk->type = type;
> pci_dev_get(mns_wk->pdev);
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c
> b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 91c418097284..8da574cf06f2 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -879,7 +879,7 @@ static void mana_tx_timeout(struct net_device *netdev,
> unsigned int txqueue)
> struct gdma_context *gc = ac->gdma_dev->gdma_context;
>
> /* Already in service, hence tx queue reset is not required.*/
> - if (gc->in_service)
> + if (test_bit(GC_IN_SERVICE, &gc->flags))
> return;
>
> /* Note: If there are pending queue reset work for this port(apc), @@ -
> 3533,6 +3533,8 @@ static void mana_gf_stats_work_handler(struct work_struct
> *work) {
> struct mana_context *ac =
> container_of(to_delayed_work(work), struct mana_context,
> gf_stats_work);
> + struct gdma_context *gc = ac->gdma_dev->gdma_context;
> + struct mana_serv_work *mns_wk;
> int err;
>
> err = mana_query_gf_stats(ac);
> @@ -3540,6 +3542,30 @@ static void mana_gf_stats_work_handler(struct
> work_struct *work)
> /* HWC timeout detected - reset stats and stop rescheduling */
> ac->hwc_timeout_occurred = true;
> memset(&ac->hc_stats, 0, sizeof(ac->hc_stats));
> + dev_warn(gc->dev,
> + "Gf stats wk handler: gf stats query timed out.\n");
> +
> + /* As HWC timed out, indicating a faulty HW state and needs a
> + * reset.
> + */
> + if (!test_and_set_bit(GC_IN_SERVICE, &gc->flags)) {
> + if (!try_module_get(THIS_MODULE)) {
> + dev_info(gc->dev, "Module is unloading\n");
> + return;
> + }
> +
> + mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
> + if (!mns_wk) {
> + module_put(THIS_MODULE);
Maybe it's not necessary: check if you want to call clear_bit(GC_IN_SERVICE, &gc->flags) here?
> + return;
> + }
> +
> + mns_wk->pdev = to_pci_dev(gc->dev);
> + mns_wk->type = GDMA_EQE_HWC_RESET_REQUEST;
> + pci_dev_get(mns_wk->pdev);
> + INIT_WORK(&mns_wk->serv_work, mana_serv_func);
> + schedule_work(&mns_wk->serv_work);
> + }
> return;
> }
> schedule_delayed_work(&ac->gf_stats_work,
> MANA_GF_STATS_PERIOD); diff --git a/include/net/mana/gdma.h
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [net-next] net: mana: Trigger VF reset/recovery on health check failure due to HWC timeout
2026-02-24 8:26 ` [net-next] " Simon Horman
@ 2026-02-27 8:07 ` Dipayaan Roy
0 siblings, 0 replies; 5+ messages in thread
From: Dipayaan Roy @ 2026-02-27 8:07 UTC (permalink / raw)
To: Simon Horman
Cc: kuba, decui, pabeni, dipayanroy, kotaranov, linux-kernel,
edumazet, davem, linux-rdma, shradhagupta, kys, ssengar, haiyangz,
linux-hyperv, wei.liu, netdev, longli, ernis, shirazsaleem,
andrew+netdev
On Tue, Feb 24, 2026 at 08:26:41AM +0000, Simon Horman wrote:
> This is an AI-generated review of your patch. The human sending this
> email has considered the AI review valid, or at least plausible.
>
> For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
> ---
> net: mana: Trigger VF reset/recovery on health check failure due to HWC timeout
>
> The GF stats periodic query is used as mechanism to monitor HWC health
> check. If this HWC command times out, it is a strong indication that
> the device/SoC is in a faulty state and requires recovery.
>
> Today, when a timeout is detected, the driver marks
> hwc_timeout_occurred, clears cached stats, and stops rescheduling the
> periodic work. However, the device itself is left in the same failing
> state.
>
> Extend the timeout handling path to trigger the existing MANA VF
> recovery service by queueing a GDMA_EQE_HWC_RESET_REQUEST work item.
> This is expected to initiate the appropriate recovery flow by suspende
> resume first and if it fails then trigger a bus rescan.
>
> This change is intentionally limited to HWC command timeouts and does
> not trigger recovery for errors reported by the SoC as a normal command
> response.
>
> > diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > index 0055c231acf6..16c438d2aaa3 100644
> > --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
>
> [ ... ]
>
> > diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> > index 9b5a72ada5c4..e6a4034b40f9 100644
> > --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> > +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>
> [ ... ]
>
> > @@ -3530,6 +3530,8 @@ static void mana_gf_stats_work_handler(struct work_struct *work)
> > {
> > struct mana_context *ac =
> > container_of(to_delayed_work(work), struct mana_context, gf_stats_work);
> > + struct gdma_context *gc = ac->gdma_dev->gdma_context;
> > + struct mana_serv_work *mns_wk;
> > int err;
> >
> > err = mana_query_gf_stats(ac);
> > @@ -3537,6 +3539,30 @@ static void mana_gf_stats_work_handler(struct work_struct *work)
> > /* HWC timeout detected - reset stats and stop rescheduling */
> > ac->hwc_timeout_occurred = true;
> > memset(&ac->hc_stats, 0, sizeof(ac->hc_stats));
> > + dev_warn(gc->dev,
> > + "Gf stats wk handler: gf stats query timed out.\n");
> > +
> > + /* As HWC timed out, indicating a faulty HW state and needs a
> > + * reset.
> > + */
> > + if (!test_and_set_bit(GC_IN_SERVICE, &gc->flags)) {
> > + if (!try_module_get(THIS_MODULE)) {
> > + dev_info(gc->dev, "Module is unloading\n");
> > + return;
> > + }
> > +
> > + mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
> > + if (!mns_wk) {
> > + module_put(THIS_MODULE);
> > + return;
> > + }
>
> Does this leave the GC_IN_SERVICE flag permanently set? When
> test_and_set_bit() atomically sets the bit and then try_module_get()
> fails or kzalloc() fails, the function returns without clearing
> GC_IN_SERVICE.
>
> This would permanently block all future recovery attempts. Looking at
> mana_gd_process_eqe() in gdma_main.c:
>
ACK.
> if (test_bit(GC_IN_SERVICE, &gc->flags)) {
> dev_info(gc->dev, "Already in service\n");
> break;
> }
>
> It would detect the set bit and skip the reset. Similarly, mana_tx_timeout()
> in mana_en.c would also short-circuit:
>
> if (test_bit(GC_IN_SERVICE, &gc->flags))
> return;
>
> The device cannot recover without manual intervention since the flag is only
> cleared by mana_serv_reset() at the end of the service work:
>
> drivers/net/ethernet/microsoft/mana/gdma_main.c:mana_serv_reset()
> {
> ...
> out:
> clear_bit(GC_IN_SERVICE, &gc->flags);
> }
>
> But since the work is never scheduled in these error paths, clear_bit() is
> never called.
>
> Note that the existing mana_gd_process_eqe() path avoids this problem by
> calling set_bit(GC_IN_SERVICE) only AFTER all allocations succeed:
>
> drivers/net/ethernet/microsoft/mana/gdma_main.c:mana_gd_process_eqe()
> {
> ...
> if (test_bit(GC_IN_SERVICE, &gc->flags)) {
> dev_info(gc->dev, "Already in service\n");
> break;
> }
>
> if (!try_module_get(THIS_MODULE)) {
> ...
> break;
> }
>
> mns_wk = kzalloc(sizeof(*mns_wk), GFP_KERNEL);
> if (!mns_wk) {
> module_put(THIS_MODULE);
> break;
> }
>
> dev_info(gc->dev, "Start MANA service type:%d\n", type);
> set_bit(GC_IN_SERVICE, &gc->flags); // Set only after success
> ...
> }
>
> Should clear_bit(GC_IN_SERVICE, &gc->flags) be added before each error
> return?
>
ACK.
> > +
> > + mns_wk->pdev = to_pci_dev(gc->dev);
> > + mns_wk->type = GDMA_EQE_HWC_RESET_REQUEST;
> > + pci_dev_get(mns_wk->pdev);
> > + INIT_WORK(&mns_wk->serv_work, mana_serv_func);
> > + schedule_work(&mns_wk->serv_work);
> > + }
>
> This isn't a bug, but the recovery initiation sequence here is a
> near-duplicate of the pattern in mana_gd_process_eqe() (gdma_main.c).
> Both perform the identical 7-step sequence: try_module_get, kzalloc
> mana_serv_work, set pdev, set type, pci_dev_get, INIT_WORK, schedule_work.
>
> The duplication already contributed to the GC_IN_SERVICE error path issue
> above, where this new copy diverged from the original in its error handling.
>
> Consider extracting a common helper to avoid divergence.
>
ACK.
> > return;
> > }
> > schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD);
>
> [ ... ]
Thanks Simon, I am addressing these in v2.
Regards
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH, net-next] net: mana: Trigger VF reset/recovery on health check failure due to HWC timeout
2026-02-26 19:48 ` [PATCH, net-next] " Long Li
@ 2026-02-27 8:10 ` Dipayaan Roy
0 siblings, 0 replies; 5+ messages in thread
From: Dipayaan Roy @ 2026-02-27 8:10 UTC (permalink / raw)
To: Long Li
Cc: KY Srinivasan, Haiyang Zhang, wei.liu@kernel.org, Dexuan Cui,
andrew+netdev@lunn.ch, davem@davemloft.net, edumazet@google.com,
kuba@kernel.org, pabeni@redhat.com, Konstantin Taranov,
horms@kernel.org, shradhagupta@linux.microsoft.com,
ssengar@linux.microsoft.com, ernis@linux.microsoft.com,
Shiraz Saleem, linux-hyperv@vger.kernel.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-rdma@vger.kernel.org, Dipayaan Roy
On Thu, Feb 26, 2026 at 07:48:31PM +0000, Long Li wrote:
> > The GF stats periodic query is used as mechanism to monitor HWC health check.
> > If this HWC command times out, it is a strong indication that the device/SoC is in a
> > faulty state and requires recovery.
> >
> > Today, when a timeout is detected, the driver marks hwc_timeout_occurred,
> > clears cached stats, and stops rescheduling the periodic work. However, the
> > device itself is left in the same failing state.
> >
> > Extend the timeout handling path to trigger the existing MANA VF recovery
> > service by queueing a GDMA_EQE_HWC_RESET_REQUEST work item.
> > This is expected to initiate the appropriate recovery flow by suspende resume
> > first and if it fails then trigger a bus rescan.
> >
> > This change is intentionally limited to HWC command timeouts and does not
> > trigger recovery for errors reported by the SoC as a normal command response.
> >
> > Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> > ---
> > .../net/ethernet/microsoft/mana/gdma_main.c | 14 +++-------
> > drivers/net/ethernet/microsoft/mana/mana_en.c | 28 ++++++++++++++++++-
> > include/net/mana/gdma.h | 16 +++++++++--
> > 3 files changed, 45 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > index 0055c231acf6..16c438d2aaa3 100644
> > --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > @@ -490,15 +490,9 @@ static void mana_serv_reset(struct pci_dev *pdev)
> > dev_info(&pdev->dev, "MANA reset cycle completed\n");
> >
> > out:
> > - gc->in_service = false;
> > + clear_bit(GC_IN_SERVICE, &gc->flags);
> > }
> >
> > -struct mana_serv_work {
> > - struct work_struct serv_work;
> > - struct pci_dev *pdev;
> > - enum gdma_eqe_type type;
> > -};
> > -
> > static void mana_do_service(enum gdma_eqe_type type, struct pci_dev *pdev)
> > {
> > switch (type) {
> > @@ -542,7 +536,7 @@ static void mana_recovery_delayed_func(struct
> > work_struct *w)
> > spin_unlock_irqrestore(&work->lock, flags); }
> >
> > -static void mana_serv_func(struct work_struct *w)
> > +void mana_serv_func(struct work_struct *w)
> > {
> > struct mana_serv_work *mns_wk;
> > struct pci_dev *pdev;
> > @@ -624,7 +618,7 @@ static void mana_gd_process_eqe(struct gdma_queue
> > *eq)
> > break;
> > }
> >
> > - if (gc->in_service) {
> > + if (test_bit(GC_IN_SERVICE, &gc->flags)) {
> > dev_info(gc->dev, "Already in service\n");
> > break;
> > }
> > @@ -641,7 +635,7 @@ static void mana_gd_process_eqe(struct gdma_queue
> > *eq)
> > }
> >
> > dev_info(gc->dev, "Start MANA service type:%d\n", type);
> > - gc->in_service = true;
> > + set_bit(GC_IN_SERVICE, &gc->flags);
> > mns_wk->pdev = to_pci_dev(gc->dev);
> > mns_wk->type = type;
> > pci_dev_get(mns_wk->pdev);
> > diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c
> > b/drivers/net/ethernet/microsoft/mana/mana_en.c
> > index 91c418097284..8da574cf06f2 100644
> > --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> > +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> > @@ -879,7 +879,7 @@ static void mana_tx_timeout(struct net_device *netdev,
> > unsigned int txqueue)
> > struct gdma_context *gc = ac->gdma_dev->gdma_context;
> >
> > /* Already in service, hence tx queue reset is not required.*/
> > - if (gc->in_service)
> > + if (test_bit(GC_IN_SERVICE, &gc->flags))
> > return;
> >
> > /* Note: If there are pending queue reset work for this port(apc), @@ -
> > 3533,6 +3533,8 @@ static void mana_gf_stats_work_handler(struct work_struct
> > *work) {
> > struct mana_context *ac =
> > container_of(to_delayed_work(work), struct mana_context,
> > gf_stats_work);
> > + struct gdma_context *gc = ac->gdma_dev->gdma_context;
> > + struct mana_serv_work *mns_wk;
> > int err;
> >
> > err = mana_query_gf_stats(ac);
> > @@ -3540,6 +3542,30 @@ static void mana_gf_stats_work_handler(struct
> > work_struct *work)
> > /* HWC timeout detected - reset stats and stop rescheduling */
> > ac->hwc_timeout_occurred = true;
> > memset(&ac->hc_stats, 0, sizeof(ac->hc_stats));
> > + dev_warn(gc->dev,
> > + "Gf stats wk handler: gf stats query timed out.\n");
> > +
> > + /* As HWC timed out, indicating a faulty HW state and needs a
> > + * reset.
> > + */
> > + if (!test_and_set_bit(GC_IN_SERVICE, &gc->flags)) {
> > + if (!try_module_get(THIS_MODULE)) {
> > + dev_info(gc->dev, "Module is unloading\n");
> > + return;
> > + }
> > +
> > + mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
> > + if (!mns_wk) {
> > + module_put(THIS_MODULE);
>
> Maybe it's not necessary: check if you want to call clear_bit(GC_IN_SERVICE, &gc->flags) here?
>
yes it makes sense to clear it here.
> > + return;
> > + }
> > +
> > + mns_wk->pdev = to_pci_dev(gc->dev);
> > + mns_wk->type = GDMA_EQE_HWC_RESET_REQUEST;
> > + pci_dev_get(mns_wk->pdev);
> > + INIT_WORK(&mns_wk->serv_work, mana_serv_func);
> > + schedule_work(&mns_wk->serv_work);
> > + }
> > return;
> > }
> > schedule_delayed_work(&ac->gf_stats_work,
> > MANA_GF_STATS_PERIOD); diff --git a/include/net/mana/gdma.h
>
Regards
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2026-02-27 8:10 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-23 8:47 [PATCH, net-next] net: mana: Trigger VF reset/recovery on health check failure due to HWC timeout Dipayaan Roy
2026-02-24 8:26 ` [net-next] " Simon Horman
2026-02-27 8:07 ` Dipayaan Roy
2026-02-26 19:48 ` [PATCH, net-next] " Long Li
2026-02-27 8:10 ` Dipayaan Roy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox