netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next,v3] net: mana: Add handler for hardware servicing events
@ 2025-05-12 19:57 Haiyang Zhang
  2025-05-13  6:27 ` Shradha Gupta
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Haiyang Zhang @ 2025-05-12 19:57 UTC (permalink / raw)
  To: linux-hyperv, netdev
  Cc: haiyangz, decui, stephen, kys, paulros, olaf, vkuznets, davem,
	wei.liu, edumazet, kuba, pabeni, leon, longli, ssengar,
	linux-rdma, daniel, john.fastabend, bpf, ast, hawk, tglx,
	shradhagupta, andrew+netdev, kotaranov, horms, linux-kernel

To collaborate with hardware servicing events, upon receiving the special
EQE notification from the HW channel, remove the devices on this bus.
Then, after a waiting period based on the device specs, rescan the parent
bus to recover the devices.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
v3:
Updated for checkpatch warnings as suggested by Simon Horman.

v2:
Added dev_dbg for service type as suggested by Shradha Gupta.
Added driver cap bit.

---
 .../net/ethernet/microsoft/mana/gdma_main.c   | 64 +++++++++++++++++++
 include/net/mana/gdma.h                       | 11 +++-
 2 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 4ffaf7588885..3102bd2b875b 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -352,11 +352,55 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
 }
 EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA");
 
+#define MANA_SERVICE_PERIOD 10
+
+struct mana_serv_work {
+	struct work_struct serv_work;
+	struct pci_dev *pdev;
+};
+
+static void mana_serv_func(struct work_struct *w)
+{
+	struct mana_serv_work *mns_wk;
+	struct pci_bus *bus, *parent;
+	struct pci_dev *pdev;
+
+	mns_wk = container_of(w, struct mana_serv_work, serv_work);
+	pdev = mns_wk->pdev;
+
+	if (!pdev)
+		goto out;
+
+	bus = pdev->bus;
+	if (!bus) {
+		dev_err(&pdev->dev, "MANA service: no bus\n");
+		goto out;
+	}
+
+	parent = bus->parent;
+	if (!parent) {
+		dev_err(&pdev->dev, "MANA service: no parent bus\n");
+		goto out;
+	}
+
+	pci_stop_and_remove_bus_device_locked(bus->self);
+
+	msleep(MANA_SERVICE_PERIOD * 1000);
+
+	pci_lock_rescan_remove();
+	pci_rescan_bus(parent);
+	pci_unlock_rescan_remove();
+
+out:
+	kfree(mns_wk);
+}
+
 static void mana_gd_process_eqe(struct gdma_queue *eq)
 {
 	u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE);
 	struct gdma_context *gc = eq->gdma_dev->gdma_context;
 	struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr;
+	struct mana_serv_work *mns_wk;
 	union gdma_eqe_info eqe_info;
 	enum gdma_eqe_type type;
 	struct gdma_event event;
@@ -400,6 +444,26 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
 		eq->eq.callback(eq->eq.context, eq, &event);
 		break;
 
+	case GDMA_EQE_HWC_FPGA_RECONFIG:
+	case GDMA_EQE_HWC_SOCMANA_CRASH:
+		dev_dbg(gc->dev, "Recv MANA service type:%d\n", type);
+
+		if (gc->in_service) {
+			dev_info(gc->dev, "Already in service\n");
+			break;
+		}
+
+		mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
+		if (!mns_wk)
+			break;
+
+		dev_info(gc->dev, "Start MANA service type:%d\n", type);
+		gc->in_service = true;
+		mns_wk->pdev = to_pci_dev(gc->dev);
+		INIT_WORK(&mns_wk->serv_work, mana_serv_func);
+		schedule_work(&mns_wk->serv_work);
+		break;
+
 	default:
 		break;
 	}
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 228603bf03f2..d0fbc9c64cc8 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -58,8 +58,9 @@ enum gdma_eqe_type {
 	GDMA_EQE_HWC_INIT_EQ_ID_DB	= 129,
 	GDMA_EQE_HWC_INIT_DATA		= 130,
 	GDMA_EQE_HWC_INIT_DONE		= 131,
-	GDMA_EQE_HWC_SOC_RECONFIG	= 132,
+	GDMA_EQE_HWC_FPGA_RECONFIG	= 132,
 	GDMA_EQE_HWC_SOC_RECONFIG_DATA	= 133,
+	GDMA_EQE_HWC_SOCMANA_CRASH	= 135,
 	GDMA_EQE_RNIC_QP_FATAL		= 176,
 };
 
@@ -388,6 +389,8 @@ struct gdma_context {
 	u32			test_event_eq_id;
 
 	bool			is_pf;
+	bool			in_service;
+
 	phys_addr_t		bar0_pa;
 	void __iomem		*bar0_va;
 	void __iomem		*shm_base;
@@ -558,12 +561,16 @@ enum {
 /* Driver can handle holes (zeros) in the device list */
 #define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11)
 
+/* Driver can self reset on EQE notification */
+#define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14)
+
 #define GDMA_DRV_CAP_FLAGS1 \
 	(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
 	 GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
 	 GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
 	 GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
-	 GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP)
+	 GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
+	 GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE)
 
 #define GDMA_DRV_CAP_FLAGS2 0
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next,v3] net: mana: Add handler for hardware servicing events
  2025-05-12 19:57 [PATCH net-next,v3] net: mana: Add handler for hardware servicing events Haiyang Zhang
@ 2025-05-13  6:27 ` Shradha Gupta
  2025-05-14 12:43 ` Simon Horman
  2025-05-14 19:36 ` Haiyang Zhang
  2 siblings, 0 replies; 4+ messages in thread
From: Shradha Gupta @ 2025-05-13  6:27 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: linux-hyperv, netdev, decui, stephen, kys, paulros, olaf,
	vkuznets, davem, wei.liu, edumazet, kuba, pabeni, leon, longli,
	ssengar, linux-rdma, daniel, john.fastabend, bpf, ast, hawk, tglx,
	andrew+netdev, kotaranov, horms, linux-kernel

On Mon, May 12, 2025 at 12:57:54PM -0700, Haiyang Zhang wrote:
> To collaborate with hardware servicing events, upon receiving the special
> EQE notification from the HW channel, remove the devices on this bus.
> Then, after a waiting period based on the device specs, rescan the parent
> bus to recover the devices.
> 
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> ---
> v3:
> Updated for checkpatch warnings as suggested by Simon Horman.
> 
> v2:
> Added dev_dbg for service type as suggested by Shradha Gupta.
> Added driver cap bit.
> 
> ---
>  .../net/ethernet/microsoft/mana/gdma_main.c   | 64 +++++++++++++++++++
>  include/net/mana/gdma.h                       | 11 +++-
>  2 files changed, 73 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> index 4ffaf7588885..3102bd2b875b 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> @@ -352,11 +352,55 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
>  }
>  EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA");
>  
> +#define MANA_SERVICE_PERIOD 10
> +
> +struct mana_serv_work {
> +	struct work_struct serv_work;
> +	struct pci_dev *pdev;
> +};
> +
> +static void mana_serv_func(struct work_struct *w)
> +{
> +	struct mana_serv_work *mns_wk;
> +	struct pci_bus *bus, *parent;
> +	struct pci_dev *pdev;
> +
> +	mns_wk = container_of(w, struct mana_serv_work, serv_work);
> +	pdev = mns_wk->pdev;
> +
> +	if (!pdev)
> +		goto out;
> +
> +	bus = pdev->bus;
> +	if (!bus) {
> +		dev_err(&pdev->dev, "MANA service: no bus\n");
> +		goto out;
> +	}
> +
> +	parent = bus->parent;
> +	if (!parent) {
> +		dev_err(&pdev->dev, "MANA service: no parent bus\n");
> +		goto out;
> +	}
> +
> +	pci_stop_and_remove_bus_device_locked(bus->self);
> +
> +	msleep(MANA_SERVICE_PERIOD * 1000);
> +
> +	pci_lock_rescan_remove();
> +	pci_rescan_bus(parent);
> +	pci_unlock_rescan_remove();
> +
> +out:
> +	kfree(mns_wk);
> +}
> +
>  static void mana_gd_process_eqe(struct gdma_queue *eq)
>  {
>  	u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE);
>  	struct gdma_context *gc = eq->gdma_dev->gdma_context;
>  	struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr;
> +	struct mana_serv_work *mns_wk;
>  	union gdma_eqe_info eqe_info;
>  	enum gdma_eqe_type type;
>  	struct gdma_event event;
> @@ -400,6 +444,26 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
>  		eq->eq.callback(eq->eq.context, eq, &event);
>  		break;
>  
> +	case GDMA_EQE_HWC_FPGA_RECONFIG:
> +	case GDMA_EQE_HWC_SOCMANA_CRASH:
> +		dev_dbg(gc->dev, "Recv MANA service type:%d\n", type);
> +
> +		if (gc->in_service) {
> +			dev_info(gc->dev, "Already in service\n");
> +			break;
> +		}
> +
> +		mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
> +		if (!mns_wk)
> +			break;
> +
> +		dev_info(gc->dev, "Start MANA service type:%d\n", type);
> +		gc->in_service = true;
> +		mns_wk->pdev = to_pci_dev(gc->dev);
> +		INIT_WORK(&mns_wk->serv_work, mana_serv_func);
> +		schedule_work(&mns_wk->serv_work);
> +		break;
> +
>  	default:
>  		break;
>  	}
> diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
> index 228603bf03f2..d0fbc9c64cc8 100644
> --- a/include/net/mana/gdma.h
> +++ b/include/net/mana/gdma.h
> @@ -58,8 +58,9 @@ enum gdma_eqe_type {
>  	GDMA_EQE_HWC_INIT_EQ_ID_DB	= 129,
>  	GDMA_EQE_HWC_INIT_DATA		= 130,
>  	GDMA_EQE_HWC_INIT_DONE		= 131,
> -	GDMA_EQE_HWC_SOC_RECONFIG	= 132,
> +	GDMA_EQE_HWC_FPGA_RECONFIG	= 132,
>  	GDMA_EQE_HWC_SOC_RECONFIG_DATA	= 133,
> +	GDMA_EQE_HWC_SOCMANA_CRASH	= 135,
>  	GDMA_EQE_RNIC_QP_FATAL		= 176,
>  };
>  
> @@ -388,6 +389,8 @@ struct gdma_context {
>  	u32			test_event_eq_id;
>  
>  	bool			is_pf;
> +	bool			in_service;
> +
>  	phys_addr_t		bar0_pa;
>  	void __iomem		*bar0_va;
>  	void __iomem		*shm_base;
> @@ -558,12 +561,16 @@ enum {
>  /* Driver can handle holes (zeros) in the device list */
>  #define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11)
>  
> +/* Driver can self reset on EQE notification */
> +#define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14)
> +
>  #define GDMA_DRV_CAP_FLAGS1 \
>  	(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
>  	 GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
>  	 GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
>  	 GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
> -	 GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP)
> +	 GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
> +	 GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE)
>  
>  #define GDMA_DRV_CAP_FLAGS2 0
>  
> -- 
> 2.34.1

Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next,v3] net: mana: Add handler for hardware servicing events
  2025-05-12 19:57 [PATCH net-next,v3] net: mana: Add handler for hardware servicing events Haiyang Zhang
  2025-05-13  6:27 ` Shradha Gupta
@ 2025-05-14 12:43 ` Simon Horman
  2025-05-14 19:36 ` Haiyang Zhang
  2 siblings, 0 replies; 4+ messages in thread
From: Simon Horman @ 2025-05-14 12:43 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: linux-hyperv, netdev, decui, stephen, kys, paulros, olaf,
	vkuznets, davem, wei.liu, edumazet, kuba, pabeni, leon, longli,
	ssengar, linux-rdma, daniel, john.fastabend, bpf, ast, hawk, tglx,
	shradhagupta, andrew+netdev, kotaranov, linux-kernel

On Mon, May 12, 2025 at 12:57:54PM -0700, Haiyang Zhang wrote:
> To collaborate with hardware servicing events, upon receiving the special
> EQE notification from the HW channel, remove the devices on this bus.
> Then, after a waiting period based on the device specs, rescan the parent
> bus to recover the devices.
> 
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH net-next,v3] net: mana: Add handler for hardware servicing events
  2025-05-12 19:57 [PATCH net-next,v3] net: mana: Add handler for hardware servicing events Haiyang Zhang
  2025-05-13  6:27 ` Shradha Gupta
  2025-05-14 12:43 ` Simon Horman
@ 2025-05-14 19:36 ` Haiyang Zhang
  2 siblings, 0 replies; 4+ messages in thread
From: Haiyang Zhang @ 2025-05-14 19:36 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: Dexuan Cui, stephen@networkplumber.org, KY Srinivasan,
	Paul Rosswurm, olaf@aepfle.de, vkuznets@redhat.com,
	davem@davemloft.net, wei.liu@kernel.org, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com, leon@kernel.org, Long Li,
	ssengar@linux.microsoft.com, linux-rdma@vger.kernel.org,
	daniel@iogearbox.net, john.fastabend@gmail.com,
	bpf@vger.kernel.org, ast@kernel.org, hawk@kernel.org,
	tglx@linutronix.de, shradhagupta@linux.microsoft.com,
	andrew+netdev@lunn.ch, Konstantin Taranov, horms@kernel.org,
	linux-kernel@vger.kernel.org



> -----Original Message-----
> From: LKML haiyangz <lkmlhyz@microsoft.com> On Behalf Of Haiyang Zhang
> Sent: Monday, May 12, 2025 3:58 PM
> To: linux-hyperv@vger.kernel.org; netdev@vger.kernel.org
> Cc: Haiyang Zhang <haiyangz@microsoft.com>; Dexuan Cui
> <decui@microsoft.com>; stephen@networkplumber.org; KY Srinivasan
> <kys@microsoft.com>; Paul Rosswurm <paulros@microsoft.com>;
> olaf@aepfle.de; vkuznets@redhat.com; davem@davemloft.net;
> wei.liu@kernel.org; edumazet@google.com; kuba@kernel.org;
> pabeni@redhat.com; leon@kernel.org; Long Li <longli@microsoft.com>;
> ssengar@linux.microsoft.com; linux-rdma@vger.kernel.org;
> daniel@iogearbox.net; john.fastabend@gmail.com; bpf@vger.kernel.org;
> ast@kernel.org; hawk@kernel.org; tglx@linutronix.de;
> shradhagupta@linux.microsoft.com; andrew+netdev@lunn.ch; Konstantin
> Taranov <kotaranov@microsoft.com>; horms@kernel.org; linux-
> kernel@vger.kernel.org
> Subject: [PATCH net-next,v3] net: mana: Add handler for hardware servicing
> events
> 
> To collaborate with hardware servicing events, upon receiving the special
> EQE notification from the HW channel, remove the devices on this bus.
> Then, after a waiting period based on the device specs, rescan the parent
> bus to recover the devices.
> 
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> ---
> v3:
> Updated for checkpatch warnings as suggested by Simon Horman.
> 
> v2:
> Added dev_dbg for service type as suggested by Shradha Gupta.
> Added driver cap bit.
> 
> ---

Thanks for the reviews. 
I will submit v4 soon with a minor name change.

Thanks,
- Haiyang

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-05-14 19:36 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-05-12 19:57 [PATCH net-next,v3] net: mana: Add handler for hardware servicing events Haiyang Zhang
2025-05-13  6:27 ` Shradha Gupta
2025-05-14 12:43 ` Simon Horman
2025-05-14 19:36 ` Haiyang Zhang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).