public inbox for dev@dpdk.org
 help / color / mirror / Atom feed
* [PATCH 1/8] net/netvsc: secondary ignore promiscuous enable/disable
@ 2026-02-20  1:09 longli
  2026-02-20  1:09 ` [PATCH 2/8] net/netvsc: fix race conditions on VF add/remove events longli
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: longli @ 2026-02-20  1:09 UTC (permalink / raw)
  To: dev, Stephen Hemminger, Wei Hu, stable; +Cc: Long Li

From: Long Li <longli@microsoft.com>

A secondary process should not attempt to configure promiscuous mode
on the netvsc device, as it is managed by the primary process.

Signed-off-by: Long Li <longli@microsoft.com>
---
 drivers/net/netvsc/hn_ethdev.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index 6584819f4f..5d7b410f1b 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -480,6 +480,9 @@ hn_dev_promiscuous_enable(struct rte_eth_dev *dev)
 {
 	struct hn_data *hv = dev->data->dev_private;
 
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
 	hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_PROMISCUOUS);
 	return hn_vf_promiscuous_enable(dev);
 }
@@ -490,6 +493,9 @@ hn_dev_promiscuous_disable(struct rte_eth_dev *dev)
 	struct hn_data *hv = dev->data->dev_private;
 	uint32_t filter;
 
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
 	filter = NDIS_PACKET_TYPE_DIRECTED | NDIS_PACKET_TYPE_BROADCAST;
 	if (dev->data->all_multicast)
 		filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/8] net/netvsc: fix race conditions on VF add/remove events
  2026-02-20  1:09 [PATCH 1/8] net/netvsc: secondary ignore promiscuous enable/disable longli
@ 2026-02-20  1:09 ` longli
  2026-02-20  1:09 ` [PATCH 3/8] net/netvsc: add multi-process VF device removal support longli
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: longli @ 2026-02-20  1:09 UTC (permalink / raw)
  To: dev, Stephen Hemminger, Wei Hu, stable; +Cc: Long Li

From: Long Li <longli@microsoft.com>

Netvsc is notified by the VSP of VF add/remove over VMBus, but the
timing may not match the sequence of DPDK device events triggered by
uevents from the kernel.

Remove the retry logic used when attaching to the VF and rely on the
DPDK device event to attach to the VF instead. With this change, both
the VSP notification and the DPDK event will attempt a VF attach.

Also implement locking when checking all VF-related fields.

Fixes: a2a23a794b3a ("net/netvsc: support VF device hot add/remove")
Cc: stable@dpdk.org

Signed-off-by: Long Li <longli@microsoft.com>
---
 drivers/net/netvsc/hn_ethdev.c |   6 +-
 drivers/net/netvsc/hn_rxtx.c   |  40 +++++------
 drivers/net/netvsc/hn_var.h    |   1 +
 drivers/net/netvsc/hn_vf.c     | 118 ++++++++++++++++-----------------
 4 files changed, 79 insertions(+), 86 deletions(-)

diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index 5d7b410f1b..1dcab189e2 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -678,6 +678,7 @@ static void netvsc_hotplug_retry(void *args)
 
 			free(drv_str);
 
+			hn_vf_add(dev, hv);
 			break;
 		}
 	}
@@ -1418,11 +1419,12 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 	hv->max_queues = RTE_MIN(rxr_cnt, (unsigned int)max_chan);
 
 	/* If VF was reported but not added, do it now */
+	rte_rwlock_write_lock(&hv->vf_lock);
 	if (hv->vf_ctx.vf_vsp_reported && !hv->vf_ctx.vf_vsc_switched) {
 		PMD_INIT_LOG(DEBUG, "Adding VF device");
-
-		err = hn_vf_add(eth_dev, hv);
+		err = __hn_vf_add(eth_dev, hv);
 	}
+	rte_rwlock_write_unlock(&hv->vf_lock);
 
 	return 0;
 
diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
index 72dab26ede..0d770d1b25 100644
--- a/drivers/net/netvsc/hn_rxtx.c
+++ b/drivers/net/netvsc/hn_rxtx.c
@@ -1540,20 +1540,18 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		hn_process_events(hv, txq->queue_id, 0);
 
 	/* Transmit over VF if present and up */
-	if (hv->vf_ctx.vf_vsc_switched) {
-		rte_rwlock_read_lock(&hv->vf_lock);
-		vf_dev = hn_get_vf_dev(hv);
-		if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
-		    vf_dev->data->dev_started) {
-			void *sub_q = vf_dev->data->tx_queues[queue_id];
-
-			nb_tx = (*vf_dev->tx_pkt_burst)
-					(sub_q, tx_pkts, nb_pkts);
-			rte_rwlock_read_unlock(&hv->vf_lock);
-			return nb_tx;
-		}
+	rte_rwlock_read_lock(&hv->vf_lock);
+	vf_dev = hn_get_vf_dev(hv);
+	if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
+	    vf_dev->data->dev_started) {
+		void *sub_q = vf_dev->data->tx_queues[queue_id];
+
+		nb_tx = (*vf_dev->tx_pkt_burst)
+				(sub_q, tx_pkts, nb_pkts);
 		rte_rwlock_read_unlock(&hv->vf_lock);
+		return nb_tx;
 	}
+	rte_rwlock_read_unlock(&hv->vf_lock);
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 		struct rte_mbuf *m = tx_pkts[nb_tx];
@@ -1684,17 +1682,15 @@ hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 					   (void **)rx_pkts, nb_pkts, NULL);
 
 	/* If VF is available, check that as well */
-	if (hv->vf_ctx.vf_vsc_switched) {
-		rte_rwlock_read_lock(&hv->vf_lock);
-		vf_dev = hn_get_vf_dev(hv);
-		if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
-		    vf_dev->data->dev_started)
-			nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq,
-					     rx_pkts + nb_rcv,
-					     nb_pkts - nb_rcv);
+	rte_rwlock_read_lock(&hv->vf_lock);
+	vf_dev = hn_get_vf_dev(hv);
+	if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
+	    vf_dev->data->dev_started)
+		nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq,
+				     rx_pkts + nb_rcv,
+				     nb_pkts - nb_rcv);
+	rte_rwlock_read_unlock(&hv->vf_lock);
 
-		rte_rwlock_read_unlock(&hv->vf_lock);
-	}
 	return nb_rcv;
 }
 
diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h
index 17c1d5d07b..32fe373cb6 100644
--- a/drivers/net/netvsc/hn_var.h
+++ b/drivers/net/netvsc/hn_var.h
@@ -239,6 +239,7 @@ hn_get_vf_dev(const struct hn_data *hv)
 int	hn_vf_info_get(struct hn_data *hv,
 		       struct rte_eth_dev_info *info);
 int	hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv);
+int	__hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv);
 int	hn_vf_configure_locked(struct rte_eth_dev *dev,
 			       const struct rte_eth_conf *dev_conf);
 const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev,
diff --git a/drivers/net/netvsc/hn_vf.c b/drivers/net/netvsc/hn_vf.c
index 0ecfaf54ea..e4bebda147 100644
--- a/drivers/net/netvsc/hn_vf.c
+++ b/drivers/net/netvsc/hn_vf.c
@@ -59,8 +59,8 @@ static int hn_vf_attach(struct rte_eth_dev *dev, struct hn_data *hv)
 	int port, ret;
 
 	if (hv->vf_ctx.vf_attached) {
-		PMD_DRV_LOG(ERR, "VF already attached");
-		return 0;
+		PMD_DRV_LOG(NOTICE, "VF already attached");
+		return -EEXIST;
 	}
 
 	port = hn_vf_match(dev);
@@ -91,10 +91,30 @@ static int hn_vf_attach(struct rte_eth_dev *dev, struct hn_data *hv)
 	PMD_DRV_LOG(DEBUG, "Attach VF device %u", port);
 	hv->vf_ctx.vf_attached = true;
 	hv->vf_ctx.vf_port = port;
+
+	ret = rte_eth_dev_callback_register(hv->vf_ctx.vf_port,
+					    RTE_ETH_EVENT_INTR_RMV,
+					    hn_eth_rmv_event_callback,
+					    hv);
+	if (ret) {
+		/* Rollback state changes on callback registration failure */
+		hv->vf_ctx.vf_attached = false;
+		hv->vf_ctx.vf_port = 0;
+
+		/* Release port ownership */
+		if (rte_eth_dev_owner_unset(port, hv->owner.id) < 0)
+			PMD_DRV_LOG(ERR, "Failed to unset owner for port %d", port);
+
+		PMD_DRV_LOG(ERR,
+			    "Registering callback failed for vf port %d ret %d",
+			    port, ret);
+		return ret;
+	}
+
 	return 0;
 }
 
-static void hn_vf_remove(struct hn_data *hv);
+static void __hn_vf_remove(struct hn_data *hv);
 
 static void hn_remove_delayed(void *args)
 {
@@ -104,12 +124,12 @@ static void hn_remove_delayed(void *args)
 	int ret;
 	bool all_eth_removed;
 
-	/* Tell VSP to switch data path to synthetic */
-	hn_vf_remove(hv);
-
 	PMD_DRV_LOG(NOTICE, "Start to remove port %d", port_id);
 	rte_rwlock_write_lock(&hv->vf_lock);
 
+	/* Tell VSP to switch data path to synthetic */
+	__hn_vf_remove(hv);
+
 	/* Give back ownership */
 	ret = rte_eth_dev_owner_unset(port_id, hv->owner.id);
 	if (ret)
@@ -213,36 +233,20 @@ static int hn_setup_vf_queues(int port, struct rte_eth_dev *dev)
 	return ret;
 }
 
-int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv);
-
-static void hn_vf_add_retry(void *args)
-{
-	struct rte_eth_dev *dev = args;
-	struct hn_data *hv = dev->data->dev_private;
-
-	hn_vf_add(dev, hv);
-}
-
 int hn_vf_configure(struct rte_eth_dev *dev,
 		    const struct rte_eth_conf *dev_conf);
 
-/* Add new VF device to synthetic device */
-int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
+/* Add new VF device to synthetic device, unlocked version */
+int __hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
 {
-	int ret, port;
+	int ret = 0, port;
 
 	if (!hv->vf_ctx.vf_vsp_reported || hv->vf_ctx.vf_vsc_switched)
-		return 0;
-
-	rte_rwlock_write_lock(&hv->vf_lock);
+		goto exit;
 
 	ret = hn_vf_attach(dev, hv);
-	if (ret) {
-		PMD_DRV_LOG(NOTICE,
-			    "RNDIS reports VF but device not found, retrying");
-		rte_eal_alarm_set(1000000, hn_vf_add_retry, dev);
+	if (ret && ret != -EEXIST)
 		goto exit;
-	}
 
 	port = hv->vf_ctx.vf_port;
 
@@ -252,7 +256,7 @@ int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
 	if (dev->data->dev_started) {
 		if (rte_eth_devices[port].data->dev_started) {
 			PMD_DRV_LOG(ERR, "VF already started on hot add");
-			goto exit;
+			goto switch_data_path;
 		}
 
 		PMD_DRV_LOG(NOTICE, "configuring VF port %d", port);
@@ -287,26 +291,32 @@ int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
 		hv->vf_ctx.vf_state = vf_started;
 	}
 
+switch_data_path:
 	ret = hn_nvs_set_datapath(hv, NVS_DATAPATH_VF);
 	if (ret == 0)
 		hv->vf_ctx.vf_vsc_switched = true;
 
 exit:
-	rte_rwlock_write_unlock(&hv->vf_lock);
 	return ret;
 }
 
-/* Switch data path to VF device */
-static void hn_vf_remove(struct hn_data *hv)
+/* Add new VF device to synthetic device, locked version */
+int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
 {
 	int ret;
 
-	if (!hv->vf_ctx.vf_vsc_switched) {
-		PMD_DRV_LOG(ERR, "VF path not active");
-		return;
-	}
-
 	rte_rwlock_write_lock(&hv->vf_lock);
+	ret = __hn_vf_add(dev, hv);
+	rte_rwlock_write_unlock(&hv->vf_lock);
+
+	return ret;
+}
+
+/* Switch data path back to synthetic device; caller must hold vf_lock */
+static void __hn_vf_remove(struct hn_data *hv)
+{
+	int ret;
+
 	if (!hv->vf_ctx.vf_vsc_switched) {
 		PMD_DRV_LOG(ERR, "VF path not active");
 	} else {
@@ -314,9 +324,9 @@ static void hn_vf_remove(struct hn_data *hv)
 		ret = hn_nvs_set_datapath(hv, NVS_DATAPATH_SYNTHETIC);
 		if (ret)
 			PMD_DRV_LOG(ERR, "Failed to switch to synthetic");
-		hv->vf_ctx.vf_vsc_switched = false;
+		else
+			hv->vf_ctx.vf_vsc_switched = false;
 	}
-	rte_rwlock_write_unlock(&hv->vf_lock);
 }
 
 /* Handle VF association message from host */
@@ -338,14 +348,17 @@ hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
 		    vf_assoc->allocated ? "add to" : "remove from",
 		    dev->data->port_id);
 
-	hv->vf_ctx.vf_vsp_reported = vf_assoc->allocated;
+	rte_rwlock_write_lock(&hv->vf_lock);
 
+	hv->vf_ctx.vf_vsp_reported = vf_assoc->allocated;
 	if (dev->state == RTE_ETH_DEV_ATTACHED) {
 		if (vf_assoc->allocated)
-			hn_vf_add(dev, hv);
+			__hn_vf_add(dev, hv);
 		else
-			hn_vf_remove(hv);
+			__hn_vf_remove(hv);
 	}
+
+	rte_rwlock_write_unlock(&hv->vf_lock);
 }
 
 static void
@@ -426,29 +439,12 @@ int hn_vf_configure(struct rte_eth_dev *dev,
 	vf_conf.intr_conf.rmv = 1;
 
 	if (hv->vf_ctx.vf_attached) {
-		ret = rte_eth_dev_callback_register(hv->vf_ctx.vf_port,
-						    RTE_ETH_EVENT_INTR_RMV,
-						    hn_eth_rmv_event_callback,
-						    hv);
-		if (ret) {
-			PMD_DRV_LOG(ERR,
-				    "Registering callback failed for vf port %d ret %d",
-				    hv->vf_ctx.vf_port, ret);
-			return ret;
-		}
-
 		ret = rte_eth_dev_configure(hv->vf_ctx.vf_port,
 					    dev->data->nb_rx_queues,
 					    dev->data->nb_tx_queues,
 					    &vf_conf);
 		if (ret) {
 			PMD_DRV_LOG(ERR, "VF configuration failed: %d", ret);
-
-			rte_eth_dev_callback_unregister(hv->vf_ctx.vf_port,
-							RTE_ETH_EVENT_INTR_RMV,
-							hn_eth_rmv_event_callback,
-							hv);
-
 			return ret;
 		}
 
@@ -555,9 +551,7 @@ int hn_vf_close(struct rte_eth_dev *dev)
 	int ret = 0;
 	struct hn_data *hv = dev->data->dev_private;
 
-	rte_eal_alarm_cancel(hn_vf_add_retry, dev);
-
-	rte_rwlock_read_lock(&hv->vf_lock);
+	rte_rwlock_write_lock(&hv->vf_lock);
 	if (hv->vf_ctx.vf_attached) {
 		rte_eth_dev_callback_unregister(hv->vf_ctx.vf_port,
 						RTE_ETH_EVENT_INTR_RMV,
@@ -567,7 +561,7 @@ int hn_vf_close(struct rte_eth_dev *dev)
 		ret = rte_eth_dev_close(hv->vf_ctx.vf_port);
 		hv->vf_ctx.vf_attached = false;
 	}
-	rte_rwlock_read_unlock(&hv->vf_lock);
+	rte_rwlock_write_unlock(&hv->vf_lock);
 
 	return ret;
 }
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/8] net/netvsc: add multi-process VF device removal support
  2026-02-20  1:09 [PATCH 1/8] net/netvsc: secondary ignore promiscuous enable/disable longli
  2026-02-20  1:09 ` [PATCH 2/8] net/netvsc: fix race conditions on VF add/remove events longli
@ 2026-02-20  1:09 ` longli
  2026-02-20  1:09 ` [PATCH 4/8] net/mana: fix PD resource leak on device close longli
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: longli @ 2026-02-20  1:09 UTC (permalink / raw)
  To: dev, Stephen Hemminger, Wei Hu, stable; +Cc: Long Li

From: Long Li <longli@microsoft.com>

When a VF device is hot-removed by the primary process, secondary
processes must be notified to release their references to the VF port.
Without this, secondary processes retain stale port references leading
to crashes or undefined behavior when accessing the removed device.

This patch adds multi-process communication infrastructure to coordinate
VF removal across all processes:

- Shared memory (netvsc_shared_data) to track secondary process count
- Multi-process message handlers (NETVSC_MP_REQ_VF_REMOVE) to notify
  secondaries when primary removes a VF device
- Secondary handler calls rte_eth_dev_release_port() to cleanly release
  the VF port in its own process space
- Primary waits for all secondaries to acknowledge removal before
  proceeding

The implementation uses rte_mp_request_sync() to ensure all secondary
processes respond within NETVSC_MP_REQ_TIMEOUT_SEC (5 seconds) before
the primary completes the VF removal sequence.

Fixes: 7fc4c0997b04 ("net/netvsc: fix hot adding multiple VF PCI devices")
Cc: stable@dpdk.org

Signed-off-by: Long Li <longli@microsoft.com>
---
 drivers/net/netvsc/hn_ethdev.c | 274 ++++++++++++++++++++++++++++++++-
 drivers/net/netvsc/hn_nvs.h    |   5 +
 drivers/net/netvsc/hn_vf.c     |   4 +
 3 files changed, 282 insertions(+), 1 deletion(-)

diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index 1dcab189e2..62e6f49d3d 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -48,6 +48,31 @@
 	    (var) = (tvar))
 #endif
 
+/* Spinlock for netvsc_shared_data */
+static rte_spinlock_t netvsc_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
+
+static struct netvsc_shared_data {
+	RTE_ATOMIC(uint32_t) secondary_cnt;
+} *netvsc_shared_data;
+
+static const struct rte_memzone *netvsc_shared_mz;
+static const char *MZ_NETVSC_SHARED_DATA = "netvsc_shared_data";
+
+static struct netvsc_local_data {
+	bool init_done;
+	unsigned int primary_cnt;
+	unsigned int secondary_cnt;
+} netvsc_local_data;
+
+#define NETVSC_MP_NAME "net_netvsc_mp"
+#define NETVSC_MP_REQ_TIMEOUT_SEC 5
+
+struct netvsc_mp_param {
+	enum netvsc_mp_req_type type;
+	int port_id;
+	int result;
+};
+
 #define HN_TX_OFFLOAD_CAPS (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \
 			    RTE_ETH_TX_OFFLOAD_TCP_CKSUM  | \
 			    RTE_ETH_TX_OFFLOAD_UDP_CKSUM  | \
@@ -1509,6 +1534,224 @@ static void remove_cache_list(void)
 	rte_spinlock_unlock(&netvsc_lock);
 }
 
+static int
+netvsc_mp_primary_handle(const struct rte_mp_msg *mp_msg __rte_unused,
+			  const void *peer __rte_unused)
+{
+	/* Stub function required for multi-process message handling registration */
+	return 0;
+}
+
+static void
+mp_init_msg(struct rte_mp_msg *msg, enum netvsc_mp_req_type type, int port_id)
+{
+	struct netvsc_mp_param *param;
+
+	strlcpy(msg->name, NETVSC_MP_NAME, sizeof(msg->name));
+	msg->len_param = sizeof(*param);
+
+	param = (struct netvsc_mp_param *)msg->param;
+	param->type = type;
+	param->port_id = port_id;
+}
+
+static int netvsc_secondary_handle_device_remove(struct hn_data *hv)
+{
+	uint16_t port_id = hv->vf_ctx.vf_port;
+	struct rte_eth_dev *dev;
+
+	PMD_DRV_LOG(DEBUG, "Secondary handle eth device remove port %d VF port %d",
+		    hv->port_id, port_id);
+
+	/* VF is already locked by primary */
+	dev = &rte_eth_devices[port_id];
+	return rte_eth_dev_release_port(dev);
+}
+
+static int
+netvsc_mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+	struct rte_mp_msg mp_res = { 0 };
+	struct netvsc_mp_param *res = (struct netvsc_mp_param *)mp_res.param;
+	const struct netvsc_mp_param *param =
+		(const struct netvsc_mp_param *)mp_msg->param;
+	struct rte_eth_dev *dev;
+	struct hn_data *hv;
+	int ret = 0;
+
+	if (!rte_eth_dev_is_valid_port(param->port_id)) {
+		PMD_DRV_LOG(ERR, "MP handle port ID %u invalid", param->port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[param->port_id];
+	hv = dev->data->dev_private;
+
+	mp_init_msg(&mp_res, param->type, param->port_id);
+
+	switch (param->type) {
+	case NETVSC_MP_REQ_VF_REMOVE:
+		/* remove the VF from DPDK and netvsc */
+		res->result = netvsc_secondary_handle_device_remove(hv);
+		ret = rte_mp_reply(&mp_res, peer);
+		break;
+
+	default:
+		PMD_DRV_LOG(ERR, "Port %u unknown primary MP type %u",
+			param->port_id, param->type);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int netvsc_mp_init_primary(void)
+{
+	int ret;
+	ret = rte_mp_action_register(NETVSC_MP_NAME, netvsc_mp_primary_handle);
+	if (ret && rte_errno != ENOTSUP) {
+		PMD_DRV_LOG(ERR, "Failed to register primary handler %d %d",
+			ret, rte_errno);
+		return -1;
+	}
+
+	return 0;
+}
+
+static void netvsc_mp_uninit_primary(void)
+{
+	rte_mp_action_unregister(NETVSC_MP_NAME);
+}
+
+static int netvsc_mp_init_secondary(void)
+{
+	return rte_mp_action_register(NETVSC_MP_NAME, netvsc_mp_secondary_handle);
+}
+
+static void netvsc_mp_uninit_secondary(void)
+{
+	rte_mp_action_unregister(NETVSC_MP_NAME);
+}
+
+int netvsc_mp_req_VF(struct hn_data *hv, enum netvsc_mp_req_type type)
+{
+	struct rte_mp_msg mp_req = { 0 };
+	struct rte_mp_msg *mp_res;
+	struct rte_mp_reply mp_rep = { 0 };
+	struct netvsc_mp_param *res;
+	struct timespec ts = {.tv_sec = NETVSC_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+	int i, ret;
+
+	/* if secondary count is 0, return */
+	if (rte_atomic_load_explicit(&netvsc_shared_data->secondary_cnt,
+			rte_memory_order_relaxed) == 0)
+		return 0;
+
+	mp_init_msg(&mp_req, type, hv->port_id);
+
+	ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+	if (ret) {
+		if (rte_errno != ENOTSUP)
+			PMD_DRV_LOG(ERR, "port %u failed to request VF remove",
+				    hv->port_id);
+		else
+			ret = 0;
+		goto exit;
+	}
+
+	if (mp_rep.nb_sent != mp_rep.nb_received) {
+		PMD_DRV_LOG(ERR, "port %u not all secondaries responded type %d",
+			    hv->port_id, type);
+		ret = -1;
+		goto exit;
+	}
+	for (i = 0; i < mp_rep.nb_received; i++) {
+		mp_res = &mp_rep.msgs[i];
+		res = (struct netvsc_mp_param *)mp_res->param;
+		if (res->result) {
+			PMD_DRV_LOG(ERR, "port %u request failed on secondary %d",
+				    hv->port_id, i);
+			ret = -1;
+			goto exit;
+		}
+	}
+
+exit:
+	free(mp_rep.msgs);
+	return ret;
+}
+
+static int netvsc_init_once(void)
+{
+	int ret;
+	const struct rte_memzone *secondary_mz;
+
+	if (netvsc_local_data.init_done)
+		return 0;
+
+	switch (rte_eal_process_type()) {
+	case RTE_PROC_PRIMARY:
+		netvsc_shared_mz = rte_memzone_reserve(MZ_NETVSC_SHARED_DATA,
+				sizeof(*netvsc_shared_data), SOCKET_ID_ANY, 0);
+		if (!netvsc_shared_mz) {
+			PMD_DRV_LOG(ERR, "Cannot allocate netvsc shared data");
+			return -rte_errno;
+		}
+		netvsc_shared_data = netvsc_shared_mz->addr;
+		rte_atomic_store_explicit(&netvsc_shared_data->secondary_cnt,
+				0, rte_memory_order_relaxed);
+
+		ret = netvsc_mp_init_primary();
+		if (ret) {
+			rte_memzone_free(netvsc_shared_mz);
+			break;
+		}
+
+		PMD_DRV_LOG(DEBUG, "MP INIT PRIMARY");
+		netvsc_local_data.init_done = true;
+		break;
+
+	case RTE_PROC_SECONDARY:
+		secondary_mz = rte_memzone_lookup(MZ_NETVSC_SHARED_DATA);
+		if (!secondary_mz) {
+			PMD_DRV_LOG(ERR, "Cannot attach netvsc shared data");
+			return -rte_errno;
+		}
+		netvsc_shared_data = secondary_mz->addr;
+		ret = netvsc_mp_init_secondary();
+		if (ret)
+			break;
+
+		PMD_DRV_LOG(DEBUG, "MP INIT SECONDARY");
+		netvsc_local_data.init_done = true;
+		break;
+
+	default:
+		/* Impossible */
+		ret = -EPROTO;
+		break;
+	}
+
+	return ret;
+}
+
+static void netvsc_uninit_once(void)
+{
+	if (netvsc_local_data.primary_cnt ||
+	    netvsc_local_data.secondary_cnt)
+		return;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		netvsc_mp_uninit_primary();
+		rte_memzone_free(netvsc_shared_mz);
+		netvsc_shared_mz = NULL;
+		netvsc_shared_data = NULL;
+	} else {
+		netvsc_mp_uninit_secondary();
+	}
+	netvsc_local_data.init_done = false;
+}
+
 static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused,
 			struct rte_vmbus_device *dev)
 {
@@ -1522,10 +1765,14 @@ static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused,
 	if (ret)
 		return ret;
 
+	ret = netvsc_init_once();
+	if (ret)
+		goto fail;
+
 	ret = rte_dev_event_monitor_start();
 	if (ret) {
 		PMD_DRV_LOG(ERR, "Failed to start device event monitoring");
-		goto fail;
+		goto init_once_failed;
 	}
 
 	eth_dev = eth_dev_vmbus_allocate(dev, sizeof(struct hn_data));
@@ -1551,6 +1798,17 @@ static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused,
 		goto dev_init_failed;
 
 	rte_eth_dev_probing_finish(eth_dev);
+
+	rte_spinlock_lock(&netvsc_shared_data_lock);
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		netvsc_local_data.primary_cnt++;
+	} else {
+		rte_atomic_fetch_add_explicit(&netvsc_shared_data->secondary_cnt,
+				1, rte_memory_order_relaxed);
+		netvsc_local_data.secondary_cnt++;
+	}
+	rte_spinlock_unlock(&netvsc_shared_data_lock);
+
 	return ret;
 
 dev_init_failed:
@@ -1562,6 +1820,9 @@ static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused,
 vmbus_alloc_failed:
 	rte_dev_event_monitor_stop();
 
+init_once_failed:
+	netvsc_uninit_once();
+
 fail:
 	remove_cache_list();
 	return ret;
@@ -1575,6 +1836,17 @@ static int eth_hn_remove(struct rte_vmbus_device *dev)
 
 	PMD_INIT_FUNC_TRACE();
 
+	rte_spinlock_lock(&netvsc_shared_data_lock);
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		netvsc_local_data.primary_cnt--;
+	} else {
+		rte_atomic_fetch_sub_explicit(&netvsc_shared_data->secondary_cnt,
+				1, rte_memory_order_relaxed);
+		netvsc_local_data.secondary_cnt--;
+	}
+	netvsc_uninit_once();
+	rte_spinlock_unlock(&netvsc_shared_data_lock);
+
 	eth_dev = rte_eth_dev_allocated(dev->device.name);
 	if (!eth_dev)
 		return 0; /* port already released */
diff --git a/drivers/net/netvsc/hn_nvs.h b/drivers/net/netvsc/hn_nvs.h
index bf10621927..9127575cd4 100644
--- a/drivers/net/netvsc/hn_nvs.h
+++ b/drivers/net/netvsc/hn_nvs.h
@@ -243,3 +243,8 @@ hn_nvs_send_sglist(struct hn_data *hv, struct vmbus_channel *chan,
 	return rte_vmbus_chan_send_sglist(hn_nvs_get_vmbus_device(hv), chan, sg, sglen, nvs_msg,
 					  nvs_msglen, (uint64_t)sndc, need_sig);
 }
+
+enum netvsc_mp_req_type {
+	NETVSC_MP_REQ_VF_REMOVE = 1,
+};
+int netvsc_mp_req_VF(struct hn_data *hv, enum netvsc_mp_req_type type);
diff --git a/drivers/net/netvsc/hn_vf.c b/drivers/net/netvsc/hn_vf.c
index e4bebda147..d4b4b9de9f 100644
--- a/drivers/net/netvsc/hn_vf.c
+++ b/drivers/net/netvsc/hn_vf.c
@@ -155,6 +155,10 @@ static void hn_remove_delayed(void *args)
 		PMD_DRV_LOG(ERR, "rte_eth_dev_close failed port_id=%u ret=%d",
 			    port_id, ret);
 
+	ret = netvsc_mp_req_VF(hv, NETVSC_MP_REQ_VF_REMOVE);
+	if (ret)
+		PMD_DRV_LOG(ERR, "failed to request secondary VF remove");
+
 	/* Remove the rte device when all its eth devices are removed */
 	all_eth_removed = true;
 	RTE_ETH_FOREACH_DEV_OF(port_id, dev) {
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 4/8] net/mana: fix PD resource leak on device close
  2026-02-20  1:09 [PATCH 1/8] net/netvsc: secondary ignore promiscuous enable/disable longli
  2026-02-20  1:09 ` [PATCH 2/8] net/netvsc: fix race conditions on VF add/remove events longli
  2026-02-20  1:09 ` [PATCH 3/8] net/netvsc: add multi-process VF device removal support longli
@ 2026-02-20  1:09 ` longli
  2026-02-20  1:09 ` [PATCH 5/8] net/netvsc: fix devargs memory leak on hotplug longli
  2026-02-20  1:09 ` [PATCH 6/8] net/mana: fix fast-path ops setup in secondary process longli
  4 siblings, 0 replies; 6+ messages in thread
From: longli @ 2026-02-20  1:09 UTC (permalink / raw)
  To: dev, Stephen Hemminger, Wei Hu, stable; +Cc: Long Li

From: Long Li <longli@microsoft.com>

The Protection Domains (PDs) allocated during device initialization
were not being deallocated on device close, causing a resource leak.

Deallocate both ib_parent_pd and ib_pd in mana_dev_close() before
closing the IB device context. Log errors if deallocation fails,
which would indicate orphaned child resources (QPs, MRs). The close
proceeds regardless because ibv_close_device() will force kernel
cleanup of any remaining resources.

Fixes: 2f5749ead13a ("net/mana: add basic driver with build environment")
Cc: stable@dpdk.org

Signed-off-by: Long Li <longli@microsoft.com>
---
 drivers/net/mana/mana.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/net/mana/mana.c b/drivers/net/mana/mana.c
index b7ae01b152..6f70a6d774 100644
--- a/drivers/net/mana/mana.c
+++ b/drivers/net/mana/mana.c
@@ -282,6 +282,20 @@ mana_dev_close(struct rte_eth_dev *dev)
 	if (ret)
 		return ret;
 
+	if (priv->ib_parent_pd) {
+		ret = ibv_dealloc_pd(priv->ib_parent_pd);
+		if (ret)
+			DRV_LOG(ERR, "Failed to deallocate parent PD: %d", ret);
+		priv->ib_parent_pd = NULL;
+	}
+
+	if (priv->ib_pd) {
+		ret = ibv_dealloc_pd(priv->ib_pd);
+		if (ret)
+			DRV_LOG(ERR, "Failed to deallocate PD: %d", ret);
+		priv->ib_pd = NULL;
+	}
+
 	ret = ibv_close_device(priv->ib_ctx);
 	if (ret) {
 		ret = errno;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 5/8] net/netvsc: fix devargs memory leak on hotplug
  2026-02-20  1:09 [PATCH 1/8] net/netvsc: secondary ignore promiscuous enable/disable longli
                   ` (2 preceding siblings ...)
  2026-02-20  1:09 ` [PATCH 4/8] net/mana: fix PD resource leak on device close longli
@ 2026-02-20  1:09 ` longli
  2026-02-20  1:09 ` [PATCH 6/8] net/mana: fix fast-path ops setup in secondary process longli
  4 siblings, 0 replies; 6+ messages in thread
From: longli @ 2026-02-20  1:09 UTC (permalink / raw)
  To: dev, Stephen Hemminger, Wei Hu, stable; +Cc: Long Li

From: Long Li <longli@microsoft.com>

Device arguments (devargs) allocated during VF hotplug were not being
freed when the hotplug context was cleaned up, causing a memory leak.

The devargs are allocated in netvsc_hotadd_callback() via
rte_devargs_parse() and stored in the hotadd context structure. They
must be freed with rte_devargs_reset() before freeing the context.

Free devargs in both cleanup paths:
- netvsc_hotplug_retry() after hotplug attempt completes
- hn_dev_close() when canceling pending hotplug operations

Fixes: 7fc4c0997b04 ("net/netvsc: fix hot adding multiple VF PCI devices")
Cc: stable@dpdk.org
Signed-off-by: Long Li <longli@microsoft.com>
---
 drivers/net/netvsc/hn_ethdev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index 62e6f49d3d..6327dc2132 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -716,6 +716,7 @@ static void netvsc_hotplug_retry(void *args)
 	LIST_REMOVE(hot_ctx, list);
 	rte_spinlock_unlock(&hv->hotadd_lock);
 
+	rte_devargs_reset(d);
 	free(hot_ctx);
 }
 
@@ -1131,6 +1132,7 @@ hn_dev_close(struct rte_eth_dev *dev)
 		hot_ctx = LIST_FIRST(&hv->hotadd_list);
 		rte_eal_alarm_cancel(netvsc_hotplug_retry, hot_ctx);
 		LIST_REMOVE(hot_ctx, list);
+		rte_devargs_reset(&hot_ctx->da);
 		free(hot_ctx);
 	}
 	rte_spinlock_unlock(&hv->hotadd_lock);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 6/8] net/mana: fix fast-path ops setup in secondary process
  2026-02-20  1:09 [PATCH 1/8] net/netvsc: secondary ignore promiscuous enable/disable longli
                   ` (3 preceding siblings ...)
  2026-02-20  1:09 ` [PATCH 5/8] net/netvsc: fix devargs memory leak on hotplug longli
@ 2026-02-20  1:09 ` longli
  4 siblings, 0 replies; 6+ messages in thread
From: longli @ 2026-02-20  1:09 UTC (permalink / raw)
  To: dev, Stephen Hemminger, Wei Hu, stable; +Cc: Long Li

From: Long Li <longli@microsoft.com>

On hotplug, the secondary process is not able to set rte_eth_fp_ops
because the primary process has not finished setting up the device
for the datapath.

Fix this by properly setting up rte_eth_fp_ops in the secondary
process when the primary requests a datapath start. Set both
rxq.data and txq.data to point to the device's RX/TX queue arrays,
enabling the fast-path operations to access queues correctly.

Also update rte_eth_fp_ops burst function pointers in the STOP_RXTX
handler. Without this, the secondary's rte_eth_fp_ops retains stale
burst function pointers after stop, since rte_eth_fp_ops is
process-local and eth_dev_fp_ops_reset() in rte_eth_dev_stop() only
affects the primary.

Without this fix, the secondary process cannot transmit or receive
packets because the fast-path queue data pointers are NULL.

Fixes: 62724d1a3981 ("net/mana: start/stop device")
Cc: stable@dpdk.org

Signed-off-by: Long Li <longli@microsoft.com>
---
 drivers/net/mana/mp.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/mana/mp.c b/drivers/net/mana/mp.c
index 5467d385ce..7c5c0fa88f 100644
--- a/drivers/net/mana/mp.c
+++ b/drivers/net/mana/mp.c
@@ -145,6 +145,9 @@ mana_mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
 		dev->tx_pkt_burst = mana_tx_burst;
 		dev->rx_pkt_burst = mana_rx_burst;
 
+		rte_eth_fp_ops[param->port_id].rxq.data = dev->data->rx_queues;
+		rte_eth_fp_ops[param->port_id].txq.data = dev->data->tx_queues;
+
 		rte_mb();
 
 		res->result = 0;
@@ -154,6 +157,9 @@ mana_mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
 	case MANA_MP_REQ_STOP_RXTX:
 		DRV_LOG(INFO, "Port %u stopping datapath", dev->data->port_id);
 
+		rte_eth_fp_ops[param->port_id].rx_pkt_burst = mana_rx_burst_removed;
+		rte_eth_fp_ops[param->port_id].tx_pkt_burst = mana_tx_burst_removed;
+
 		dev->tx_pkt_burst = mana_tx_burst_removed;
 		dev->rx_pkt_burst = mana_rx_burst_removed;
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2026-02-20 10:10 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-02-20  1:09 [PATCH 1/8] net/netvsc: secondary ignore promiscuous enable/disable longli
2026-02-20  1:09 ` [PATCH 2/8] net/netvsc: fix race conditions on VF add/remove events longli
2026-02-20  1:09 ` [PATCH 3/8] net/netvsc: add multi-process VF device removal support longli
2026-02-20  1:09 ` [PATCH 4/8] net/mana: fix PD resource leak on device close longli
2026-02-20  1:09 ` [PATCH 5/8] net/netvsc: fix devargs memory leak on hotplug longli
2026-02-20  1:09 ` [PATCH 6/8] net/mana: fix fast-path ops setup in secondary process longli

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox