* [PATCH v5 1/7] net/netvsc: fix race conditions on VF add/remove events
2026-02-27 1:59 [PATCH v5 0/7] multi-process and VF hotplug fixes Long Li
@ 2026-02-27 1:59 ` Long Li
2026-02-27 1:59 ` [PATCH v5 2/7] net/netvsc: add multi-process VF device removal support Long Li
` (7 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Long Li @ 2026-02-27 1:59 UTC (permalink / raw)
To: dev, Wei Hu, Stephen Hemminger, stable, Dariusz Sosnowski,
Viacheslav Ovsiienko, Bing Zhao, Ori Kam, Suanming Mou,
Matan Azrad
Cc: Long Li
Netvsc gets notification from VSP on VF add/remove over VMBUS, but the
timing may not match the DPDK sequence of device events triggered from
uevents from kernel.
Remove the retry logic from the code when attach to VF and rely on DPDK
event to attach to VF. With this change, both the notifications from VSP
and the DPDK will attempt a VF attach.
Also implement locking when checking on all VF related fields.
Fixes: a2a23a794b3a ("net/netvsc: support VF device hot add/remove")
Cc: stable@dpdk.org
Signed-off-by: Long Li <longli@microsoft.com>
---
drivers/net/netvsc/hn_ethdev.c | 8 +-
drivers/net/netvsc/hn_rxtx.c | 40 +++++----
drivers/net/netvsc/hn_var.h | 1 +
drivers/net/netvsc/hn_vf.c | 144 ++++++++++++++++++---------------
4 files changed, 106 insertions(+), 87 deletions(-)
diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index 6584819f4f..b51c11554c 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -672,6 +672,9 @@ static void netvsc_hotplug_retry(void *args)
free(drv_str);
+ ret = hn_vf_add(dev, hv);
+ if (ret)
+ PMD_DRV_LOG(ERR, "Failed to add VF in hotplug retry: %d", ret);
break;
}
}
@@ -1412,11 +1415,12 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
hv->max_queues = RTE_MIN(rxr_cnt, (unsigned int)max_chan);
/* If VF was reported but not added, do it now */
+ rte_rwlock_write_lock(&hv->vf_lock);
if (hv->vf_ctx.vf_vsp_reported && !hv->vf_ctx.vf_vsc_switched) {
PMD_INIT_LOG(DEBUG, "Adding VF device");
-
- err = hn_vf_add(eth_dev, hv);
+ err = hn_vf_add_unlocked(eth_dev, hv);
}
+ rte_rwlock_write_unlock(&hv->vf_lock);
return 0;
diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
index 72dab26ede..0d770d1b25 100644
--- a/drivers/net/netvsc/hn_rxtx.c
+++ b/drivers/net/netvsc/hn_rxtx.c
@@ -1540,20 +1540,18 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
hn_process_events(hv, txq->queue_id, 0);
/* Transmit over VF if present and up */
- if (hv->vf_ctx.vf_vsc_switched) {
- rte_rwlock_read_lock(&hv->vf_lock);
- vf_dev = hn_get_vf_dev(hv);
- if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
- vf_dev->data->dev_started) {
- void *sub_q = vf_dev->data->tx_queues[queue_id];
-
- nb_tx = (*vf_dev->tx_pkt_burst)
- (sub_q, tx_pkts, nb_pkts);
- rte_rwlock_read_unlock(&hv->vf_lock);
- return nb_tx;
- }
+ rte_rwlock_read_lock(&hv->vf_lock);
+ vf_dev = hn_get_vf_dev(hv);
+ if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
+ vf_dev->data->dev_started) {
+ void *sub_q = vf_dev->data->tx_queues[queue_id];
+
+ nb_tx = (*vf_dev->tx_pkt_burst)
+ (sub_q, tx_pkts, nb_pkts);
rte_rwlock_read_unlock(&hv->vf_lock);
+ return nb_tx;
}
+ rte_rwlock_read_unlock(&hv->vf_lock);
for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
struct rte_mbuf *m = tx_pkts[nb_tx];
@@ -1684,17 +1682,15 @@ hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
(void **)rx_pkts, nb_pkts, NULL);
/* If VF is available, check that as well */
- if (hv->vf_ctx.vf_vsc_switched) {
- rte_rwlock_read_lock(&hv->vf_lock);
- vf_dev = hn_get_vf_dev(hv);
- if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
- vf_dev->data->dev_started)
- nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq,
- rx_pkts + nb_rcv,
- nb_pkts - nb_rcv);
+ rte_rwlock_read_lock(&hv->vf_lock);
+ vf_dev = hn_get_vf_dev(hv);
+ if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
+ vf_dev->data->dev_started)
+ nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq,
+ rx_pkts + nb_rcv,
+ nb_pkts - nb_rcv);
+ rte_rwlock_read_unlock(&hv->vf_lock);
- rte_rwlock_read_unlock(&hv->vf_lock);
- }
return nb_rcv;
}
diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h
index 17c1d5d07b..ef55dee28e 100644
--- a/drivers/net/netvsc/hn_var.h
+++ b/drivers/net/netvsc/hn_var.h
@@ -239,6 +239,7 @@ hn_get_vf_dev(const struct hn_data *hv)
int hn_vf_info_get(struct hn_data *hv,
struct rte_eth_dev_info *info);
int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv);
+int hn_vf_add_unlocked(struct rte_eth_dev *dev, struct hn_data *hv);
int hn_vf_configure_locked(struct rte_eth_dev *dev,
const struct rte_eth_conf *dev_conf);
const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev,
diff --git a/drivers/net/netvsc/hn_vf.c b/drivers/net/netvsc/hn_vf.c
index 0ecfaf54ea..dfd328d550 100644
--- a/drivers/net/netvsc/hn_vf.c
+++ b/drivers/net/netvsc/hn_vf.c
@@ -59,8 +59,8 @@ static int hn_vf_attach(struct rte_eth_dev *dev, struct hn_data *hv)
int port, ret;
if (hv->vf_ctx.vf_attached) {
- PMD_DRV_LOG(ERR, "VF already attached");
- return 0;
+ PMD_DRV_LOG(NOTICE, "VF already attached");
+ return -EEXIST;
}
port = hn_vf_match(dev);
@@ -91,10 +91,30 @@ static int hn_vf_attach(struct rte_eth_dev *dev, struct hn_data *hv)
PMD_DRV_LOG(DEBUG, "Attach VF device %u", port);
hv->vf_ctx.vf_attached = true;
hv->vf_ctx.vf_port = port;
+
+ ret = rte_eth_dev_callback_register(hv->vf_ctx.vf_port,
+ RTE_ETH_EVENT_INTR_RMV,
+ hn_eth_rmv_event_callback,
+ hv);
+ if (ret) {
+ /* Rollback state changes on callback registration failure */
+ hv->vf_ctx.vf_attached = false;
+ hv->vf_ctx.vf_port = 0;
+
+ /* Release port ownership */
+ if (rte_eth_dev_owner_unset(port, hv->owner.id) < 0)
+ PMD_DRV_LOG(ERR, "Failed to unset owner for port %d", port);
+
+ PMD_DRV_LOG(ERR,
+ "Registering callback failed for vf port %d ret %d",
+ port, ret);
+ return ret;
+ }
+
return 0;
}
-static void hn_vf_remove(struct hn_data *hv);
+static void hn_vf_remove_unlocked(struct hn_data *hv);
static void hn_remove_delayed(void *args)
{
@@ -104,12 +124,12 @@ static void hn_remove_delayed(void *args)
int ret;
bool all_eth_removed;
- /* Tell VSP to switch data path to synthetic */
- hn_vf_remove(hv);
-
PMD_DRV_LOG(NOTICE, "Start to remove port %d", port_id);
rte_rwlock_write_lock(&hv->vf_lock);
+ /* Tell VSP to switch data path to synthetic */
+ hn_vf_remove_unlocked(hv);
+
/* Give back ownership */
ret = rte_eth_dev_owner_unset(port_id, hv->owner.id);
if (ret)
@@ -213,36 +233,38 @@ static int hn_setup_vf_queues(int port, struct rte_eth_dev *dev)
return ret;
}
-int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv);
+int hn_vf_configure(struct rte_eth_dev *dev,
+ const struct rte_eth_conf *dev_conf);
-static void hn_vf_add_retry(void *args)
+/* Undo hn_vf_attach() on configure/start failure */
+static void hn_vf_detach(struct hn_data *hv)
{
- struct rte_eth_dev *dev = args;
- struct hn_data *hv = dev->data->dev_private;
+ uint16_t port = hv->vf_ctx.vf_port;
- hn_vf_add(dev, hv);
-}
+ rte_eth_dev_callback_unregister(port, RTE_ETH_EVENT_INTR_RMV,
+ hn_eth_rmv_event_callback, hv);
-int hn_vf_configure(struct rte_eth_dev *dev,
- const struct rte_eth_conf *dev_conf);
+ if (rte_eth_dev_owner_unset(port, hv->owner.id) < 0)
+ PMD_DRV_LOG(ERR, "Failed to unset owner for port %d", port);
-/* Add new VF device to synthetic device */
-int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
+ hv->vf_ctx.vf_attached = false;
+ hv->vf_ctx.vf_port = 0;
+}
+
+/* Add new VF device to synthetic device, unlocked version */
+int hn_vf_add_unlocked(struct rte_eth_dev *dev, struct hn_data *hv)
{
- int ret, port;
+ int ret = 0, port;
+ bool fresh_attach;
if (!hv->vf_ctx.vf_vsp_reported || hv->vf_ctx.vf_vsc_switched)
- return 0;
-
- rte_rwlock_write_lock(&hv->vf_lock);
+ goto exit;
ret = hn_vf_attach(dev, hv);
- if (ret) {
- PMD_DRV_LOG(NOTICE,
- "RNDIS reports VF but device not found, retrying");
- rte_eal_alarm_set(1000000, hn_vf_add_retry, dev);
+ if (ret && ret != -EEXIST)
goto exit;
- }
+
+ fresh_attach = (ret == 0);
port = hv->vf_ctx.vf_port;
@@ -252,7 +274,7 @@ int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
if (dev->data->dev_started) {
if (rte_eth_devices[port].data->dev_started) {
PMD_DRV_LOG(ERR, "VF already started on hot add");
- goto exit;
+ goto switch_data_path;
}
PMD_DRV_LOG(NOTICE, "configuring VF port %d", port);
@@ -260,7 +282,7 @@ int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
if (ret) {
PMD_DRV_LOG(ERR, "Failed to configure VF port %d",
port);
- goto exit;
+ goto detach;
}
ret = hn_setup_vf_queues(port, dev);
@@ -268,13 +290,13 @@ int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
PMD_DRV_LOG(ERR,
"Failed to configure VF queues port %d",
port);
- goto exit;
+ goto detach;
}
ret = rte_eth_dev_set_mtu(port, dev->data->mtu);
if (ret) {
PMD_DRV_LOG(ERR, "Failed to set VF MTU");
- goto exit;
+ goto detach;
}
PMD_DRV_LOG(NOTICE, "Starting VF port %d", port);
@@ -282,31 +304,42 @@ int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
if (ret) {
PMD_DRV_LOG(ERR, "rte_eth_dev_start failed ret=%d",
ret);
- goto exit;
+ goto detach;
}
hv->vf_ctx.vf_state = vf_started;
}
+switch_data_path:
ret = hn_nvs_set_datapath(hv, NVS_DATAPATH_VF);
if (ret == 0)
hv->vf_ctx.vf_vsc_switched = true;
exit:
- rte_rwlock_write_unlock(&hv->vf_lock);
+ return ret;
+
+detach:
+ if (fresh_attach)
+ hn_vf_detach(hv);
return ret;
}
-/* Switch data path to VF device */
-static void hn_vf_remove(struct hn_data *hv)
+/* Add new VF device to synthetic device, locked version */
+int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
{
int ret;
- if (!hv->vf_ctx.vf_vsc_switched) {
- PMD_DRV_LOG(ERR, "VF path not active");
- return;
- }
-
rte_rwlock_write_lock(&hv->vf_lock);
+ ret = hn_vf_add_unlocked(dev, hv);
+ rte_rwlock_write_unlock(&hv->vf_lock);
+
+ return ret;
+}
+
+/* Switch data path to synthetic, unlocked version */
+static void hn_vf_remove_unlocked(struct hn_data *hv)
+{
+ int ret;
+
if (!hv->vf_ctx.vf_vsc_switched) {
PMD_DRV_LOG(ERR, "VF path not active");
} else {
@@ -314,9 +347,10 @@ static void hn_vf_remove(struct hn_data *hv)
ret = hn_nvs_set_datapath(hv, NVS_DATAPATH_SYNTHETIC);
if (ret)
PMD_DRV_LOG(ERR, "Failed to switch to synthetic");
+
+ /* Clear switched flag regardless — VF is being removed */
hv->vf_ctx.vf_vsc_switched = false;
}
- rte_rwlock_write_unlock(&hv->vf_lock);
}
/* Handle VF association message from host */
@@ -338,14 +372,17 @@ hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
vf_assoc->allocated ? "add to" : "remove from",
dev->data->port_id);
- hv->vf_ctx.vf_vsp_reported = vf_assoc->allocated;
+ rte_rwlock_write_lock(&hv->vf_lock);
+ hv->vf_ctx.vf_vsp_reported = vf_assoc->allocated;
if (dev->state == RTE_ETH_DEV_ATTACHED) {
if (vf_assoc->allocated)
- hn_vf_add(dev, hv);
+ hn_vf_add_unlocked(dev, hv);
else
- hn_vf_remove(hv);
+ hn_vf_remove_unlocked(hv);
}
+
+ rte_rwlock_write_unlock(&hv->vf_lock);
}
static void
@@ -426,29 +463,12 @@ int hn_vf_configure(struct rte_eth_dev *dev,
vf_conf.intr_conf.rmv = 1;
if (hv->vf_ctx.vf_attached) {
- ret = rte_eth_dev_callback_register(hv->vf_ctx.vf_port,
- RTE_ETH_EVENT_INTR_RMV,
- hn_eth_rmv_event_callback,
- hv);
- if (ret) {
- PMD_DRV_LOG(ERR,
- "Registering callback failed for vf port %d ret %d",
- hv->vf_ctx.vf_port, ret);
- return ret;
- }
-
ret = rte_eth_dev_configure(hv->vf_ctx.vf_port,
dev->data->nb_rx_queues,
dev->data->nb_tx_queues,
&vf_conf);
if (ret) {
PMD_DRV_LOG(ERR, "VF configuration failed: %d", ret);
-
- rte_eth_dev_callback_unregister(hv->vf_ctx.vf_port,
- RTE_ETH_EVENT_INTR_RMV,
- hn_eth_rmv_event_callback,
- hv);
-
return ret;
}
@@ -555,9 +575,7 @@ int hn_vf_close(struct rte_eth_dev *dev)
int ret = 0;
struct hn_data *hv = dev->data->dev_private;
- rte_eal_alarm_cancel(hn_vf_add_retry, dev);
-
- rte_rwlock_read_lock(&hv->vf_lock);
+ rte_rwlock_write_lock(&hv->vf_lock);
if (hv->vf_ctx.vf_attached) {
rte_eth_dev_callback_unregister(hv->vf_ctx.vf_port,
RTE_ETH_EVENT_INTR_RMV,
@@ -567,7 +585,7 @@ int hn_vf_close(struct rte_eth_dev *dev)
ret = rte_eth_dev_close(hv->vf_ctx.vf_port);
hv->vf_ctx.vf_attached = false;
}
- rte_rwlock_read_unlock(&hv->vf_lock);
+ rte_rwlock_write_unlock(&hv->vf_lock);
return ret;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v5 2/7] net/netvsc: add multi-process VF device removal support
2026-02-27 1:59 [PATCH v5 0/7] multi-process and VF hotplug fixes Long Li
2026-02-27 1:59 ` [PATCH v5 1/7] net/netvsc: fix race conditions on VF add/remove events Long Li
@ 2026-02-27 1:59 ` Long Li
2026-02-27 1:59 ` [PATCH v5 3/7] net/mana: fix PD resource leak on device close Long Li
` (6 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Long Li @ 2026-02-27 1:59 UTC (permalink / raw)
To: dev, Wei Hu, Stephen Hemminger, stable, Dariusz Sosnowski,
Viacheslav Ovsiienko, Bing Zhao, Ori Kam, Suanming Mou,
Matan Azrad
Cc: Long Li
When a VF device is hot-removed by the primary process, secondary
processes must be notified to release their references to the VF port.
Without this, secondary processes retain stale port references leading
to crashes or undefined behavior when accessing the removed device.
This patch adds multi-process communication infrastructure to coordinate
VF removal across all processes:
- Shared memory (netvsc_shared_data) to track secondary process count
- Multi-process message handlers (NETVSC_MP_REQ_VF_REMOVE) to notify
secondaries when primary removes a VF device
- Secondary handler calls rte_eth_dev_release_port() to cleanly release
the VF port in its own process space
- Primary waits for all secondaries to acknowledge removal before
proceeding
The implementation uses rte_mp_request_sync() to ensure all secondary
processes respond within NETVSC_MP_REQ_TIMEOUT_SEC (5 seconds) before
the primary completes the VF removal sequence.
Fixes: 7fc4c0997b04 ("net/netvsc: fix hot adding multiple VF PCI devices")
Cc: stable@dpdk.org
Signed-off-by: Long Li <longli@microsoft.com>
---
drivers/net/netvsc/hn_ethdev.c | 290 ++++++++++++++++++++++++++++++++-
drivers/net/netvsc/hn_nvs.h | 6 +
drivers/net/netvsc/hn_vf.c | 4 +
3 files changed, 295 insertions(+), 5 deletions(-)
diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index b51c11554c..d22595ce95 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -48,6 +48,31 @@
(var) = (tvar))
#endif
+/* Spinlock for netvsc_shared_data */
+static rte_spinlock_t netvsc_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
+
+static struct netvsc_shared_data {
+ RTE_ATOMIC(uint32_t) secondary_cnt;
+} *netvsc_shared_data;
+
+static const struct rte_memzone *netvsc_shared_mz;
+#define MZ_NETVSC_SHARED_DATA "netvsc_shared_data"
+
+static struct netvsc_local_data {
+ bool init_done;
+ unsigned int primary_cnt;
+ unsigned int secondary_cnt;
+} netvsc_local_data;
+
+#define NETVSC_MP_NAME "net_netvsc_mp"
+#define NETVSC_MP_REQ_TIMEOUT_SEC 5
+
+struct netvsc_mp_param {
+ enum netvsc_mp_req_type type;
+ int vf_port;
+ int result;
+};
+
#define HN_TX_OFFLOAD_CAPS (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \
RTE_ETH_TX_OFFLOAD_TCP_CKSUM | \
RTE_ETH_TX_OFFLOAD_UDP_CKSUM | \
@@ -1505,6 +1530,217 @@ static void remove_cache_list(void)
rte_spinlock_unlock(&netvsc_lock);
}
+static int
+netvsc_mp_primary_handle(const struct rte_mp_msg *mp_msg __rte_unused,
+ const void *peer __rte_unused)
+{
+ /* Stub function required for multi-process message handling registration */
+ return 0;
+}
+
+static void
+mp_init_msg(struct rte_mp_msg *msg, enum netvsc_mp_req_type type, int vf_port)
+{
+ struct netvsc_mp_param *param;
+
+ strlcpy(msg->name, NETVSC_MP_NAME, sizeof(msg->name));
+ msg->len_param = sizeof(*param);
+
+ param = (struct netvsc_mp_param *)msg->param;
+ param->type = type;
+ param->vf_port = vf_port;
+}
+
+static int netvsc_secondary_handle_device_remove(int vf_port)
+{
+ if (!rte_eth_dev_is_valid_port(vf_port)) {
+ /* VF not probed in this secondary — nothing to release */
+ PMD_DRV_LOG(DEBUG, "VF port %u not present in secondary, skipping",
+ vf_port);
+ return 0;
+ }
+
+ PMD_DRV_LOG(DEBUG, "Secondary releasing VF port %d", vf_port);
+ return rte_eth_dev_release_port(&rte_eth_devices[vf_port]);
+}
+
+static int
+netvsc_mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_mp_msg mp_res = { 0 };
+ struct netvsc_mp_param *res = (struct netvsc_mp_param *)mp_res.param;
+ const struct netvsc_mp_param *param =
+ (const struct netvsc_mp_param *)mp_msg->param;
+ int ret = 0;
+
+ mp_init_msg(&mp_res, param->type, param->vf_port);
+
+ switch (param->type) {
+ case NETVSC_MP_REQ_VF_REMOVE:
+ res->result = netvsc_secondary_handle_device_remove(param->vf_port);
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+
+ default:
+ PMD_DRV_LOG(ERR, "Unknown primary MP type %u", param->type);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int netvsc_mp_init_primary(void)
+{
+ int ret;
+ ret = rte_mp_action_register(NETVSC_MP_NAME, netvsc_mp_primary_handle);
+ if (ret && rte_errno != ENOTSUP) {
+ PMD_DRV_LOG(ERR, "Failed to register primary handler %d %d",
+ ret, rte_errno);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void netvsc_mp_uninit_primary(void)
+{
+ rte_mp_action_unregister(NETVSC_MP_NAME);
+}
+
+static int netvsc_mp_init_secondary(void)
+{
+ return rte_mp_action_register(NETVSC_MP_NAME, netvsc_mp_secondary_handle);
+}
+
+static void netvsc_mp_uninit_secondary(void)
+{
+ rte_mp_action_unregister(NETVSC_MP_NAME);
+}
+
+int netvsc_mp_req_vf(struct hn_data *hv, enum netvsc_mp_req_type type,
+ int vf_port)
+{
+ struct rte_mp_msg mp_req = { 0 };
+ struct rte_mp_msg *mp_res;
+ struct rte_mp_reply mp_rep = { 0 };
+ struct netvsc_mp_param *res;
+ struct timespec ts = {.tv_sec = NETVSC_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+ int i, ret;
+
+ /* if secondary count is 0, return */
+ if (rte_atomic_load_explicit(&netvsc_shared_data->secondary_cnt,
+ rte_memory_order_acquire) == 0)
+ return 0;
+
+ mp_init_msg(&mp_req, type, vf_port);
+
+ ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+ if (ret) {
+ if (rte_errno != ENOTSUP)
+ PMD_DRV_LOG(ERR, "port %u failed to request VF remove",
+ hv->port_id);
+ else
+ ret = 0;
+ goto exit;
+ }
+
+ if (mp_rep.nb_sent != mp_rep.nb_received) {
+ PMD_DRV_LOG(ERR, "port %u not all secondaries responded type %d",
+ hv->port_id, type);
+ ret = -1;
+ goto exit;
+ }
+ for (i = 0; i < mp_rep.nb_received; i++) {
+ mp_res = &mp_rep.msgs[i];
+ res = (struct netvsc_mp_param *)mp_res->param;
+ if (res->result) {
+ PMD_DRV_LOG(ERR, "port %u request failed on secondary %d",
+ hv->port_id, i);
+ ret = -1;
+ goto exit;
+ }
+ }
+
+exit:
+ free(mp_rep.msgs);
+ return ret;
+}
+
+static int netvsc_init_once(void)
+{
+ int ret = 0;
+ const struct rte_memzone *secondary_mz;
+
+ if (netvsc_local_data.init_done)
+ return 0;
+
+ switch (rte_eal_process_type()) {
+ case RTE_PROC_PRIMARY:
+ netvsc_shared_mz = rte_memzone_reserve(MZ_NETVSC_SHARED_DATA,
+ sizeof(*netvsc_shared_data), SOCKET_ID_ANY, 0);
+ if (!netvsc_shared_mz) {
+ PMD_DRV_LOG(ERR, "Cannot allocate netvsc shared data");
+ return -rte_errno;
+ }
+ netvsc_shared_data = netvsc_shared_mz->addr;
+ rte_atomic_store_explicit(&netvsc_shared_data->secondary_cnt,
+ 0, rte_memory_order_release);
+
+ ret = netvsc_mp_init_primary();
+ if (ret) {
+ rte_memzone_free(netvsc_shared_mz);
+ netvsc_shared_mz = NULL;
+ netvsc_shared_data = NULL;
+ break;
+ }
+
+ PMD_DRV_LOG(DEBUG, "MP INIT PRIMARY");
+ netvsc_local_data.init_done = true;
+ break;
+
+ case RTE_PROC_SECONDARY:
+ secondary_mz = rte_memzone_lookup(MZ_NETVSC_SHARED_DATA);
+ if (!secondary_mz) {
+ PMD_DRV_LOG(ERR, "Cannot attach netvsc shared data");
+ return -rte_errno;
+ }
+ netvsc_shared_data = secondary_mz->addr;
+ ret = netvsc_mp_init_secondary();
+ if (ret) {
+ netvsc_shared_data = NULL;
+ break;
+ }
+
+ PMD_DRV_LOG(DEBUG, "MP INIT SECONDARY");
+ netvsc_local_data.init_done = true;
+ break;
+
+ default:
+ /* Impossible */
+ ret = -EPROTO;
+ break;
+ }
+
+ return ret;
+}
+
+static void netvsc_uninit_once(void)
+{
+ if (netvsc_local_data.primary_cnt ||
+ netvsc_local_data.secondary_cnt)
+ return;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ netvsc_mp_uninit_primary();
+ rte_memzone_free(netvsc_shared_mz);
+ netvsc_shared_mz = NULL;
+ netvsc_shared_data = NULL;
+ } else {
+ netvsc_mp_uninit_secondary();
+ }
+ netvsc_local_data.init_done = false;
+}
+
static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused,
struct rte_vmbus_device *dev)
{
@@ -1518,10 +1754,26 @@ static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused,
if (ret)
return ret;
+ rte_spinlock_lock(&netvsc_shared_data_lock);
+ ret = netvsc_init_once();
+ if (!ret) {
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ netvsc_local_data.primary_cnt++;
+ } else {
+ rte_atomic_fetch_add_explicit(
+ &netvsc_shared_data->secondary_cnt,
+ 1, rte_memory_order_release);
+ netvsc_local_data.secondary_cnt++;
+ }
+ }
+ rte_spinlock_unlock(&netvsc_shared_data_lock);
+ if (ret)
+ goto fail;
+
ret = rte_dev_event_monitor_start();
if (ret) {
PMD_DRV_LOG(ERR, "Failed to start device event monitoring");
- goto fail;
+ goto init_once_failed;
}
eth_dev = eth_dev_vmbus_allocate(dev, sizeof(struct hn_data));
@@ -1547,6 +1799,7 @@ static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused,
goto dev_init_failed;
rte_eth_dev_probing_finish(eth_dev);
+
return ret;
dev_init_failed:
@@ -1558,6 +1811,19 @@ static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused,
vmbus_alloc_failed:
rte_dev_event_monitor_stop();
+init_once_failed:
+ rte_spinlock_lock(&netvsc_shared_data_lock);
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ netvsc_local_data.primary_cnt--;
+ } else {
+ rte_atomic_fetch_sub_explicit(
+ &netvsc_shared_data->secondary_cnt,
+ 1, rte_memory_order_release);
+ netvsc_local_data.secondary_cnt--;
+ }
+ netvsc_uninit_once();
+ rte_spinlock_unlock(&netvsc_shared_data_lock);
+
fail:
remove_cache_list();
return ret;
@@ -1572,12 +1838,14 @@ static int eth_hn_remove(struct rte_vmbus_device *dev)
PMD_INIT_FUNC_TRACE();
eth_dev = rte_eth_dev_allocated(dev->device.name);
- if (!eth_dev)
- return 0; /* port already released */
+ if (!eth_dev) {
+ ret = 0; /* port already released */
+ goto uninit;
+ }
ret = eth_hn_dev_uninit(eth_dev);
if (ret)
- return ret;
+ goto uninit;
process_priv = eth_dev->process_private;
rte_free(process_priv);
@@ -1587,7 +1855,19 @@ static int eth_hn_remove(struct rte_vmbus_device *dev)
remove_cache_list();
- return 0;
+uninit:
+ rte_spinlock_lock(&netvsc_shared_data_lock);
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ netvsc_local_data.primary_cnt--;
+ } else {
+ rte_atomic_fetch_sub_explicit(&netvsc_shared_data->secondary_cnt,
+ 1, rte_memory_order_release);
+ netvsc_local_data.secondary_cnt--;
+ }
+ netvsc_uninit_once();
+ rte_spinlock_unlock(&netvsc_shared_data_lock);
+
+ return ret;
}
/* Network device GUID */
diff --git a/drivers/net/netvsc/hn_nvs.h b/drivers/net/netvsc/hn_nvs.h
index bf10621927..67dbfd7be7 100644
--- a/drivers/net/netvsc/hn_nvs.h
+++ b/drivers/net/netvsc/hn_nvs.h
@@ -243,3 +243,9 @@ hn_nvs_send_sglist(struct hn_data *hv, struct vmbus_channel *chan,
return rte_vmbus_chan_send_sglist(hn_nvs_get_vmbus_device(hv), chan, sg, sglen, nvs_msg,
nvs_msglen, (uint64_t)sndc, need_sig);
}
+
+enum netvsc_mp_req_type {
+ NETVSC_MP_REQ_VF_REMOVE = 1,
+};
+int netvsc_mp_req_vf(struct hn_data *hv, enum netvsc_mp_req_type type,
+ int vf_port);
diff --git a/drivers/net/netvsc/hn_vf.c b/drivers/net/netvsc/hn_vf.c
index dfd328d550..d9efa7e96f 100644
--- a/drivers/net/netvsc/hn_vf.c
+++ b/drivers/net/netvsc/hn_vf.c
@@ -155,6 +155,10 @@ static void hn_remove_delayed(void *args)
PMD_DRV_LOG(ERR, "rte_eth_dev_close failed port_id=%u ret=%d",
port_id, ret);
+ ret = netvsc_mp_req_vf(hv, NETVSC_MP_REQ_VF_REMOVE, port_id);
+ if (ret)
+ PMD_DRV_LOG(ERR, "failed to request secondary VF remove");
+
/* Remove the rte device when all its eth devices are removed */
all_eth_removed = true;
RTE_ETH_FOREACH_DEV_OF(port_id, dev) {
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v5 3/7] net/mana: fix PD resource leak on device close
2026-02-27 1:59 [PATCH v5 0/7] multi-process and VF hotplug fixes Long Li
2026-02-27 1:59 ` [PATCH v5 1/7] net/netvsc: fix race conditions on VF add/remove events Long Li
2026-02-27 1:59 ` [PATCH v5 2/7] net/netvsc: add multi-process VF device removal support Long Li
@ 2026-02-27 1:59 ` Long Li
2026-02-27 1:59 ` [PATCH v5 4/7] net/netvsc: fix devargs memory leak on hotplug Long Li
` (5 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Long Li @ 2026-02-27 1:59 UTC (permalink / raw)
To: dev, Wei Hu, Stephen Hemminger, stable, Dariusz Sosnowski,
Viacheslav Ovsiienko, Bing Zhao, Ori Kam, Suanming Mou,
Matan Azrad
Cc: Long Li
The Protection Domains (PDs) allocated during device initialization
were not being deallocated on device close, causing a resource leak.
Deallocate both ib_parent_pd and ib_pd in mana_dev_close() before
closing the IB device context. Log errors if deallocation fails,
which would indicate orphaned child resources (QPs, MRs). The close
proceeds regardless because ibv_close_device() will force kernel
cleanup of any remaining resources.
Fixes: 2f5749ead13a ("net/mana: add basic driver with build environment")
Cc: stable@dpdk.org
Signed-off-by: Long Li <longli@microsoft.com>
---
drivers/net/mana/mana.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/drivers/net/mana/mana.c b/drivers/net/mana/mana.c
index b7ae01b152..67396cda1f 100644
--- a/drivers/net/mana/mana.c
+++ b/drivers/net/mana/mana.c
@@ -282,6 +282,20 @@ mana_dev_close(struct rte_eth_dev *dev)
if (ret)
return ret;
+ if (priv->ib_parent_pd) {
+ int err = ibv_dealloc_pd(priv->ib_parent_pd);
+ if (err)
+ DRV_LOG(ERR, "Failed to deallocate parent PD: %d", err);
+ priv->ib_parent_pd = NULL;
+ }
+
+ if (priv->ib_pd) {
+ int err = ibv_dealloc_pd(priv->ib_pd);
+ if (err)
+ DRV_LOG(ERR, "Failed to deallocate PD: %d", err);
+ priv->ib_pd = NULL;
+ }
+
ret = ibv_close_device(priv->ib_ctx);
if (ret) {
ret = errno;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v5 4/7] net/netvsc: fix devargs memory leak on hotplug
2026-02-27 1:59 [PATCH v5 0/7] multi-process and VF hotplug fixes Long Li
` (2 preceding siblings ...)
2026-02-27 1:59 ` [PATCH v5 3/7] net/mana: fix PD resource leak on device close Long Li
@ 2026-02-27 1:59 ` Long Li
2026-02-27 1:59 ` [PATCH v5 5/7] net/mana: fix fast-path ops setup in secondary process Long Li
` (4 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Long Li @ 2026-02-27 1:59 UTC (permalink / raw)
To: dev, Wei Hu, Stephen Hemminger, stable, Dariusz Sosnowski,
Viacheslav Ovsiienko, Bing Zhao, Ori Kam, Suanming Mou,
Matan Azrad
Cc: Long Li
Device arguments (devargs) allocated during VF hotplug were not being
freed when the hotplug context was cleaned up, causing a memory leak.
The devargs are allocated in netvsc_hotadd_callback() via
rte_devargs_parse() and stored in the hotadd context structure. They
must be freed with rte_devargs_reset() before freeing the context.
Free devargs in both cleanup paths:
- netvsc_hotplug_retry() after hotplug attempt completes
- hn_dev_close() when canceling pending hotplug operations
Fixes: 7fc4c0997b04 ("net/netvsc: fix hot adding multiple VF PCI devices")
Cc: stable@dpdk.org
Signed-off-by: Long Li <longli@microsoft.com>
---
drivers/net/netvsc/hn_ethdev.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index d22595ce95..791e832d5d 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -712,6 +712,7 @@ static void netvsc_hotplug_retry(void *args)
LIST_REMOVE(hot_ctx, list);
rte_spinlock_unlock(&hv->hotadd_lock);
+ rte_devargs_reset(d);
free(hot_ctx);
}
@@ -1127,6 +1128,7 @@ hn_dev_close(struct rte_eth_dev *dev)
hot_ctx = LIST_FIRST(&hv->hotadd_list);
rte_eal_alarm_cancel(netvsc_hotplug_retry, hot_ctx);
LIST_REMOVE(hot_ctx, list);
+ rte_devargs_reset(&hot_ctx->da);
free(hot_ctx);
}
rte_spinlock_unlock(&hv->hotadd_lock);
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v5 5/7] net/mana: fix fast-path ops setup in secondary process
2026-02-27 1:59 [PATCH v5 0/7] multi-process and VF hotplug fixes Long Li
` (3 preceding siblings ...)
2026-02-27 1:59 ` [PATCH v5 4/7] net/netvsc: fix devargs memory leak on hotplug Long Li
@ 2026-02-27 1:59 ` Long Li
2026-02-27 1:59 ` [PATCH v5 6/7] net/mlx5: " Long Li
` (3 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Long Li @ 2026-02-27 1:59 UTC (permalink / raw)
To: dev, Wei Hu, Stephen Hemminger, stable, Dariusz Sosnowski,
Viacheslav Ovsiienko, Bing Zhao, Ori Kam, Suanming Mou,
Matan Azrad
Cc: Long Li
On hotplug, the secondary process is not able to set rte_eth_fp_ops
because the primary process has not finished setting up the device
for datapath.
Fix this by properly setting up rte_eth_fp_ops in the secondary
process when the primary requests to start datapath. Set both
rxq.data and txq.data to point to the device's RX/TX queue arrays,
enabling the fast-path operations to access queues correctly.
Also update rte_eth_fp_ops burst function pointers in the STOP_RXTX
handler. Without this, the secondary's rte_eth_fp_ops retains stale
burst function pointers after stop, since rte_eth_fp_ops is
process-local and eth_dev_fp_ops_reset() in rte_eth_dev_stop() only
affects the primary.
Without this fix, the secondary process cannot transmit or receive
packets because the fast-path queue data pointers are NULL.
Fixes: 62724d1a3981 ("net/mana: start/stop device")
Cc: stable@dpdk.org
Signed-off-by: Long Li <longli@microsoft.com>
---
drivers/net/mana/mp.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/net/mana/mp.c b/drivers/net/mana/mp.c
index 5467d385ce..72417fc0c7 100644
--- a/drivers/net/mana/mp.c
+++ b/drivers/net/mana/mp.c
@@ -145,6 +145,11 @@ mana_mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
dev->tx_pkt_burst = mana_tx_burst;
dev->rx_pkt_burst = mana_rx_burst;
+ rte_eth_fp_ops[param->port_id].rx_pkt_burst = mana_rx_burst;
+ rte_eth_fp_ops[param->port_id].tx_pkt_burst = mana_tx_burst;
+ rte_eth_fp_ops[param->port_id].rxq.data = dev->data->rx_queues;
+ rte_eth_fp_ops[param->port_id].txq.data = dev->data->tx_queues;
+
rte_mb();
res->result = 0;
@@ -154,6 +159,9 @@ mana_mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
case MANA_MP_REQ_STOP_RXTX:
DRV_LOG(INFO, "Port %u stopping datapath", dev->data->port_id);
+ rte_eth_fp_ops[param->port_id].rx_pkt_burst = mana_rx_burst_removed;
+ rte_eth_fp_ops[param->port_id].tx_pkt_burst = mana_tx_burst_removed;
+
dev->tx_pkt_burst = mana_tx_burst_removed;
dev->rx_pkt_burst = mana_rx_burst_removed;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v5 6/7] net/mlx5: fix fast-path ops setup in secondary process
2026-02-27 1:59 [PATCH v5 0/7] multi-process and VF hotplug fixes Long Li
` (4 preceding siblings ...)
2026-02-27 1:59 ` [PATCH v5 5/7] net/mana: fix fast-path ops setup in secondary process Long Li
@ 2026-02-27 1:59 ` Long Li
2026-02-27 1:59 ` [PATCH v5 7/7] net/mlx4: " Long Li
` (2 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Long Li @ 2026-02-27 1:59 UTC (permalink / raw)
To: dev, Wei Hu, Stephen Hemminger, stable, Dariusz Sosnowski,
Viacheslav Ovsiienko, Bing Zhao, Ori Kam, Suanming Mou,
Matan Azrad
Cc: Long Li
On hotplug, the secondary process is not able to set rte_eth_fp_ops
because the primary process has not finished setting up the device
for datapath.
When the primary process re-attaches a VF during a hot-add event
(e.g. Accelerated Networking re-enabled on Azure), it sends
MLX5_MP_REQ_START_RXTX to the secondary process. The secondary
handler sets rx/tx burst functions and reinitializes UAR mappings,
but does not update the fast-path queue data pointers. The stale
rxq.data and txq.data in rte_eth_fp_ops cause a segfault when the
secondary resumes polling the re-added VF port.
Fix this by setting rte_eth_fp_ops rxq.data and txq.data to point
to the device's RX/TX queue arrays in the secondary process when
the primary requests to start datapath, before the memory barrier.
Also update rte_eth_fp_ops burst function pointers in the STOP_RXTX
handler. Without this, the secondary's rte_eth_fp_ops retains stale
burst function pointers after stop, since rte_eth_fp_ops is
process-local and eth_dev_fp_ops_reset() in rte_eth_dev_stop() only
affects the primary.
Without this fix, the secondary process crashes with SIGSEGV in
rte_eth_rx_burst() at qd = p->rxq.data[queue_id] when the VF is
hot-added back.
Fixes: 2aac5b5d119f ("net/mlx5: sync stop/start with secondary process")
Cc: stable@dpdk.org
Signed-off-by: Long Li <longli@microsoft.com>
---
drivers/net/mlx5/linux/mlx5_mp_os.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/net/mlx5/linux/mlx5_mp_os.c b/drivers/net/mlx5/linux/mlx5_mp_os.c
index f2e71c9bd4..ca7b1cfa97 100644
--- a/drivers/net/mlx5/linux/mlx5_mp_os.c
+++ b/drivers/net/mlx5/linux/mlx5_mp_os.c
@@ -189,6 +189,10 @@ struct rte_mp_msg mp_res;
}
}
close(mp_msg->fds[0]);
+ rte_eth_fp_ops[param->port_id].rx_pkt_burst = dev->rx_pkt_burst;
+ rte_eth_fp_ops[param->port_id].tx_pkt_burst = dev->tx_pkt_burst;
+ rte_eth_fp_ops[param->port_id].rxq.data = dev->data->rx_queues;
+ rte_eth_fp_ops[param->port_id].txq.data = dev->data->tx_queues;
rte_mb();
mp_init_msg(&priv->mp_id, &mp_res, param->type);
res->result = 0;
@@ -198,6 +202,8 @@ struct rte_mp_msg mp_res;
DRV_LOG(INFO, "port %u stopping datapath", dev->data->port_id);
dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
+ rte_eth_fp_ops[param->port_id].rx_pkt_burst = rte_eth_pkt_burst_dummy;
+ rte_eth_fp_ops[param->port_id].tx_pkt_burst = rte_eth_pkt_burst_dummy;
rte_mb();
mp_init_msg(&priv->mp_id, &mp_res, param->type);
res->result = 0;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v5 7/7] net/mlx4: fix fast-path ops setup in secondary process
2026-02-27 1:59 [PATCH v5 0/7] multi-process and VF hotplug fixes Long Li
` (5 preceding siblings ...)
2026-02-27 1:59 ` [PATCH v5 6/7] net/mlx5: " Long Li
@ 2026-02-27 1:59 ` Long Li
2026-02-28 0:41 ` [PATCH v5 0/7] multi-process and VF hotplug fixes Stephen Hemminger
2026-02-28 17:03 ` Stephen Hemminger
8 siblings, 0 replies; 12+ messages in thread
From: Long Li @ 2026-02-27 1:59 UTC (permalink / raw)
To: dev, Wei Hu, Stephen Hemminger, stable, Dariusz Sosnowski,
Viacheslav Ovsiienko, Bing Zhao, Ori Kam, Suanming Mou,
Matan Azrad
Cc: Long Li
The secondary process MLX4_MP_REQ_START_RXTX handler sets
dev->rx_pkt_burst and dev->tx_pkt_burst, but does not update
rte_eth_fp_ops[].rxq.data and txq.data. Since rte_eth_rx_burst()
uses rte_eth_fp_ops (which is process-local), the secondary
retains stale queue data pointers after VF hot-add, causing a
segfault.
Similarly, the MLX4_MP_REQ_STOP_RXTX handler sets dev burst
functions to dummy but does not update rte_eth_fp_ops burst
pointers, leaving the secondary calling into freed resources.
Fix by updating rte_eth_fp_ops rxq.data and txq.data in the
START_RXTX handler, and rte_eth_fp_ops burst function pointers
in the STOP_RXTX handler.
Fixes: 080d8bc15905 ("net/mlx4: sync stop/start with secondary process")
Cc: stable@dpdk.org
Signed-off-by: Long Li <longli@microsoft.com>
---
drivers/net/mlx4/mlx4_mp.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/net/mlx4/mlx4_mp.c b/drivers/net/mlx4/mlx4_mp.c
index 534cb31151..0784b2d59a 100644
--- a/drivers/net/mlx4/mlx4_mp.c
+++ b/drivers/net/mlx4/mlx4_mp.c
@@ -149,6 +149,10 @@ mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
}
#endif
close(mp_msg->fds[0]);
+ rte_eth_fp_ops[param->port_id].rx_pkt_burst = dev->rx_pkt_burst;
+ rte_eth_fp_ops[param->port_id].tx_pkt_burst = dev->tx_pkt_burst;
+ rte_eth_fp_ops[param->port_id].rxq.data = dev->data->rx_queues;
+ rte_eth_fp_ops[param->port_id].txq.data = dev->data->tx_queues;
rte_mb();
mp_init_msg(dev, &mp_res, param->type);
res->result = 0;
@@ -158,6 +162,8 @@ mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
INFO("port %u stopping datapath", dev->data->port_id);
dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
+ rte_eth_fp_ops[param->port_id].rx_pkt_burst = rte_eth_pkt_burst_dummy;
+ rte_eth_fp_ops[param->port_id].tx_pkt_burst = rte_eth_pkt_burst_dummy;
rte_mb();
mp_init_msg(dev, &mp_res, param->type);
res->result = 0;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH v5 0/7] multi-process and VF hotplug fixes
2026-02-27 1:59 [PATCH v5 0/7] multi-process and VF hotplug fixes Long Li
` (6 preceding siblings ...)
2026-02-27 1:59 ` [PATCH v5 7/7] net/mlx4: " Long Li
@ 2026-02-28 0:41 ` Stephen Hemminger
2026-02-28 1:36 ` [EXTERNAL] " Long Li
2026-02-28 17:03 ` Stephen Hemminger
8 siblings, 1 reply; 12+ messages in thread
From: Stephen Hemminger @ 2026-02-28 0:41 UTC (permalink / raw)
To: Long Li
Cc: dev, Wei Hu, stable, Dariusz Sosnowski, Viacheslav Ovsiienko,
Bing Zhao, Ori Kam, Suanming Mou, Matan Azrad
On Thu, 26 Feb 2026 17:59:20 -0800
Long Li <longli@microsoft.com> wrote:
> Fix several issues with VF hotplug and multi-process support in
> netvsc, mana, mlx5, and mlx4 drivers:
>
> - Fix race conditions between VSP notifications and DPDK device events
> during VF add/remove, with proper locking of VF-related fields
> - Add multi-process communication infrastructure for coordinating VF
> removal across primary and secondary processes
> - Fix Protection Domain resource leak on device close in mana
> - Fix devargs memory leak during VF hotplug in netvsc
> - Fix fast-path ops (rte_eth_fp_ops) setup in secondary processes for
> mana, mlx5, and mlx4, ensuring burst function pointers are restored
> after STOP->START cycles
>
> v5:
> - Patches 5,6,7: Also restore rte_eth_fp_ops burst function pointers
> (rx_pkt_burst, tx_pkt_burst) in START_RXTX handler, not just queue
> data pointers. Without this, after a STOP->START cycle the secondary
> process burst pointers remain set to dummy functions.
>
> v4:
> - Patch 1: Check hn_vf_add() return value in netvsc_hotplug_retry
> - Patch 1: Track fresh_attach to avoid tearing down original VF
> attachment when configure/start fails on an -EEXIST path
> - Patch 2: Move counter decrement and netvsc_uninit_once() after device
> cleanup in eth_hn_remove() to prevent use-after-free of shared data
> - Patch 2: Clear netvsc_shared_data on init failure paths to prevent
> dangling pointer
>
> v3:
> - Fix review comments from v2
>
> v2:
> - Initial rework of VF add/remove locking
>
> Long Li (7):
> net/netvsc: fix race conditions on VF add/remove events
> net/netvsc: add multi-process VF device removal support
> net/mana: fix PD resource leak on device close
> net/netvsc: fix devargs memory leak on hotplug
> net/mana: fix fast-path ops setup in secondary process
> net/mlx5: fix fast-path ops setup in secondary process
> net/mlx4: fix fast-path ops setup in secondary process
>
> drivers/net/mana/mana.c | 14 ++
> drivers/net/mana/mp.c | 8 +
> drivers/net/mlx4/mlx4_mp.c | 6 +
> drivers/net/mlx5/linux/mlx5_mp_os.c | 6 +
> drivers/net/netvsc/hn_ethdev.c | 300 +++++++++++++++++++++++++++-
> drivers/net/netvsc/hn_nvs.h | 6 +
> drivers/net/netvsc/hn_rxtx.c | 40 ++--
> drivers/net/netvsc/hn_var.h | 1 +
> drivers/net/netvsc/hn_vf.c | 148 ++++++++------
> 9 files changed, 437 insertions(+), 92 deletions(-)
>
Looks okay to me, the AI review feedback raised a couple of questions.
If it is ok will take it as is for this release.
The AI summary was:
Patch 1: hn_vf_add_unlocked() — when hn_nvs_set_datapath() fails at switch_data_path: after a fresh attach, the VF is not detached (no goto detach). This leaves inconsistent state.
Patch 2: netvsc_uninit_once() — primary can free the shared memzone while secondaries still reference netvsc_shared_data, causing a dangling pointer. The local-only secondary_cnt check doesn't reflect remote secondary processes.
Warnings (should consider)
Patch 1: Potential deadlock in hn_vf_close() — holding write lock while calling rte_eth_dev_callback_unregister() which synchronously waits for in-progress callbacks that may themselves try to acquire the write lock via hn_remove_delayed().
There were a couple more things but these were just AI being overly paranoid.
^ permalink raw reply [flat|nested] 12+ messages in thread* RE: [EXTERNAL] Re: [PATCH v5 0/7] multi-process and VF hotplug fixes
2026-02-28 0:41 ` [PATCH v5 0/7] multi-process and VF hotplug fixes Stephen Hemminger
@ 2026-02-28 1:36 ` Long Li
0 siblings, 0 replies; 12+ messages in thread
From: Long Li @ 2026-02-28 1:36 UTC (permalink / raw)
To: Stephen Hemminger
Cc: dev@dpdk.org, Wei Hu, stable@dpdk.org, Dariusz Sosnowski,
Viacheslav Ovsiienko, Bing Zhao, Ori Kam, Suanming Mou,
Matan Azrad
> Subject: [EXTERNAL] Re: [PATCH v5 0/7] multi-process and VF hotplug fixes
>
> On Thu, 26 Feb 2026 17:59:20 -0800
> Long Li <longli@microsoft.com> wrote:
>
> > Fix several issues with VF hotplug and multi-process support in
> > netvsc, mana, mlx5, and mlx4 drivers:
> >
> > - Fix race conditions between VSP notifications and DPDK device events
> > during VF add/remove, with proper locking of VF-related fields
> > - Add multi-process communication infrastructure for coordinating VF
> > removal across primary and secondary processes
> > - Fix Protection Domain resource leak on device close in mana
> > - Fix devargs memory leak during VF hotplug in netvsc
> > - Fix fast-path ops (rte_eth_fp_ops) setup in secondary processes for
> > mana, mlx5, and mlx4, ensuring burst function pointers are restored
> > after STOP->START cycles
> >
> > v5:
> > - Patches 5,6,7: Also restore rte_eth_fp_ops burst function pointers
> > (rx_pkt_burst, tx_pkt_burst) in START_RXTX handler, not just queue
> > data pointers. Without this, after a STOP->START cycle the secondary
> > process burst pointers remain set to dummy functions.
> >
> > v4:
> > - Patch 1: Check hn_vf_add() return value in netvsc_hotplug_retry
> > - Patch 1: Track fresh_attach to avoid tearing down original VF
> > attachment when configure/start fails on an -EEXIST path
> > - Patch 2: Move counter decrement and netvsc_uninit_once() after device
> > cleanup in eth_hn_remove() to prevent use-after-free of shared data
> > - Patch 2: Clear netvsc_shared_data on init failure paths to prevent
> > dangling pointer
> >
> > v3:
> > - Fix review comments from v2
> >
> > v2:
> > - Initial rework of VF add/remove locking
> >
> > Long Li (7):
> > net/netvsc: fix race conditions on VF add/remove events
> > net/netvsc: add multi-process VF device removal support
> > net/mana: fix PD resource leak on device close
> > net/netvsc: fix devargs memory leak on hotplug
> > net/mana: fix fast-path ops setup in secondary process
> > net/mlx5: fix fast-path ops setup in secondary process
> > net/mlx4: fix fast-path ops setup in secondary process
> >
> > drivers/net/mana/mana.c | 14 ++
> > drivers/net/mana/mp.c | 8 +
> > drivers/net/mlx4/mlx4_mp.c | 6 +
> > drivers/net/mlx5/linux/mlx5_mp_os.c | 6 +
> > drivers/net/netvsc/hn_ethdev.c | 300 +++++++++++++++++++++++++++-
> > drivers/net/netvsc/hn_nvs.h | 6 +
> > drivers/net/netvsc/hn_rxtx.c | 40 ++--
> > drivers/net/netvsc/hn_var.h | 1 +
> > drivers/net/netvsc/hn_vf.c | 148 ++++++++------
> > 9 files changed, 437 insertions(+), 92 deletions(-)
> >
>
> Looks okay to me, the AI review feedback raised a couple of questions.
> If it is ok will take it as is for this release.
Thank you,
I recommend taking it as is.
" Patch 1: Potential deadlock in hn_vf_close()" is a real issue that may affect netvsc shutdown, I will address this in a future patch.
Thanks,
Long
>
> The AI summary was:
>
> Patch 1: hn_vf_add_unlocked() — when hn_nvs_set_datapath() fails at
> switch_data_path: after a fresh attach, the VF is not detached (no goto detach).
> This leaves inconsistent state.
>
> Patch 2: netvsc_uninit_once() — primary can free the shared memzone while
> secondaries still reference netvsc_shared_data, causing a dangling pointer. The
> local-only secondary_cnt check doesn't reflect remote secondary processes.
>
> Warnings (should consider)
>
> Patch 1: Potential deadlock in hn_vf_close() — holding write lock while calling
> rte_eth_dev_callback_unregister() which synchronously waits for in-progress
> callbacks that may themselves try to acquire the write lock via
> hn_remove_delayed().
>
> There were a couple more things but these were just AI being overly paranoid.
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v5 0/7] multi-process and VF hotplug fixes
2026-02-27 1:59 [PATCH v5 0/7] multi-process and VF hotplug fixes Long Li
` (7 preceding siblings ...)
2026-02-28 0:41 ` [PATCH v5 0/7] multi-process and VF hotplug fixes Stephen Hemminger
@ 2026-02-28 17:03 ` Stephen Hemminger
2026-03-01 5:00 ` [EXTERNAL] " Long Li
8 siblings, 1 reply; 12+ messages in thread
From: Stephen Hemminger @ 2026-02-28 17:03 UTC (permalink / raw)
To: Long Li
Cc: dev, Wei Hu, stable, Dariusz Sosnowski, Viacheslav Ovsiienko,
Bing Zhao, Ori Kam, Suanming Mou, Matan Azrad
On Thu, 26 Feb 2026 17:59:20 -0800
Long Li <longli@microsoft.com> wrote:
> Fix several issues with VF hotplug and multi-process support in
> netvsc, mana, mlx5, and mlx4 drivers:
>
> - Fix race conditions between VSP notifications and DPDK device events
> during VF add/remove, with proper locking of VF-related fields
> - Add multi-process communication infrastructure for coordinating VF
> removal across primary and secondary processes
> - Fix Protection Domain resource leak on device close in mana
> - Fix devargs memory leak during VF hotplug in netvsc
> - Fix fast-path ops (rte_eth_fp_ops) setup in secondary processes for
> mana, mlx5, and mlx4, ensuring burst function pointers are restored
> after STOP->START cycles
>
> v5:
> - Patches 5,6,7: Also restore rte_eth_fp_ops burst function pointers
> (rx_pkt_burst, tx_pkt_burst) in START_RXTX handler, not just queue
> data pointers. Without this, after a STOP->START cycle the secondary
> process burst pointers remain set to dummy functions.
>
> v4:
> - Patch 1: Check hn_vf_add() return value in netvsc_hotplug_retry
> - Patch 1: Track fresh_attach to avoid tearing down original VF
> attachment when configure/start fails on an -EEXIST path
> - Patch 2: Move counter decrement and netvsc_uninit_once() after device
> cleanup in eth_hn_remove() to prevent use-after-free of shared data
> - Patch 2: Clear netvsc_shared_data on init failure paths to prevent
> dangling pointer
>
> v3:
> - Fix review comments from v2
>
> v2:
> - Initial rework of VF add/remove locking
>
> Long Li (7):
> net/netvsc: fix race conditions on VF add/remove events
> net/netvsc: add multi-process VF device removal support
> net/mana: fix PD resource leak on device close
> net/netvsc: fix devargs memory leak on hotplug
> net/mana: fix fast-path ops setup in secondary process
> net/mlx5: fix fast-path ops setup in secondary process
> net/mlx4: fix fast-path ops setup in secondary process
>
> drivers/net/mana/mana.c | 14 ++
> drivers/net/mana/mp.c | 8 +
> drivers/net/mlx4/mlx4_mp.c | 6 +
> drivers/net/mlx5/linux/mlx5_mp_os.c | 6 +
> drivers/net/netvsc/hn_ethdev.c | 300 +++++++++++++++++++++++++++-
> drivers/net/netvsc/hn_nvs.h | 6 +
> drivers/net/netvsc/hn_rxtx.c | 40 ++--
> drivers/net/netvsc/hn_var.h | 1 +
> drivers/net/netvsc/hn_vf.c | 148 ++++++++------
> 9 files changed, 437 insertions(+), 92 deletions(-)
Applied to next-net.
The SHA hashes of those fixes tags was off; fixed them by
matching on the subject.
Also fixed a couple checkpatch warnings for:
CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#295: FILE: drivers/net/netvsc/hn_ethdev.c:1763:
+ rte_atomic_fetch_add_explicit(
^ permalink raw reply [flat|nested] 12+ messages in thread* RE: [EXTERNAL] Re: [PATCH v5 0/7] multi-process and VF hotplug fixes
2026-02-28 17:03 ` Stephen Hemminger
@ 2026-03-01 5:00 ` Long Li
0 siblings, 0 replies; 12+ messages in thread
From: Long Li @ 2026-03-01 5:00 UTC (permalink / raw)
To: Stephen Hemminger
Cc: dev@dpdk.org, Wei Hu, stable@dpdk.org, Dariusz Sosnowski,
Viacheslav Ovsiienko, Bing Zhao, Ori Kam, Suanming Mou,
Matan Azrad
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Saturday, February 28, 2026 9:04 AM
> To: Long Li <longli@microsoft.com>
> Cc: dev@dpdk.org; Wei Hu <weh@microsoft.com>; stable@dpdk.org; Dariusz
> Sosnowski <dsosnowski@nvidia.com>; Viacheslav Ovsiienko
> <viacheslavo@nvidia.com>; Bing Zhao <bingz@nvidia.com>; Ori Kam
> <orika@nvidia.com>; Suanming Mou <suanmingm@nvidia.com>; Matan Azrad
> <matan@nvidia.com>
> Subject: [EXTERNAL] Re: [PATCH v5 0/7] multi-process and VF hotplug fixes
>
> On Thu, 26 Feb 2026 17:59:20 -0800
> Long Li <longli@microsoft.com> wrote:
>
> > Fix several issues with VF hotplug and multi-process support in
> > netvsc, mana, mlx5, and mlx4 drivers:
> >
> > - Fix race conditions between VSP notifications and DPDK device events
> > during VF add/remove, with proper locking of VF-related fields
> > - Add multi-process communication infrastructure for coordinating VF
> > removal across primary and secondary processes
> > - Fix Protection Domain resource leak on device close in mana
> > - Fix devargs memory leak during VF hotplug in netvsc
> > - Fix fast-path ops (rte_eth_fp_ops) setup in secondary processes for
> > mana, mlx5, and mlx4, ensuring burst function pointers are restored
> > after STOP->START cycles
> >
> > v5:
> > - Patches 5,6,7: Also restore rte_eth_fp_ops burst function pointers
> > (rx_pkt_burst, tx_pkt_burst) in START_RXTX handler, not just queue
> > data pointers. Without this, after a STOP->START cycle the secondary
> > process burst pointers remain set to dummy functions.
> >
> > v4:
> > - Patch 1: Check hn_vf_add() return value in netvsc_hotplug_retry
> > - Patch 1: Track fresh_attach to avoid tearing down original VF
> > attachment when configure/start fails on an -EEXIST path
> > - Patch 2: Move counter decrement and netvsc_uninit_once() after device
> > cleanup in eth_hn_remove() to prevent use-after-free of shared data
> > - Patch 2: Clear netvsc_shared_data on init failure paths to prevent
> > dangling pointer
> >
> > v3:
> > - Fix review comments from v2
> >
> > v2:
> > - Initial rework of VF add/remove locking
> >
> > Long Li (7):
> > net/netvsc: fix race conditions on VF add/remove events
> > net/netvsc: add multi-process VF device removal support
> > net/mana: fix PD resource leak on device close
> > net/netvsc: fix devargs memory leak on hotplug
> > net/mana: fix fast-path ops setup in secondary process
> > net/mlx5: fix fast-path ops setup in secondary process
> > net/mlx4: fix fast-path ops setup in secondary process
> >
> > drivers/net/mana/mana.c | 14 ++
> > drivers/net/mana/mp.c | 8 +
> > drivers/net/mlx4/mlx4_mp.c | 6 +
> > drivers/net/mlx5/linux/mlx5_mp_os.c | 6 +
> > drivers/net/netvsc/hn_ethdev.c | 300 +++++++++++++++++++++++++++-
> > drivers/net/netvsc/hn_nvs.h | 6 +
> > drivers/net/netvsc/hn_rxtx.c | 40 ++--
> > drivers/net/netvsc/hn_var.h | 1 +
> > drivers/net/netvsc/hn_vf.c | 148 ++++++++------
> > 9 files changed, 437 insertions(+), 92 deletions(-)
>
> Applied to next-net.
> The SHA hashes of those fixes tags was off; fixed them by matching on the
> subject.
>
> Also fixed a couple checkpatch warnings for:
>
> CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
> #295: FILE: drivers/net/netvsc/hn_ethdev.c:1763:
> + rte_atomic_fetch_add_explicit(
Thank you!
Long
^ permalink raw reply [flat|nested] 12+ messages in thread