All of lore.kernel.org
 help / color / mirror / Atom feed
From: Long Li <longli@microsoft.com>
To: dev@dpdk.org
Cc: stephen@networkplumber.org, longli@microsoft.com,
	weh@microsoft.com, stable@dpdk.org
Subject: [PATCH v3 6/6] net/netvsc: support runtime queue count reconfiguration
Date: Mon,  2 Mar 2026 18:54:01 -0800	[thread overview]
Message-ID: <20260303025403.269182-7-longli@microsoft.com> (raw)
In-Reply-To: <20260303025403.269182-1-longli@microsoft.com>

Add support for changing the number of RX/TX queues at runtime
via port stop/configure/start. When the queue count changes,
perform a full NVS/RNDIS teardown and reinit to allocate fresh
VMBus subchannels matching the new queue count, then reconfigure
RSS indirection table accordingly.

Key changes:
- hn_dev_configure: detect queue count changes and perform full
  NVS session reinit with subchannel teardown/recreation
- hn_dev_stop: drain pending TX completions (up to 1s) to prevent
  stale completions from corrupting queue state after reconfig
- hn_vf_tx/rx_queue_release: use write lock when nulling VF queue
  pointers to prevent use-after-free with concurrent fast-path
  readers

Signed-off-by: Long Li <longli@microsoft.com>
---
v3:
- Fix subchan_cleanup to call hn_rndis_conf_offload so device retains
  offload config on partial subchannel failure
- Fix recovery path to set hv->primary->chan = hv->channels[0] after
  re-opening channel, preventing use-after-free of stale pointer
- Add hn_chim_init in recovery path to rebuild chimney bitmap
v2:
- Fix reinit_failed recovery: re-map device before chan_open when device
  is unmapped to prevent undefined behavior on unmapped ring buffers
- Move hn_rndis_conf_offload() to after reinit block so offload config
  targets the final RNDIS session instead of being lost on teardown
- Use write lock in hn_vf_tx/rx_queue_release() to prevent race with
  concurrent fast-path readers holding read lock
- Reset RSS indirection table to queue 0 in subchan_cleanup error path
- Fix multi-line comment style to follow DPDK convention
---
 drivers/net/netvsc/hn_ethdev.c | 194 +++++++++++++++++++++++++++++++--
 drivers/net/netvsc/hn_vf.c     |  16 ++-
 2 files changed, 194 insertions(+), 16 deletions(-)

diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index 45d69272aa..78ad566309 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -745,6 +745,9 @@ netvsc_hotadd_callback(const char *device_name, enum rte_dev_event_type type,
 	}
 }
 
+static void hn_detach(struct hn_data *hv);
+static int hn_attach(struct hn_data *hv, unsigned int mtu);
+
 static int hn_dev_configure(struct rte_eth_dev *dev)
 {
 	struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
@@ -754,6 +757,8 @@ static int hn_dev_configure(struct rte_eth_dev *dev)
 	struct hn_data *hv = dev->data->dev_private;
 	uint64_t unsupported;
 	int i, err, subchan;
+	uint32_t old_subchans = 0;
+	bool device_unmapped = false;
 
 	PMD_INIT_FUNC_TRACE();
 
@@ -778,36 +783,111 @@ static int hn_dev_configure(struct rte_eth_dev *dev)
 
 	hv->vlan_strip = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
 
-	err = hn_rndis_conf_offload(hv, txmode->offloads,
-				    rxmode->offloads);
-	if (err) {
-		PMD_DRV_LOG(NOTICE,
-			    "offload configure failed");
-		return err;
-	}
+	/* If queue count unchanged, skip subchannel teardown/reinit */
+	if (RTE_MAX(dev->data->nb_rx_queues,
+		    dev->data->nb_tx_queues) == hv->num_queues)
+		goto skip_reinit;
 
 	hv->num_queues = RTE_MAX(dev->data->nb_rx_queues,
 				 dev->data->nb_tx_queues);
 
+	/* Close all existing subchannels */
+	for (i = 1; i < HN_MAX_CHANNELS; i++) {
+		if (hv->channels[i] != NULL) {
+			rte_vmbus_chan_close(hv->channels[i]);
+			hv->channels[i] = NULL;
+			old_subchans++;
+		}
+	}
+
+	/*
+	 * If subchannels existed, do a full NVS/RNDIS teardown
+	 * and vmbus re-init to ensure a clean NVS session.
+	 * Cannot re-send NVS subchannel request on the same
+	 * session without invalidating the data path.
+	 */
+	if (old_subchans > 0) {
+		PMD_DRV_LOG(NOTICE,
+			    "reinit NVS (had %u subchannels)",
+			    old_subchans);
+
+		hn_chim_uninit(dev);
+		rte_free(hv->primary->rxbuf_info);
+		hv->primary->rxbuf_info = NULL;
+		hn_detach(hv);
+
+		rte_vmbus_chan_close(hv->channels[0]);
+		rte_free(hv->channels[0]);
+		hv->channels[0] = NULL;
+
+		rte_vmbus_unmap_device(hv->vmbus);
+		device_unmapped = true;
+		err = rte_vmbus_map_device(hv->vmbus);
+		if (err) {
+			PMD_DRV_LOG(ERR,
+				    "Could not re-map vmbus device!");
+			goto reinit_failed;
+		}
+		device_unmapped = false;
+
+		hv->rxbuf_res = hv->vmbus->resource[HV_RECV_BUF_MAP];
+		hv->chim_res  = hv->vmbus->resource[HV_SEND_BUF_MAP];
+
+		err = rte_vmbus_chan_open(hv->vmbus, &hv->channels[0]);
+		if (err) {
+			PMD_DRV_LOG(ERR,
+				    "Could not re-open vmbus channel!");
+			goto reinit_failed;
+		}
+
+		hv->primary->chan = hv->channels[0];
+
+		rte_vmbus_set_latency(hv->vmbus, hv->channels[0],
+				      hv->latency);
+
+		err = hn_attach(hv, dev->data->mtu);
+		if (err) {
+			rte_vmbus_chan_close(hv->channels[0]);
+			rte_free(hv->channels[0]);
+			hv->channels[0] = NULL;
+			PMD_DRV_LOG(ERR,
+				    "NVS reinit failed: %d", err);
+			goto reinit_failed;
+		}
+
+		err = hn_chim_init(dev);
+		if (err) {
+			hn_detach(hv);
+			rte_vmbus_chan_close(hv->channels[0]);
+			rte_free(hv->channels[0]);
+			hv->channels[0] = NULL;
+			PMD_DRV_LOG(ERR,
+				    "chim reinit failed: %d", err);
+			goto reinit_failed;
+		}
+	}
+
 	for (i = 0; i < NDIS_HASH_INDCNT; i++)
 		hv->rss_ind[i] = i % dev->data->nb_rx_queues;
 
 	hn_rss_hash_init(hv, rss_conf);
 
 	subchan = hv->num_queues - 1;
+
+	/* Allocate fresh subchannels and configure RSS */
 	if (subchan > 0) {
 		err = hn_subchan_configure(hv, subchan);
 		if (err) {
 			PMD_DRV_LOG(NOTICE,
 				    "subchannel configuration failed");
-			return err;
+			goto subchan_cleanup;
 		}
 
 		err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE);
 		if (err) {
 			PMD_DRV_LOG(NOTICE,
 				"rss disable failed");
-			return err;
+			goto subchan_cleanup;
 		}
 
 		if (rss_conf->rss_hf != 0) {
@@ -815,12 +895,82 @@ static int hn_dev_configure(struct rte_eth_dev *dev)
 			if (err) {
 				PMD_DRV_LOG(NOTICE,
 					    "initial RSS config failed");
-				return err;
+				goto subchan_cleanup;
 			}
 		}
 	}
 
+skip_reinit:
+	/* Apply offload config after reinit so it targets the final RNDIS session */
+	err = hn_rndis_conf_offload(hv, txmode->offloads,
+				    rxmode->offloads);
+	if (err) {
+		PMD_DRV_LOG(NOTICE,
+			    "offload configure failed");
+		return err;
+	}
+
 	return hn_vf_configure_locked(dev, dev_conf);
+
+subchan_cleanup:
+	for (i = 1; i < HN_MAX_CHANNELS; i++) {
+		if (hv->channels[i] != NULL) {
+			rte_vmbus_chan_close(hv->channels[i]);
+			hv->channels[i] = NULL;
+		}
+	}
+	hv->num_queues = 1;
+	for (i = 0; i < NDIS_HASH_INDCNT; i++)
+		hv->rss_ind[i] = 0;
+
+	/* Apply offload config so device is usable on primary queue */
+	hn_rndis_conf_offload(hv, txmode->offloads, rxmode->offloads);
+	return err;
+
+reinit_failed:
+	/*
+	 * Device is in a broken state after failed reinit.
+	 * Try to re-establish minimal connectivity.
+	 */
+	PMD_DRV_LOG(ERR,
+		    "reinit failed (err %d), attempting recovery", err);
+	if (hv->channels[0] == NULL) {
+		if (device_unmapped) {
+			if (rte_vmbus_map_device(hv->vmbus)) {
+				hv->num_queues = 0;
+				PMD_DRV_LOG(ERR,
+					    "recovery failed, could not re-map device");
+				return err;
+			}
+			hv->rxbuf_res = hv->vmbus->resource[HV_RECV_BUF_MAP];
+			hv->chim_res  = hv->vmbus->resource[HV_SEND_BUF_MAP];
+		}
+		if (rte_vmbus_chan_open(hv->vmbus, &hv->channels[0]) == 0) {
+			if (hn_attach(hv, dev->data->mtu) == 0) {
+				hv->primary->chan = hv->channels[0];
+				if (hn_chim_init(dev) != 0)
+					PMD_DRV_LOG(WARNING,
+						    "chim reinit failed during recovery");
+				hv->num_queues = 1;
+				PMD_DRV_LOG(NOTICE,
+					    "recovery successful on primary channel");
+			} else {
+				rte_vmbus_chan_close(hv->channels[0]);
+				rte_free(hv->channels[0]);
+				hv->channels[0] = NULL;
+				hv->num_queues = 0;
+				PMD_DRV_LOG(ERR,
+					    "recovery failed, device unusable");
+			}
+		} else {
+			hv->num_queues = 0;
+			PMD_DRV_LOG(ERR,
+				    "recovery failed, device unusable");
+		}
+	} else {
+		hv->num_queues = 1;
+	}
+	return err;
 }
 
 static int hn_dev_stats_get(struct rte_eth_dev *dev,
@@ -1073,6 +1223,7 @@ hn_dev_stop(struct rte_eth_dev *dev)
 {
 	struct hn_data *hv = dev->data->dev_private;
 	int i, ret;
+	unsigned int retry;
 
 	PMD_INIT_FUNC_TRACE();
 	dev->data->dev_started = 0;
@@ -1081,6 +1232,29 @@ hn_dev_stop(struct rte_eth_dev *dev)
 	hn_rndis_set_rxfilter(hv, 0);
 	ret = hn_vf_stop(dev);
 
+	/*
+	 * Drain pending TX completions to prevent stale completions
+	 * from corrupting queue state after port reconfiguration.
+	 */
+	for (retry = 0; retry < 100; retry++) {
+		uint32_t pending = 0;
+
+		for (i = 0; i < hv->num_queues; i++) {
+			struct hn_tx_queue *txq = dev->data->tx_queues[i];
+
+			if (txq == NULL)
+				continue;
+			hn_process_events(hv, i, 0);
+			pending += rte_mempool_in_use_count(txq->txdesc_pool);
+		}
+		if (pending == 0)
+			break;
+		rte_delay_ms(10);
+	}
+	if (retry >= 100)
+		PMD_DRV_LOG(WARNING,
+			    "Failed to drain all TX completions");
+
 	for (i = 0; i < hv->num_queues; i++) {
 		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
 		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
diff --git a/drivers/net/netvsc/hn_vf.c b/drivers/net/netvsc/hn_vf.c
index 0ecfaf54ea..e77232bfb3 100644
--- a/drivers/net/netvsc/hn_vf.c
+++ b/drivers/net/netvsc/hn_vf.c
@@ -637,12 +637,14 @@ void hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id)
 {
 	struct rte_eth_dev *vf_dev;
 
-	rte_rwlock_read_lock(&hv->vf_lock);
+	rte_rwlock_write_lock(&hv->vf_lock);
 	vf_dev = hn_get_vf_dev(hv);
-	if (vf_dev && vf_dev->dev_ops->tx_queue_release)
+	if (vf_dev && vf_dev->dev_ops->tx_queue_release) {
 		(*vf_dev->dev_ops->tx_queue_release)(vf_dev, queue_id);
+		vf_dev->data->tx_queues[queue_id] = NULL;
+	}
 
-	rte_rwlock_read_unlock(&hv->vf_lock);
+	rte_rwlock_write_unlock(&hv->vf_lock);
 }
 
 int hn_vf_rx_queue_setup(struct rte_eth_dev *dev,
@@ -669,11 +671,13 @@ void hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id)
 {
 	struct rte_eth_dev *vf_dev;
 
-	rte_rwlock_read_lock(&hv->vf_lock);
+	rte_rwlock_write_lock(&hv->vf_lock);
 	vf_dev = hn_get_vf_dev(hv);
-	if (vf_dev && vf_dev->dev_ops->rx_queue_release)
+	if (vf_dev && vf_dev->dev_ops->rx_queue_release) {
 		(*vf_dev->dev_ops->rx_queue_release)(vf_dev, queue_id);
-	rte_rwlock_read_unlock(&hv->vf_lock);
+		vf_dev->data->rx_queues[queue_id] = NULL;
+	}
+	rte_rwlock_write_unlock(&hv->vf_lock);
 }
 
 int hn_vf_stats_get(struct rte_eth_dev *dev,
-- 
2.43.0


  parent reply	other threads:[~2026-03-03  2:54 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-03  2:53 [PATCH v3 0/6] net/netvsc: bug fixes and runtime queue reconfiguration Long Li
2026-03-03  2:53 ` [PATCH v3 1/6] net/netvsc: fix subchannel leak on device removal Long Li
2026-03-03  2:53 ` [PATCH v3 2/6] net/netvsc: fix double-free of primary Rx queue on uninit Long Li
2026-03-03  2:53 ` [PATCH v3 3/6] net/netvsc: fix resource leak in init error path Long Li
2026-03-03  2:53 ` [PATCH v3 4/6] net/netvsc: fix event callback leak on rxfilter failure Long Li
2026-03-03  2:54 ` [PATCH v3 5/6] net/netvsc: fix resource leaks in MTU change path Long Li
2026-03-03  2:54 ` Long Li [this message]
2026-03-03  3:56 ` [PATCH v3 0/6] net/netvsc: bug fixes and runtime queue reconfiguration Long Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260303025403.269182-7-longli@microsoft.com \
    --to=longli@microsoft.com \
    --cc=dev@dpdk.org \
    --cc=stable@dpdk.org \
    --cc=stephen@networkplumber.org \
    --cc=weh@microsoft.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.