From: Shani Peretz <shperetz@nvidia.com>
To: <dev@dpdk.org>
Cc: <rasland@nvidia.com>, <dsosnowski@nvidia.com>,
Shani Peretz <shperetz@nvidia.com>, <a.schollmeyer@syseleven.de>,
<stable@dpdk.org>, Viacheslav Ovsiienko <viacheslavo@nvidia.com>,
Bing Zhao <bingz@nvidia.com>, Ori Kam <orika@nvidia.com>,
Suanming Mou <suanmingm@nvidia.com>,
Matan Azrad <matan@nvidia.com>, Xueming Li <xuemingl@nvidia.com>
Subject: [PATCH v2] net/mlx5: allow MTU mismatch for running shared Rx queues
Date: Thu, 29 Jan 2026 10:13:23 +0200 [thread overview]
Message-ID: <20260129081323.4713-1-shperetz@nvidia.com> (raw)
In-Reply-To: <20251117073937.1514124-1-shperetz@nvidia.com>
Previously, mlx5_shared_rxq_match() rejected any MTU mismatch between
a port and the shared RX queue, even when the queue hardware was already
running. This caused failures when hot-adding representors after runtime
MTU changes (new port has default MTU=1500, but running queue was created
with MTU=9000). It also prevented proper port reconfiguration scenarios.
The fix allows MTU mismatches when rxq_ctrl->obj != NULL (queue is
running), since runtime MTU changes via rte_eth_dev_set_mtu() only update
software bookkeeping without recreating hardware resources. Stopped queues
still enforce MTU matching to trigger proper reconfiguration.
This patch also removes the redundant priv->mtu field and uses
dev->data->mtu consistently for MTU tracking throughout the driver.
Fixes: 4414eb800708 ("net/mlx5: store MTU at Rx queue allocation time")
Fixes: 09c2555303be ("net/mlx5: support shared Rx queue")
Cc: a.schollmeyer@syseleven.de
Cc: stable@dpdk.org
Signed-off-by: Shani Peretz <shperetz@nvidia.com>
Acked-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
doc/guides/nics/mlx5.rst | 6 +++
drivers/net/mlx5/linux/mlx5_os.c | 6 +--
drivers/net/mlx5/mlx5.h | 1 -
drivers/net/mlx5/mlx5_ethdev.c | 3 --
drivers/net/mlx5/mlx5_rxq.c | 85 ++++++++++++++++++++++++------
drivers/net/mlx5/windows/mlx5_os.c | 6 +--
6 files changed, 78 insertions(+), 29 deletions(-)
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 904a0ac358..66323907a9 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -1827,6 +1827,12 @@ Limitations
#. Counters of received packets and bytes of queues in the same group and queue ID are same.
+#. Each Rx queue in share group must have the same queue configuration.
+
+#. Ports in share group must have equal MTU at port start time.
+
+#. Reconfiguring a shared queue while it is in use (started or referenced by flows) is not allowed.
+
.. _mlx5_rx_threshold:
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 7f73183bb1..e8c4fb62c8 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1334,7 +1334,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
priv->sh = sh;
priv->dev_port = spawn->phys_port;
priv->pci_dev = spawn->pci_dev;
- priv->mtu = RTE_ETHER_MTU;
/* Some internal functions rely on Netlink sockets, open them now. */
priv->nl_socket_rdma = nl_rdma;
priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE, 0);
@@ -1606,14 +1605,13 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
}
#endif
/* Get actual MTU if possible. */
- err = mlx5_get_mtu(eth_dev, &priv->mtu);
+ err = mlx5_get_mtu(eth_dev, &eth_dev->data->mtu);
if (err) {
err = rte_errno;
goto error;
}
- eth_dev->data->mtu = priv->mtu;
DRV_LOG(DEBUG, "port %u MTU is %u", eth_dev->data->port_id,
- priv->mtu);
+ eth_dev->data->mtu);
/* Initialize burst functions to prevent crashes before link-up. */
eth_dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
eth_dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 966e802f5f..551f3b9d17 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1983,7 +1983,6 @@ struct mlx5_priv {
uint16_t vlan_filter[MLX5_MAX_VLAN_IDS]; /* VLAN filters table. */
unsigned int vlan_filter_n; /* Number of configured VLAN filters. */
/* Device properties. */
- uint16_t mtu; /* Configured MTU. */
uint16_t min_mtu; /* Minimum MTU allowed on the NIC. */
uint16_t max_mtu; /* Maximum MTU allowed on the NIC. */
unsigned int isolated:1; /* Whether isolated mode is enabled. */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index c93a7ac4f2..00a1d444ef 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -679,7 +679,6 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev, size_t *no_of_elements)
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
- struct mlx5_priv *priv = dev->data->dev_private;
uint16_t kern_mtu = 0;
int ret;
@@ -688,7 +687,6 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
return ret;
if (kern_mtu == mtu) {
- priv->mtu = mtu;
DRV_LOG(DEBUG, "port %u adapter MTU was already set to %u",
dev->data->port_id, mtu);
return 0;
@@ -702,7 +700,6 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
if (ret)
return ret;
if (kern_mtu == mtu) {
- priv->mtu = mtu;
DRV_LOG(DEBUG, "port %u adapter MTU set to %u",
dev->data->port_id, mtu);
return 0;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 9210a92c5f..c4ba746d47 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -659,8 +659,6 @@ mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc,
struct mlx5_rxq_ctrl **rxq_ctrl)
{
struct mlx5_priv *priv = dev->data->dev_private;
- struct mlx5_rxq_priv *rxq;
- bool empty;
if (*desc > mlx5_dev_get_max_wq_size(priv->sh)) {
DRV_LOG(ERR,
@@ -699,14 +697,6 @@ mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc,
if ((*rxq_ctrl)->obj != NULL)
/* Some port using shared Rx queue has been started. */
return 0;
- /* Release all owner RxQ to reconfigure Shared RxQ. */
- do {
- rxq = LIST_FIRST(&(*rxq_ctrl)->owners);
- LIST_REMOVE(rxq, owner_entry);
- empty = LIST_EMPTY(&(*rxq_ctrl)->owners);
- mlx5_rxq_release(ETH_DEV(rxq->priv), rxq->idx);
- } while (!empty);
- *rxq_ctrl = NULL;
}
return 0;
}
@@ -780,10 +770,21 @@ mlx5_shared_rxq_match(struct mlx5_rxq_ctrl *rxq_ctrl, struct rte_eth_dev *dev,
dev->data->port_id, idx);
return false;
}
- if (priv->mtu != rxq_ctrl->mtu) {
- DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mtu mismatch",
- dev->data->port_id, idx);
- return false;
+ if (dev->data->mtu != rxq_ctrl->mtu) {
+ /*
+ * MTU mismatch is only a problem when the queue hasn't been started yet.
+ * If rxq_ctrl->obj is NULL, the queue hardware objects haven't been created,
+ * meaning we're in the initial configuration phase where MTU must match.
+ * If obj != NULL, the queue is already running with its hardware configured,
+ * and runtime MTU changes are safe as they only update software bookkeeping
+ * without recreating hardware resources.
+ */
+ if (rxq_ctrl->obj == NULL) {
+ DRV_LOG(DEBUG, "port %u queue index %u: mtu mismatch with existing shared rxq_ctrl "
+ "(port mtu=%u rxq_ctrl mtu=%u), reconfiguration needed",
+ dev->data->port_id, idx, dev->data->mtu, rxq_ctrl->mtu);
+ return false;
+ }
}
if (priv->dev_data->dev_conf.intr_conf.rxq !=
spriv->dev_data->dev_conf.intr_conf.rxq) {
@@ -926,8 +927,57 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
if (rxq_ctrl != NULL &&
!mlx5_shared_rxq_match(rxq_ctrl, dev, idx, desc, socket,
conf, mp)) {
- rte_errno = EINVAL;
- return -rte_errno;
+ struct mlx5_rxq_priv *rxq_tmp;
+ bool empty;
+
+ /*
+ * Configuration mismatch detected with existing shared RXQ.
+ * We need to reconfigure, but only if it's safe to do so.
+ *
+ * First check: If hardware objects are allocated, the shared queue has
+ * been started. Reconfiguration would require destroying and recreating
+ * hardware resources, which cannot be done while the queue is active.
+ * Return EBUSY to force caller to stop the queue first.
+ */
+ if (rxq_ctrl->obj != NULL) {
+ DRV_LOG(ERR, "port %u queue index %u: cannot reconfigure shared RXQ while started",
+ dev->data->port_id, idx);
+ rte_errno = EBUSY;
+ return -rte_errno;
+ }
+
+ /*
+ * Second check: Even if hardware objects aren't allocated yet,
+ * verify that no owner port is actively using this queue.
+ * refcnt == 1 means the queue exists but is idle (only setup reference).
+ * refcnt > 1 means the queue is being used by flows or other components.
+ * This prevents releasing a queue that other ports depend on.
+ */
+ LIST_FOREACH(rxq_tmp, &rxq_ctrl->owners, owner_entry) {
+ if (rxq_tmp->refcnt > 1) {
+ DRV_LOG(ERR, "port %u queue index %u: cannot reconfigure shared RXQ "
+ "while other ports are running",
+ dev->data->port_id, idx);
+ rte_errno = EBUSY;
+ return -rte_errno;
+ }
+ }
+
+ /*
+ * Safe to reconfigure: hardware not started and no active users.
+ * Release all owner ports from the existing shared rxq_ctrl.
+ * This will decrement references and eventually free the old rxq_ctrl.
+ * Setting rxq_ctrl to NULL triggers creation of a new one below with
+ * the updated configuration.
+ */
+ do {
+ rxq_tmp = LIST_FIRST(&rxq_ctrl->owners);
+ LIST_REMOVE(rxq_tmp, owner_entry);
+ empty = LIST_EMPTY(&rxq_ctrl->owners);
+ mlx5_rxq_release(ETH_DEV(rxq_tmp->priv), rxq_tmp->idx);
+ } while (!empty);
+
+ rxq_ctrl = NULL;
}
} else {
res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl);
@@ -1813,7 +1863,8 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
LIST_INIT(&tmpl->owners);
MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG);
/*
- * Save the original MTU to check against for shared rx queues.
+ * Save the current MTU to check against for shared rx queues.
+ * Use dev->data->mtu which reflects the actual current MTU.
*/
tmpl->mtu = dev->data->mtu;
/*
diff --git a/drivers/net/mlx5/windows/mlx5_os.c b/drivers/net/mlx5/windows/mlx5_os.c
index 4eadc872a5..5dcabfbed2 100644
--- a/drivers/net/mlx5/windows/mlx5_os.c
+++ b/drivers/net/mlx5/windows/mlx5_os.c
@@ -396,7 +396,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
priv->sh = sh;
priv->dev_port = spawn->phys_port;
priv->pci_dev = spawn->pci_dev;
- priv->mtu = RTE_ETHER_MTU;
priv->mp_id.port_id = port_id;
strlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);
priv->representor = !!switch_info->representor;
@@ -504,14 +503,13 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
}
#endif
/* Get actual MTU if possible. */
- err = mlx5_get_mtu(eth_dev, &priv->mtu);
+ err = mlx5_get_mtu(eth_dev, &eth_dev->data->mtu);
if (err) {
err = rte_errno;
goto error;
}
- eth_dev->data->mtu = priv->mtu;
DRV_LOG(DEBUG, "port %u MTU is %u.", eth_dev->data->port_id,
- priv->mtu);
+ eth_dev->data->mtu);
/* Initialize burst functions to prevent crashes before link-up. */
eth_dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
eth_dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
--
2.43.0
next prev parent reply other threads:[~2026-01-29 8:13 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-17 7:39 [PATCH] net/mlx5: update stored Rx queue MTU when port MTU changes Shani Peretz
2025-11-18 9:52 ` Adrian Schollmeyer
2026-01-29 8:13 ` Shani Peretz [this message]
2026-02-08 14:37 ` [PATCH v2] net/mlx5: allow MTU mismatch for running shared Rx queues Raslan Darawsheh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260129081323.4713-1-shperetz@nvidia.com \
--to=shperetz@nvidia.com \
--cc=a.schollmeyer@syseleven.de \
--cc=bingz@nvidia.com \
--cc=dev@dpdk.org \
--cc=dsosnowski@nvidia.com \
--cc=matan@nvidia.com \
--cc=orika@nvidia.com \
--cc=rasland@nvidia.com \
--cc=stable@dpdk.org \
--cc=suanmingm@nvidia.com \
--cc=viacheslavo@nvidia.com \
--cc=xuemingl@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox