From: Tariq Toukan <tariqt@nvidia.com>
To: Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Andrew Lunn <andrew+netdev@lunn.ch>,
"David S. Miller" <davem@davemloft.net>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
Leon Romanovsky <leon@kernel.org>,
Tariq Toukan <tariqt@nvidia.com>, Mark Bloch <mbloch@nvidia.com>,
<netdev@vger.kernel.org>, <linux-rdma@vger.kernel.org>,
<linux-kernel@vger.kernel.org>, Gal Pressman <gal@nvidia.com>,
Moshe Shemesh <moshe@nvidia.com>, Shay Drory <shayd@nvidia.com>
Subject: [PATCH net 1/3] net/mlx5: Fix Unbinding uplink-netdev in switchdev mode
Date: Mon, 26 Jan 2026 09:14:53 +0200 [thread overview]
Message-ID: <1769411695-18820-2-git-send-email-tariqt@nvidia.com> (raw)
In-Reply-To: <1769411695-18820-1-git-send-email-tariqt@nvidia.com>
From: Shay Drory <shayd@nvidia.com>
It is possible to unbind the uplink ETH driver while the E-Switch is
in switchdev mode. This leads to netdevice reference counting issues[1],
as the driver removal path was not designed to clean up from this state.
During uplink ETH driver removal (_mlx5e_remove), the code now waits for
any concurrent E-Switch mode transition to finish. It then removes the
REPs auxiliary device, if exists. This ensures a graceful cleanup.
[1]
unregister_netdevice: waiting for eth2 to become free. Usage count = 2
ref_tracker: netdev@00000000c912e04b has 1/1 users at
ib_device_set_netdev+0x130/0x270 [ib_core]
mlx5_ib_vport_rep_load+0xf4/0x3e0 [mlx5_ib]
mlx5_esw_offloads_rep_load+0xc7/0xe0 [mlx5_core]
esw_offloads_enable+0x583/0x900 [mlx5_core]
mlx5_eswitch_enable_locked+0x1b2/0x290 [mlx5_core]
mlx5_devlink_eswitch_mode_set+0x107/0x3e0 [mlx5_core]
devlink_nl_eswitch_set_doit+0x60/0xd0
genl_family_rcv_msg_doit+0xe0/0x130
genl_rcv_msg+0x183/0x290
netlink_rcv_skb+0x4b/0xf0
genl_rcv+0x24/0x40
netlink_unicast+0x255/0x380
netlink_sendmsg+0x1f3/0x420
__sock_sendmsg+0x38/0x60
__sys_sendto+0x119/0x180
__x64_sys_sendto+0x20/0x30
Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/dev.c | 14 ++++++++++
.../net/ethernet/mellanox/mlx5/core/en_main.c | 1 +
.../net/ethernet/mellanox/mlx5/core/eswitch.h | 4 +++
.../mellanox/mlx5/core/eswitch_offloads.c | 26 +++++++++++++++++++
.../ethernet/mellanox/mlx5/core/mlx5_core.h | 1 +
5 files changed, 46 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 64c04f52990f..781e39b5aa1d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -575,3 +575,17 @@ bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev
return plen && flen && flen == plen &&
!memcmp(fsystem_guid, psystem_guid, flen);
}
+
+void mlx5_core_reps_aux_devs_remove(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+
+ if (priv->adev[MLX5_INTERFACE_PROTOCOL_ETH])
+ device_lock_assert(&priv->adev[MLX5_INTERFACE_PROTOCOL_ETH]->adev.dev);
+ else
+ mlx5_core_err(dev, "ETH driver already removed\n");
+ if (priv->adev[MLX5_INTERFACE_PROTOCOL_IB_REP])
+ del_adev(&priv->adev[MLX5_INTERFACE_PROTOCOL_IB_REP]->adev);
+ if (priv->adev[MLX5_INTERFACE_PROTOCOL_ETH_REP])
+ del_adev(&priv->adev[MLX5_INTERFACE_PROTOCOL_ETH_REP]->adev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 9042c8a388e4..f83359f7fdea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -6842,6 +6842,7 @@ static void _mlx5e_remove(struct auxiliary_device *adev)
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = edev->mdev;
+ mlx5_eswitch_safe_aux_devs_remove(mdev);
mlx5_core_uplink_netdev_set(mdev, NULL);
if (priv->profile)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index e7fe43799b23..714ad28e8445 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -929,6 +929,7 @@ int mlx5_esw_ipsec_vf_packet_offload_set(struct mlx5_eswitch *esw, struct mlx5_v
int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev,
u16 vport_num);
bool mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev);
+void mlx5_eswitch_safe_aux_devs_remove(struct mlx5_core_dev *dev);
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -1012,6 +1013,9 @@ mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id)
return false;
}
+static inline void
+mlx5_eswitch_safe_aux_devs_remove(struct mlx5_core_dev *dev) {}
+
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index ea94a727633f..02b7e474586d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3981,6 +3981,32 @@ static bool mlx5_devlink_switchdev_active_mode_change(struct mlx5_eswitch *esw,
return true;
}
+#define MLX5_ESW_HOLD_TIMEOUT_MS 7000
+#define MLX5_ESW_HOLD_RETRY_DELAY_MS 500
+
+void mlx5_eswitch_safe_aux_devs_remove(struct mlx5_core_dev *dev)
+{
+ unsigned long timeout;
+ bool hold_esw = true;
+
+ /* Wait for any concurrent eswitch mode transition to complete. */
+ if (!mlx5_esw_hold(dev)) {
+ timeout = jiffies + msecs_to_jiffies(MLX5_ESW_HOLD_TIMEOUT_MS);
+ while (!mlx5_esw_hold(dev)) {
+ if (!time_before(jiffies, timeout)) {
+ hold_esw = false;
+ break;
+ }
+ msleep(MLX5_ESW_HOLD_RETRY_DELAY_MS);
+ }
+ }
+ if (hold_esw) {
+ if (mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS)
+ mlx5_core_reps_aux_devs_remove(dev);
+ mlx5_esw_release(dev);
+ }
+}
+
int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index cfebc110c02f..99b0a25054ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -290,6 +290,7 @@ int mlx5_register_device(struct mlx5_core_dev *dev);
void mlx5_unregister_device(struct mlx5_core_dev *dev);
void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev);
bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev);
+void mlx5_core_reps_aux_devs_remove(struct mlx5_core_dev *dev);
void mlx5_fw_reporters_create(struct mlx5_core_dev *dev);
int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size);
--
2.40.1
next prev parent reply other threads:[~2026-01-26 7:15 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-26 7:14 [PATCH net 0/3] mlx5 misc fixes 2026-01-26 Tariq Toukan
2026-01-26 7:14 ` Tariq Toukan [this message]
2026-01-26 7:14 ` [PATCH net 2/3] net/mlx5e: TC, delete flows only for existing peers Tariq Toukan
2026-01-26 7:14 ` [PATCH net 3/3] net/mlx5e: Account for netdev stats in ndo_get_stats64 Tariq Toukan
2026-01-28 3:52 ` Jakub Kicinski
2026-01-28 11:28 ` Gal Pressman
2026-01-27 17:38 ` [PATCH net 0/3] mlx5 misc fixes 2026-01-26 Simon Horman
2026-01-28 2:20 ` patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1769411695-18820-2-git-send-email-tariqt@nvidia.com \
--to=tariqt@nvidia.com \
--cc=andrew+netdev@lunn.ch \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=gal@nvidia.com \
--cc=kuba@kernel.org \
--cc=leon@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=mbloch@nvidia.com \
--cc=moshe@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=saeedm@nvidia.com \
--cc=shayd@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox