From: Saeed Mahameed <saeed@kernel.org>
To: Saeed Mahameed <saeedm@nvidia.com>, Leon Romanovsky <leonro@nvidia.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>,
linux-rdma@vger.kernel.org, netdev@vger.kernel.org,
Jiri Pirko <jiri@nvidia.com>
Subject: [PATCH mlx5-next 3/8] RDMA/mlx5: Track netdev to avoid deadlock during netdev notifier unregister
Date: Wed, 4 Jan 2023 20:17:51 -0800 [thread overview]
Message-ID: <20230105041756.677120-4-saeed@kernel.org> (raw)
In-Reply-To: <20230105041756.677120-1-saeed@kernel.org>
From: Jiri Pirko <jiri@nvidia.com>
When removing a network namespace with mlx5 devlink instance being in
it, following callchain is performed:
cleanup_net (takes down_read(&pernet_ops_rwsem)
devlink_pernet_pre_exit()
devlink_reload()
mlx5_devlink_reload_down()
mlx5_unload_one_devl_locked()
mlx5_detach_device()
del_adev()
mlx5r_remove()
__mlx5_ib_remove()
mlx5_ib_roce_cleanup()
mlx5_remove_netdev_notifier()
unregister_netdevice_notifier (takes down_write(&pernet_ops_rwsem)
This deadlocks.
Resolve this by converting to register_netdevice_notifier_dev_net()
which does not take pernet_ops_rwsem and moves the notifier block around
according to netdev it takes as arg.
Use previously introduced netdev added/removed events to track uplink
netdev to be used for register_netdevice_notifier_dev_net() purposes.
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
drivers/infiniband/hw/mlx5/main.c | 78 +++++++++++++------
drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +
.../net/ethernet/mellanox/mlx5/core/events.c | 2 +
3 files changed, 59 insertions(+), 24 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index c669ef6e47e7..dc32e4518a28 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3012,26 +3012,63 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
}
}
-static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u32 port_num)
+static void mlx5_netdev_notifier_register(struct mlx5_roce *roce,
+ struct net_device *netdev)
{
int err;
- dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event;
- err = register_netdevice_notifier(&dev->port[port_num].roce.nb);
- if (err) {
- dev->port[port_num].roce.nb.notifier_call = NULL;
- return err;
- }
+ if (roce->tracking_netdev)
+ return;
+ roce->tracking_netdev = netdev;
+ roce->nb.notifier_call = mlx5_netdev_event;
+ err = register_netdevice_notifier_dev_net(netdev, &roce->nb, &roce->nn);
+ WARN_ON(err);
+}
- return 0;
+static void mlx5_netdev_notifier_unregister(struct mlx5_roce *roce)
+{
+ if (!roce->tracking_netdev)
+ return;
+ unregister_netdevice_notifier_dev_net(roce->tracking_netdev, &roce->nb,
+ &roce->nn);
+ roce->tracking_netdev = NULL;
}
-static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u32 port_num)
+static int mlx5e_mdev_notifier_event(struct notifier_block *nb,
+ unsigned long event, void *data)
{
- if (dev->port[port_num].roce.nb.notifier_call) {
- unregister_netdevice_notifier(&dev->port[port_num].roce.nb);
- dev->port[port_num].roce.nb.notifier_call = NULL;
+ struct mlx5_roce *roce = container_of(nb, struct mlx5_roce, mdev_nb);
+ struct net_device *netdev = data;
+
+ switch (event) {
+ case MLX5_DRIVER_EVENT_UPLINK_NETDEV:
+ if (netdev)
+ mlx5_netdev_notifier_register(roce, netdev);
+ else
+ mlx5_netdev_notifier_unregister(roce);
+ break;
+ default:
+ return NOTIFY_DONE;
}
+
+ return NOTIFY_OK;
+}
+
+static void mlx5_mdev_netdev_track(struct mlx5_ib_dev *dev, u32 port_num)
+{
+ struct mlx5_roce *roce = &dev->port[port_num].roce;
+
+ roce->mdev_nb.notifier_call = mlx5e_mdev_notifier_event;
+ mlx5_blocking_notifier_register(dev->mdev, &roce->mdev_nb);
+ mlx5_core_uplink_netdev_event_replay(dev->mdev);
+}
+
+static void mlx5_mdev_netdev_untrack(struct mlx5_ib_dev *dev, u32 port_num)
+{
+ struct mlx5_roce *roce = &dev->port[port_num].roce;
+
+ mlx5_blocking_notifier_unregister(dev->mdev, &roce->mdev_nb);
+ mlx5_netdev_notifier_unregister(roce);
}
static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
@@ -3138,7 +3175,7 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
if (mpi->mdev_events.notifier_call)
mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
mpi->mdev_events.notifier_call = NULL;
- mlx5_remove_netdev_notifier(ibdev, port_num);
+ mlx5_mdev_netdev_untrack(ibdev, port_num);
spin_lock(&port->mp.mpi_lock);
comps = mpi->mdev_refcnt;
@@ -3196,12 +3233,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
if (err)
goto unbind;
- err = mlx5_add_netdev_notifier(ibdev, port_num);
- if (err) {
- mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n",
- port_num + 1);
- goto unbind;
- }
+ mlx5_mdev_netdev_track(ibdev, port_num);
mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
@@ -3909,9 +3941,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
/* Register only for native ports */
- err = mlx5_add_netdev_notifier(dev, port_num);
- if (err)
- return err;
+ mlx5_mdev_netdev_track(dev, port_num);
err = mlx5_enable_eth(dev);
if (err)
@@ -3920,7 +3950,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
return 0;
cleanup:
- mlx5_remove_netdev_notifier(dev, port_num);
+ mlx5_mdev_netdev_untrack(dev, port_num);
return err;
}
@@ -3938,7 +3968,7 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
mlx5_disable_eth(dev);
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
- mlx5_remove_netdev_notifier(dev, port_num);
+ mlx5_mdev_netdev_untrack(dev, port_num);
}
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 8b91babdd4c0..7394e7f36ba7 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -832,6 +832,9 @@ struct mlx5_roce {
rwlock_t netdev_lock;
struct net_device *netdev;
struct notifier_block nb;
+ struct netdev_net_notifier nn;
+ struct notifier_block mdev_nb;
+ struct net_device *tracking_netdev;
atomic_t tx_port_affinity;
enum ib_port_state last_port_state;
struct mlx5_ib_dev *dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
index 9459e56ee90a..718cf09c28ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/events.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -424,6 +424,7 @@ int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_b
return blocking_notifier_chain_register(&events->sw_nh, nb);
}
+EXPORT_SYMBOL(mlx5_blocking_notifier_register);
int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
{
@@ -431,6 +432,7 @@ int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier
return blocking_notifier_chain_unregister(&events->sw_nh, nb);
}
+EXPORT_SYMBOL(mlx5_blocking_notifier_unregister);
int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
void *data)
--
2.38.1
next prev parent reply other threads:[~2023-01-05 4:19 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-05 4:17 [PATCH mlx5-next 0/8] mlx5 IPsec RoCEv2 support and netdev events fixes in RDMA Saeed Mahameed
2023-01-05 4:17 ` [PATCH mlx5-next 1/8] net/mlx5e: Fix trap event handling Saeed Mahameed
2023-01-05 13:06 ` Leon Romanovsky
2023-01-05 4:17 ` [PATCH mlx5-next 2/8] net/mlx5e: Propagate an internal event in case uplink netdev changes Saeed Mahameed
2023-01-05 13:07 ` Leon Romanovsky
2023-01-05 4:17 ` Saeed Mahameed [this message]
2023-01-05 4:17 ` [PATCH mlx5-next 4/8] net/mlx5: Introduce new destination type TABLE_TYPE Saeed Mahameed
2023-01-05 4:17 ` [PATCH mlx5-next 5/8] net/mlx5: Implement " Saeed Mahameed
2023-01-05 4:17 ` [PATCH mlx5-next 6/8] net/mlx5: Add IPSec priorities in RDMA namespaces Saeed Mahameed
2023-01-05 4:17 ` [PATCH mlx5-next 7/8] net/mlx5: Configure IPsec steering for ingress RoCEv2 traffic Saeed Mahameed
2023-01-05 4:17 ` [PATCH mlx5-next 8/8] net/mlx5: Configure IPsec steering for egress " Saeed Mahameed
2023-01-05 13:05 ` [PATCH mlx5-next 0/8] mlx5 IPsec RoCEv2 support and netdev events fixes in RDMA Leon Romanovsky
2023-01-05 18:37 ` Jakub Kicinski
2023-01-05 20:12 ` Leon Romanovsky
2023-01-05 20:25 ` Jakub Kicinski
2023-01-06 6:41 ` Leon Romanovsky
2023-01-06 19:34 ` Saeed Mahameed
2023-01-05 18:38 ` Jakub Kicinski
2023-01-06 19:35 ` Saeed Mahameed
2023-01-06 21:17 ` Jakub Kicinski
2023-01-06 23:38 ` Jason Gunthorpe
2023-01-07 6:37 ` Leon Romanovsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230105041756.677120-4-saeed@kernel.org \
--to=saeed@kernel.org \
--cc=jgg@nvidia.com \
--cc=jiri@nvidia.com \
--cc=leonro@nvidia.com \
--cc=linux-rdma@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=saeedm@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.