From: Jakub Kicinski <kuba@kernel.org>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, edumazet@google.com, pabeni@redhat.com,
andrew+netdev@lunn.ch, horms@kernel.org, sdf@fomichev.me,
hramamurthy@google.com, kuniyu@amazon.com, jdamato@fastly.com,
Jakub Kicinski <kuba@kernel.org>
Subject: [PATCH net-next v2 1/8] net: avoid potential race between netdev_get_by_index_lock() and netns switch
Date: Tue, 8 Apr 2025 12:59:48 -0700 [thread overview]
Message-ID: <20250408195956.412733-2-kuba@kernel.org> (raw)
In-Reply-To: <20250408195956.412733-1-kuba@kernel.org>
netdev_get_by_index_lock() performs following steps:
rcu_lock();
dev = lookup(netns, ifindex);
dev_get(dev);
rcu_unlock();
[... lock & validate the dev ...]
return dev
Validation right now only checks if the device is registered but since
the lookup is netns-aware we must also protect against the device
switching netns right after we dropped the RCU lock. Otherwise
the caller in netns1 may get a pointer to a device which has just
switched to netns2.
We can't hold the lock for the entire netns change process (because of
the NETDEV_UNREGISTER notifier), and there's no existing marking to
indicate that the netns is unlisted because of netns move, so add one.
AFAIU none of the existing netdev_get_by_index_lock() callers can
suffer from this problem (NAPI code double checks the netns membership
and other callers are either under rtnl_lock or not ns-sensitive),
so this patch does not have to be treated as a fix.
Reviewed-by: Joe Damato <jdamato@fastly.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
include/linux/netdevice.h | 6 +++++-
net/core/dev.h | 2 +-
net/core/dev.c | 25 ++++++++++++++++++-------
3 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cf3b6445817b..8e9be80bc167 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1952,6 +1952,7 @@ enum netdev_reg_state {
* @priv_destructor: Called from unregister
* @npinfo: XXX: need comments on this one
* @nd_net: Network namespace this network device is inside
+ * protected by @lock
*
* @ml_priv: Mid-layer private
* @ml_priv_type: Mid-layer private type
@@ -2359,6 +2360,9 @@ struct net_device {
bool dismantle;
+ /** @moving_ns: device is changing netns, protected by @lock */
+ bool moving_ns;
+
enum {
RTNL_LINK_INITIALIZED,
RTNL_LINK_INITIALIZING,
@@ -2521,7 +2525,7 @@ struct net_device {
* @net_shaper_hierarchy, @reg_state, @threaded
*
* Double protects:
- * @up
+ * @up, @moving_ns, @nd_net
*
* Double ops protects:
* @real_num_rx_queues, @real_num_tx_queues
diff --git a/net/core/dev.h b/net/core/dev.h
index 710abc05ebdb..e7bf21f52fc7 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -30,7 +30,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id);
struct net_device *dev_get_by_napi_id(unsigned int napi_id);
struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex);
-struct net_device *__netdev_put_lock(struct net_device *dev);
+struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net);
struct net_device *
netdev_xa_find_lock(struct net *net, struct net_device *dev,
unsigned long *index);
diff --git a/net/core/dev.c b/net/core/dev.c
index 4ccc6dc5303e..8c13e5339cc9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -828,7 +828,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
dev_hold(dev);
rcu_read_unlock();
- dev = __netdev_put_lock(dev);
+ dev = __netdev_put_lock(dev, net);
if (!dev)
return NULL;
@@ -1039,10 +1039,11 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
* This helper is intended for locking net_device after it has been looked up
* using a lockless lookup helper. Lock prevents the instance from going away.
*/
-struct net_device *__netdev_put_lock(struct net_device *dev)
+struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net)
{
netdev_lock(dev);
- if (dev->reg_state > NETREG_REGISTERED) {
+ if (dev->reg_state > NETREG_REGISTERED ||
+ dev->moving_ns || !net_eq(dev_net(dev), net)) {
netdev_unlock(dev);
dev_put(dev);
return NULL;
@@ -1070,7 +1071,7 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex)
if (!dev)
return NULL;
- return __netdev_put_lock(dev);
+ return __netdev_put_lock(dev, net);
}
struct net_device *
@@ -1090,7 +1091,7 @@ netdev_xa_find_lock(struct net *net, struct net_device *dev,
dev_hold(dev);
rcu_read_unlock();
- dev = __netdev_put_lock(dev);
+ dev = __netdev_put_lock(dev, net);
if (dev)
return dev;
@@ -12157,7 +12158,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
netif_close(dev);
/* And unlink it from device chain */
unlist_netdevice(dev);
- netdev_unlock_ops(dev);
+
+ if (!netdev_need_ops_lock(dev))
+ netdev_lock(dev);
+ dev->moving_ns = true;
+ netdev_unlock(dev);
synchronize_net();
@@ -12193,7 +12198,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
move_netdevice_notifiers_dev_net(dev, net);
/* Actually switch the network namespace */
+ netdev_lock(dev);
dev_net_set(dev, net);
+ netdev_unlock(dev);
dev->ifindex = new_ifindex;
if (new_name[0]) {
@@ -12219,7 +12226,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
err = netdev_change_owner(dev, net_old, net);
WARN_ON(err);
- netdev_lock_ops(dev);
+ netdev_lock(dev);
+ dev->moving_ns = false;
+ if (!netdev_need_ops_lock(dev))
+ netdev_unlock(dev);
+
/* Add the device back in the hashes */
list_netdevice(dev);
/* Notify protocols, that a new device appeared. */
--
2.49.0
next prev parent reply other threads:[~2025-04-08 20:00 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-08 19:59 [PATCH net-next v2 0/8] net: depend on instance lock for queue related netlink ops Jakub Kicinski
2025-04-08 19:59 ` Jakub Kicinski [this message]
2025-04-08 19:59 ` [PATCH net-next v2 2/8] net: designate XSK pool pointers in queues as "ops protected" Jakub Kicinski
2025-04-08 19:59 ` [PATCH net-next v2 3/8] netdev: add "ops compat locking" helpers Jakub Kicinski
2025-04-08 19:59 ` [PATCH net-next v2 4/8] netdev: don't hold rtnl_lock over nl queue info get when possible Jakub Kicinski
2025-04-08 19:59 ` [PATCH net-next v2 5/8] xdp: double protect netdev->xdp_flags with netdev->lock Jakub Kicinski
2025-04-08 22:15 ` Harshitha Ramamurthy
2025-04-09 18:40 ` Martin KaFai Lau
2025-04-08 19:59 ` [PATCH net-next v2 6/8] netdev: depend on netdev->lock for xdp features Jakub Kicinski
2025-04-10 17:10 ` Kuniyuki Iwashima
2025-04-11 2:10 ` Jakub Kicinski
2025-04-11 2:23 ` Jakub Kicinski
2025-04-11 17:41 ` Stanislav Fomichev
2025-04-11 19:19 ` Jakub Kicinski
2025-04-11 2:36 ` Kuniyuki Iwashima
2025-04-08 19:59 ` [PATCH net-next v2 7/8] docs: netdev: break down the instance locking info per ops struct Jakub Kicinski
2025-04-09 2:59 ` Joe Damato
2025-04-10 6:01 ` Jacob Keller
2025-04-10 16:08 ` Jakub Kicinski
2025-04-10 22:35 ` Keller, Jacob E
2025-04-10 23:39 ` Jakub Kicinski
2025-04-11 21:26 ` Jacob Keller
2025-04-08 19:59 ` [PATCH net-next v2 8/8] netdev: depend on netdev->lock for qstats in ops locked drivers Jakub Kicinski
2025-04-10 5:23 ` Jacob Keller
2025-04-10 23:46 ` Jakub Kicinski
2025-04-11 21:29 ` Jacob Keller
2025-04-10 0:40 ` [PATCH net-next v2 0/8] net: depend on instance lock for queue related netlink ops patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250408195956.412733-2-kuba@kernel.org \
--to=kuba@kernel.org \
--cc=andrew+netdev@lunn.ch \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=hramamurthy@google.com \
--cc=jdamato@fastly.com \
--cc=kuniyu@amazon.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@fomichev.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.