From: Jakub Kicinski <kuba@kernel.org>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, edumazet@google.com, pabeni@redhat.com,
andrew+netdev@lunn.ch, horms@kernel.org, sdf@fomichev.me,
hramamurthy@google.com, kuniyu@amazon.com, jdamato@fastly.com
Subject: [PATCH net-next 1/8] net: avoid potential race between netdev_get_by_index_lock() and netns switch
Date: Mon, 7 Apr 2025 12:01:10 -0700 [thread overview]
Message-ID: <20250407190117.16528-2-kuba@kernel.org> (raw)
In-Reply-To: <20250407190117.16528-1-kuba@kernel.org>
netdev_get_by_index_lock() performs following steps:
rcu_lock();
dev = lookup(netns, ifindex);
dev_get(dev);
rcu_unlock();
[... lock & validate the dev ...]
return dev
Validation right now only checks if the device is registered but since
the lookup is netns-aware we must also protect against the device
switching netns right after we dropped the RCU lock. Otherwise
the caller in netns1 may get a pointer to a device which has just
switched to netns2.
We can't hold the lock for the entire netns change process (because of
the NETDEV_UNREGISTER notifier), and there's no existing marking to
indicate that the netns is unlisted because of netns move, so add one.
AFAIU none of the existing netdev_get_by_index_lock() callers can
suffer from this problem (NAPI code double checks the netns membership
and other callers are either under rtnl_lock or not ns-sensitive),
so this patch does not have to be treated as a fix.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
include/linux/netdevice.h | 6 +++++-
net/core/dev.h | 2 +-
net/core/dev.c | 25 ++++++++++++++++++-------
3 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cf3b6445817b..8e9be80bc167 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1952,6 +1952,7 @@ enum netdev_reg_state {
* @priv_destructor: Called from unregister
* @npinfo: XXX: need comments on this one
* @nd_net: Network namespace this network device is inside
+ * protected by @lock
*
* @ml_priv: Mid-layer private
* @ml_priv_type: Mid-layer private type
@@ -2359,6 +2360,9 @@ struct net_device {
bool dismantle;
+ /** @moving_ns: device is changing netns, protected by @lock */
+ bool moving_ns;
+
enum {
RTNL_LINK_INITIALIZED,
RTNL_LINK_INITIALIZING,
@@ -2521,7 +2525,7 @@ struct net_device {
* @net_shaper_hierarchy, @reg_state, @threaded
*
* Double protects:
- * @up
+ * @up, @moving_ns, @nd_net
*
* Double ops protects:
* @real_num_rx_queues, @real_num_tx_queues
diff --git a/net/core/dev.h b/net/core/dev.h
index 7ee203395d8e..3cc2d8787c83 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -29,7 +29,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id);
struct net_device *dev_get_by_napi_id(unsigned int napi_id);
struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex);
-struct net_device *__netdev_put_lock(struct net_device *dev);
+struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net);
struct net_device *
netdev_xa_find_lock(struct net *net, struct net_device *dev,
unsigned long *index);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0608605cfc24..7060c3171cd8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -828,7 +828,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
dev_hold(dev);
rcu_read_unlock();
- dev = __netdev_put_lock(dev);
+ dev = __netdev_put_lock(dev, net);
if (!dev)
return NULL;
@@ -1039,10 +1039,11 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
* This helper is intended for locking net_device after it has been looked up
* using a lockless lookup helper. Lock prevents the instance from going away.
*/
-struct net_device *__netdev_put_lock(struct net_device *dev)
+struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net)
{
netdev_lock(dev);
- if (dev->reg_state > NETREG_REGISTERED) {
+ if (dev->reg_state > NETREG_REGISTERED ||
+ dev->moving_ns || !net_eq(dev_net(dev), net)) {
netdev_unlock(dev);
dev_put(dev);
return NULL;
@@ -1070,7 +1071,7 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex)
if (!dev)
return NULL;
- return __netdev_put_lock(dev);
+ return __netdev_put_lock(dev, net);
}
struct net_device *
@@ -1090,7 +1091,7 @@ netdev_xa_find_lock(struct net *net, struct net_device *dev,
dev_hold(dev);
rcu_read_unlock();
- dev = __netdev_put_lock(dev);
+ dev = __netdev_put_lock(dev, net);
if (dev)
return dev;
@@ -12154,7 +12155,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
netif_close(dev);
/* And unlink it from device chain */
unlist_netdevice(dev);
- netdev_unlock_ops(dev);
+
+ if (!netdev_need_ops_lock(dev))
+ netdev_lock(dev);
+ dev->moving_ns = true;
+ netdev_unlock(dev);
synchronize_net();
@@ -12190,7 +12195,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
move_netdevice_notifiers_dev_net(dev, net);
/* Actually switch the network namespace */
+ netdev_lock(dev);
dev_net_set(dev, net);
+ netdev_unlock(dev);
dev->ifindex = new_ifindex;
if (new_name[0]) {
@@ -12216,7 +12223,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
err = netdev_change_owner(dev, net_old, net);
WARN_ON(err);
- netdev_lock_ops(dev);
+ netdev_lock(dev);
+ dev->moving_ns = false;
+ if (!netdev_need_ops_lock(dev))
+ netdev_unlock(dev);
+
/* Add the device back in the hashes */
list_netdevice(dev);
/* Notify protocols, that a new device appeared. */
--
2.49.0
next prev parent reply other threads:[~2025-04-07 19:01 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-07 19:01 [PATCH net-next 0/8] net: depend on instance lock for queue related netlink ops Jakub Kicinski
2025-04-07 19:01 ` Jakub Kicinski [this message]
2025-04-08 2:40 ` [PATCH net-next 1/8] net: avoid potential race between netdev_get_by_index_lock() and netns switch Joe Damato
2025-04-07 19:01 ` [PATCH net-next 2/8] net: designate XSK pool pointers in queues as "ops protected" Jakub Kicinski
2025-04-08 2:17 ` Joe Damato
2025-04-08 14:59 ` Jakub Kicinski
2025-04-07 19:01 ` [PATCH net-next 3/8] netdev: add "ops compat locking" helpers Jakub Kicinski
2025-04-08 2:41 ` Joe Damato
2025-04-07 19:01 ` [PATCH net-next 4/8] netdev: don't hold rtnl_lock over nl queue info get when possible Jakub Kicinski
2025-04-08 2:28 ` Joe Damato
2025-04-07 19:01 ` [PATCH net-next 5/8] xdp: double protect netdev->xdp_flags with netdev->lock Jakub Kicinski
2025-04-08 2:30 ` Joe Damato
2025-04-07 19:01 ` [PATCH net-next 6/8] netdev: depend on netdev->lock for xdp features Jakub Kicinski
2025-04-08 2:31 ` Joe Damato
2025-04-07 19:01 ` [PATCH net-next 7/8] docs: netdev: break down the instance locking info per ops struct Jakub Kicinski
2025-04-08 2:34 ` Joe Damato
2025-04-08 15:08 ` Jakub Kicinski
2025-04-07 19:01 ` [PATCH net-next 8/8] netdev: depend on netdev->lock for qstats in ops locked drivers Jakub Kicinski
2025-04-08 2:37 ` Joe Damato
2025-04-08 0:28 ` [PATCH net-next 0/8] net: depend on instance lock for queue related netlink ops Stanislav Fomichev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250407190117.16528-2-kuba@kernel.org \
--to=kuba@kernel.org \
--cc=andrew+netdev@lunn.ch \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=hramamurthy@google.com \
--cc=jdamato@fastly.com \
--cc=kuniyu@amazon.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@fomichev.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.