From: Jakub Kicinski <kuba@kernel.org>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, edumazet@google.com, pabeni@redhat.com,
andrew+netdev@lunn.ch, horms@kernel.org, sdf@fomichev.me,
hramamurthy@google.com, kuniyu@amazon.com, jdamato@fastly.com
Subject: [PATCH net-next 1/8] net: avoid potential race between netdev_get_by_index_lock() and netns switch
Date: Mon, 7 Apr 2025 12:01:10 -0700 [thread overview]
Message-ID: <20250407190117.16528-2-kuba@kernel.org> (raw)
In-Reply-To: <20250407190117.16528-1-kuba@kernel.org>
netdev_get_by_index_lock() performs following steps:
rcu_lock();
dev = lookup(netns, ifindex);
dev_get(dev);
rcu_unlock();
[... lock & validate the dev ...]
return dev
Validation right now only checks if the device is registered but since
the lookup is netns-aware we must also protect against the device
switching netns right after we dropped the RCU lock. Otherwise
the caller in netns1 may get a pointer to a device which has just
switched to netns2.
We can't hold the lock for the entire netns change process (because of
the NETDEV_UNREGISTER notifier), and there's no existing marking to
indicate that the netns is unlisted because of netns move, so add one.
AFAIU none of the existing netdev_get_by_index_lock() callers can
suffer from this problem (NAPI code double checks the netns membership
and other callers are either under rtnl_lock or not ns-sensitive),
so this patch does not have to be treated as a fix.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
include/linux/netdevice.h | 6 +++++-
net/core/dev.h | 2 +-
net/core/dev.c | 25 ++++++++++++++++++-------
3 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cf3b6445817b..8e9be80bc167 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1952,6 +1952,7 @@ enum netdev_reg_state {
* @priv_destructor: Called from unregister
* @npinfo: XXX: need comments on this one
* @nd_net: Network namespace this network device is inside
+ * protected by @lock
*
* @ml_priv: Mid-layer private
* @ml_priv_type: Mid-layer private type
@@ -2359,6 +2360,9 @@ struct net_device {
bool dismantle;
+ /** @moving_ns: device is changing netns, protected by @lock */
+ bool moving_ns;
+
enum {
RTNL_LINK_INITIALIZED,
RTNL_LINK_INITIALIZING,
@@ -2521,7 +2525,7 @@ struct net_device {
* @net_shaper_hierarchy, @reg_state, @threaded
*
* Double protects:
- * @up
+ * @up, @moving_ns, @nd_net
*
* Double ops protects:
* @real_num_rx_queues, @real_num_tx_queues
diff --git a/net/core/dev.h b/net/core/dev.h
index 7ee203395d8e..3cc2d8787c83 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -29,7 +29,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id);
struct net_device *dev_get_by_napi_id(unsigned int napi_id);
struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex);
-struct net_device *__netdev_put_lock(struct net_device *dev);
+struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net);
struct net_device *
netdev_xa_find_lock(struct net *net, struct net_device *dev,
unsigned long *index);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0608605cfc24..7060c3171cd8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -828,7 +828,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
dev_hold(dev);
rcu_read_unlock();
- dev = __netdev_put_lock(dev);
+ dev = __netdev_put_lock(dev, net);
if (!dev)
return NULL;
@@ -1039,10 +1039,11 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
* This helper is intended for locking net_device after it has been looked up
* using a lockless lookup helper. Lock prevents the instance from going away.
*/
-struct net_device *__netdev_put_lock(struct net_device *dev)
+struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net)
{
netdev_lock(dev);
- if (dev->reg_state > NETREG_REGISTERED) {
+ if (dev->reg_state > NETREG_REGISTERED ||
+ dev->moving_ns || !net_eq(dev_net(dev), net)) {
netdev_unlock(dev);
dev_put(dev);
return NULL;
@@ -1070,7 +1071,7 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex)
if (!dev)
return NULL;
- return __netdev_put_lock(dev);
+ return __netdev_put_lock(dev, net);
}
struct net_device *
@@ -1090,7 +1091,7 @@ netdev_xa_find_lock(struct net *net, struct net_device *dev,
dev_hold(dev);
rcu_read_unlock();
- dev = __netdev_put_lock(dev);
+ dev = __netdev_put_lock(dev, net);
if (dev)
return dev;
@@ -12154,7 +12155,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
netif_close(dev);
/* And unlink it from device chain */
unlist_netdevice(dev);
- netdev_unlock_ops(dev);
+
+ if (!netdev_need_ops_lock(dev))
+ netdev_lock(dev);
+ dev->moving_ns = true;
+ netdev_unlock(dev);
synchronize_net();
@@ -12190,7 +12195,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
move_netdevice_notifiers_dev_net(dev, net);
/* Actually switch the network namespace */
+ netdev_lock(dev);
dev_net_set(dev, net);
+ netdev_unlock(dev);
dev->ifindex = new_ifindex;
if (new_name[0]) {
@@ -12216,7 +12223,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
err = netdev_change_owner(dev, net_old, net);
WARN_ON(err);
- netdev_lock_ops(dev);
+ netdev_lock(dev);
+ dev->moving_ns = false;
+ if (!netdev_need_ops_lock(dev))
+ netdev_unlock(dev);
+
/* Add the device back in the hashes */
list_netdevice(dev);
/* Notify protocols, that a new device appeared. */
--
2.49.0
next prev parent reply other threads:[~2025-04-07 19:01 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-07 19:01 [PATCH net-next 0/8] net: depend on instance lock for queue related netlink ops Jakub Kicinski
2025-04-07 19:01 ` Jakub Kicinski [this message]
2025-04-08 2:40 ` [PATCH net-next 1/8] net: avoid potential race between netdev_get_by_index_lock() and netns switch Joe Damato
2025-04-07 19:01 ` [PATCH net-next 2/8] net: designate XSK pool pointers in queues as "ops protected" Jakub Kicinski
2025-04-08 2:17 ` Joe Damato
2025-04-08 14:59 ` Jakub Kicinski
2025-04-07 19:01 ` [PATCH net-next 3/8] netdev: add "ops compat locking" helpers Jakub Kicinski
2025-04-08 2:41 ` Joe Damato
2025-04-07 19:01 ` [PATCH net-next 4/8] netdev: don't hold rtnl_lock over nl queue info get when possible Jakub Kicinski
2025-04-08 2:28 ` Joe Damato
2025-04-07 19:01 ` [PATCH net-next 5/8] xdp: double protect netdev->xdp_flags with netdev->lock Jakub Kicinski
2025-04-08 2:30 ` Joe Damato
2025-04-07 19:01 ` [PATCH net-next 6/8] netdev: depend on netdev->lock for xdp features Jakub Kicinski
2025-04-08 2:31 ` Joe Damato
2025-04-07 19:01 ` [PATCH net-next 7/8] docs: netdev: break down the instance locking info per ops struct Jakub Kicinski
2025-04-08 2:34 ` Joe Damato
2025-04-08 15:08 ` Jakub Kicinski
2025-04-07 19:01 ` [PATCH net-next 8/8] netdev: depend on netdev->lock for qstats in ops locked drivers Jakub Kicinski
2025-04-08 2:37 ` Joe Damato
2025-04-08 0:28 ` [PATCH net-next 0/8] net: depend on instance lock for queue related netlink ops Stanislav Fomichev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250407190117.16528-2-kuba@kernel.org \
--to=kuba@kernel.org \
--cc=andrew+netdev@lunn.ch \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=hramamurthy@google.com \
--cc=jdamato@fastly.com \
--cc=kuniyu@amazon.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@fomichev.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).