Netdev List
 help / color / mirror / Atom feed
From: Kuniyuki Iwashima <kuniyu@google.com>
To: "David S . Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	 Jakub Kicinski <kuba@kernel.org>,
	Paolo Abeni <pabeni@redhat.com>,
	Andrew Lunn <andrew+netdev@lunn.ch>
Cc: Simon Horman <horms@kernel.org>,
	Kuniyuki Iwashima <kuniyu@google.com>,
	 Kuniyuki Iwashima <kuni1840@gmail.com>,
	netdev@vger.kernel.org
Subject: [PATCH v1 net-next 14/14] ipvlan: Support per-netns netdev unregistration.
Date: Wed,  1 Jul 2026 21:41:52 +0000	[thread overview]
Message-ID: <20260701214334.266991-15-kuniyu@google.com> (raw)
In-Reply-To: <20260701214334.266991-1-kuniyu@google.com>

When a lower device is unregistered, its upper ipvlan devices
must also be unregistered.  However, these upper devices may
reside in different netns than the lower device.

Let's use unregister_netdevice_queue_net() to support per-netns
device unregistration for ipvlan.

The new dying flag in struct ipvl_dev is used to avoid a race
where ipvlan_link_delete() is called while its lower device is
being removed in ipvlan_device_event().

If dying is true in ipvlan_link_delete(), the ipvlan device has
already been torn down but is not yet unregistered.  In this case,
unregistration will be done in __rtnl_net_unlock() of the
->dellink() caller.

Tested:

1. Create veth in ns1 and two ipvlan devices in ns2 and ns3.

  # ip netns add ns1
  # ip netns add ns2
  # ip netns add ns3
  # ip -n ns1 link add veth0 type veth peer veth1
  # ip -n ns2 link add ipvl2 link veth0 link-netns ns1 type ipvlan mode l2
  # ip -n ns3 link add ipvl3 link veth0 link-netns ns1 type ipvlan mode l2

2. Run bpftrace to check that veth is unregistered first but
   waits for ipvlan to be unregistered.

  # bpftrace -e '#include <linux/netdevice.h>
  kprobe:ipvlan_uninit,
  kprobe:veth_dellink,
  kprobe:free_netdev {
      $dev = (struct net_device *)arg0;
      printf("PID: %d | DEV: %s%s\n", pid, $dev->name, kstack());
  }'

3. Remove the lower veth0 in ns1.

  # ip -n ns1 link del veth0

We can see that veth0 is freed after unregistering ipvl2 and ipvl3
in the per-netns work because ipvl_port holds a refcount on veth0.

  PID: 2010 | DEV: veth0
          veth_dellink+5
          rtnl_dellink+1213
          rtnetlink_rcv_msg+1791
  ...
  PID: 440 | DEV: ipvl2
          ipvlan_uninit+5
          unregister_netdevice_many_notify+7129
          unregister_netdevice_many_net+1050
          rtnl_net_work_func+136
          process_scheduled_works+2538
  ...
  PID: 440 | DEV: ipvl2
          free_netdev+5
          netdev_run_todo+4798
          process_scheduled_works+2538
  ...
  PID: 440 | DEV: ipvl3
          ipvlan_uninit+5
          unregister_netdevice_many_notify+7129
          unregister_netdevice_many_net+1050
          rtnl_net_work_func+136
          process_scheduled_works+2538
  ...
  PID: 2010 | DEV: veth0
          free_netdev+5
          netdev_run_todo+4798
          rtnl_dellink+1507
          rtnetlink_rcv_msg+1791
  ...
  PID: 440 | DEV: ipvl3
          free_netdev+5
          netdev_run_todo+4798
          process_scheduled_works+2538
  ...

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
---
 drivers/net/ipvlan/ipvlan.h      |  4 +++-
 drivers/net/ipvlan/ipvlan_main.c | 25 ++++++++++++++++---------
 drivers/net/ipvlan/ipvtap.c      |  3 ++-
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index a0736f5c89f6..a83313244add 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -72,6 +72,7 @@ struct ipvl_dev {
 	DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
 	netdev_features_t	sfeatures;
 	u32			msg_enable;
+	bool			dying;
 };
 
 struct ipvl_addr {
@@ -216,7 +217,8 @@ struct ipvtap_dev {
 	struct tap_dev	  tap;
 };
 
-void __ipvtap_dellink(struct net_device *dev, struct list_head *head);
+void __ipvtap_dellink(struct net *net, struct net_device *dev,
+		      struct list_head *head);
 #endif
 
 #endif /* __IPVLAN_H */
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 41024fe27b78..7e2cf43ca78a 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -700,7 +700,8 @@ int ipvlan_link_new(struct net_device *dev, struct rtnl_newlink_params *params,
 }
 EXPORT_SYMBOL_GPL(ipvlan_link_new);
 
-static void __ipvlan_link_delete(struct net_device *dev, struct list_head *head)
+static void __ipvlan_link_delete(struct net *net, struct net_device *dev,
+				 struct list_head *head)
 {
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
 	struct ipvl_addr *addr, *next;
@@ -715,7 +716,7 @@ static void __ipvlan_link_delete(struct net_device *dev, struct list_head *head)
 
 	ida_free(&ipvlan->port->ida, dev->dev_id);
 	list_del_rcu(&ipvlan->pnode);
-	unregister_netdevice_queue(dev, head);
+	unregister_netdevice_queue_net(net, dev, head);
 	netdev_upper_dev_unlink(ipvlan->phy_dev, dev);
 }
 
@@ -724,18 +725,20 @@ static void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
 
 	mutex_lock(&ipvlan->port->pnodes_lock);
-	__ipvlan_link_delete(dev, head);
+	if (!ipvlan->dying)
+		__ipvlan_link_delete(dev_net(dev), dev, head);
 	mutex_unlock(&ipvlan->port->pnodes_lock);
 }
 
 #if IS_ENABLED(CONFIG_IPVTAP)
-void __ipvtap_dellink(struct net_device *dev, struct list_head *head)
+void __ipvtap_dellink(struct net *net, struct net_device *dev,
+		      struct list_head *head)
 {
 	struct ipvtap_dev *vlantap = netdev_priv(dev);
 
 	netdev_rx_handler_unregister(dev);
 	tap_del_queues(&vlantap->tap);
-	__ipvlan_link_delete(dev, head);
+	__ipvlan_link_delete(net, dev, head);
 }
 EXPORT_SYMBOL_GPL(__ipvtap_dellink);
 #endif
@@ -832,22 +835,26 @@ static int ipvlan_device_event(struct notifier_block *unused,
 			ipvlan_migrate_l3s_hook(oldnet, newnet);
 		break;
 	}
-	case NETDEV_UNREGISTER:
+	case NETDEV_UNREGISTER: {
+		struct net *net = dev_net(dev);
+
 		if (dev->reg_state != NETREG_UNREGISTERING)
 			break;
 
 		list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode) {
+			ipvlan->dying = true;
+
 #if IS_ENABLED(CONFIG_IPVTAP)
 			if (ipvlan->dev->rtnl_link_ops != &ipvlan_link_ops)
-				__ipvtap_dellink(ipvlan->dev, &lst_kill);
+				__ipvtap_dellink(net, ipvlan->dev, &lst_kill);
 			else
 #endif
-				__ipvlan_link_delete(ipvlan->dev, &lst_kill);
+				__ipvlan_link_delete(net, ipvlan->dev, &lst_kill);
 		}
 
 		unregister_netdevice_many(&lst_kill);
 		break;
-
+	}
 	case NETDEV_FEAT_CHANGE:
 		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
 			netif_inherit_tso_max(ipvlan->dev, dev);
diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
index 17b0dd7cf73b..b790959c03f5 100644
--- a/drivers/net/ipvlan/ipvtap.c
+++ b/drivers/net/ipvlan/ipvtap.c
@@ -110,7 +110,8 @@ static void ipvtap_dellink(struct net_device *dev,
 	struct ipvl_port *port = vlantap->vlan.port;
 
 	mutex_lock(&port->pnodes_lock);
-	__ipvtap_dellink(dev, head);
+	if (!vlantap->vlan.dying)
+		__ipvtap_dellink(dev_net(dev), dev, head);
 	mutex_unlock(&port->pnodes_lock);
 }
 
-- 
2.55.0.rc0.799.gd6f94ed593-goog


  parent reply	other threads:[~2026-07-01 21:43 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-07-01 21:41 [PATCH v1 net-next 00/14] net: Support per-netns device unregistration Kuniyuki Iwashima
2026-07-01 21:41 ` [PATCH v1 net-next 01/14] rtnetlink: Lock sock_net(skb->sk) in rtnl_newlink() Kuniyuki Iwashima
2026-07-01 21:41 ` [PATCH v1 net-next 02/14] rtnetlink: Call unregister_netdevice_many() only once in rtnl_link_unregister() Kuniyuki Iwashima
2026-07-01 21:41 ` [PATCH v1 net-next 03/14] rtnetlink: Add per-netns rtnl_work Kuniyuki Iwashima
2026-07-01 21:41 ` [PATCH v1 net-next 04/14] net: Wrap default_device_exit_net() with __rtnl_net_lock() Kuniyuki Iwashima
2026-07-01 21:41 ` [PATCH v1 net-next 05/14] net: Hold __rtnl_net_lock() in netdev_wait_allrefs_any() Kuniyuki Iwashima
2026-07-01 21:41 ` [PATCH v1 net-next 06/14] net: Add per-netns netdev unregistration infra Kuniyuki Iwashima
2026-07-01 21:41 ` [PATCH v1 net-next 07/14] net: Call unregister_netdevice_many() per netns Kuniyuki Iwashima
2026-07-01 21:41 ` [PATCH v1 net-next 08/14] veth: Support per-netns device unregistration Kuniyuki Iwashima
2026-07-01 21:41 ` [PATCH v1 net-next 09/14] bareudp: Protect bareudp_list with mutex Kuniyuki Iwashima
2026-07-01 21:41 ` [PATCH v1 net-next 10/14] bareudp: Support per-netns netdev unregistration Kuniyuki Iwashima
2026-07-01 21:41 ` [PATCH v1 net-next 11/14] ipvlan: Convert ipvl_port.count to refcount_t Kuniyuki Iwashima
2026-07-01 21:41 ` [PATCH v1 net-next 12/14] ipvlan: Synchronise ipvlan_init() and ipvlan_uninit() for the same lower dev Kuniyuki Iwashima
2026-07-01 21:41 ` [PATCH v1 net-next 13/14] ipvlan: Protect ipvl_port.ipvlans with mutex Kuniyuki Iwashima
2026-07-01 21:41 ` Kuniyuki Iwashima [this message]
2026-07-02  7:45 ` [syzbot ci] Re: net: Support per-netns device unregistration syzbot ci

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260701214334.266991-15-kuniyu@google.com \
    --to=kuniyu@google.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=kuba@kernel.org \
    --cc=kuni1840@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox