* [PATCH v2 net-next 01/16] net: add exit_batch_rtnl() method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 02/16] nexthop: convert nexthop_net_exit_batch to exit_batch_rtnl method Eric Dumazet
` (14 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
Many (struct pernet_operations)->exit_batch() methods have
to acquire rtnl.
In the presence of rtnl mutex pressure, this makes cleanup_net()
very slow.
This patch adds a new exit_batch_rtnl() method to reduce
the number of rtnl acquisitions from cleanup_net().
exit_batch_rtnl() handlers are called while rtnl is locked,
and devices to be killed can be queued in a list provided
as their second argument.
A single unregister_netdevice_many() is called right
before rtnl is released.
exit_batch_rtnl() handlers are called before ->exit() and
->exit_batch() handlers.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/net_namespace.h | 3 +++
net/core/net_namespace.c | 31 ++++++++++++++++++++++++++++++-
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 13b3a4e29fdb3b1f37649072ea71181ec1bad256..5e5b522eca88e9e19345792bd5137eb8cf374265 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -450,6 +450,9 @@ struct pernet_operations {
void (*pre_exit)(struct net *net);
void (*exit)(struct net *net);
void (*exit_batch)(struct list_head *net_exit_list);
+ /* Following method is called with RTNL held. */
+ void (*exit_batch_rtnl)(struct list_head *net_exit_list,
+ struct list_head *dev_kill_list);
unsigned int *id;
size_t size;
};
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 72799533426b6162256d7c4eef355af96c66e844..233ec0cdd0111d5ca21c6f8a66f4c1f3fbc4657b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -318,8 +318,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
/* Must be called with pernet_ops_rwsem held */
const struct pernet_operations *ops, *saved_ops;
- int error = 0;
LIST_HEAD(net_exit_list);
+ LIST_HEAD(dev_kill_list);
+ int error = 0;
refcount_set(&net->ns.count, 1);
ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
@@ -357,6 +358,15 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
synchronize_rcu();
+ ops = saved_ops;
+ rtnl_lock();
+ list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
+ if (ops->exit_batch_rtnl)
+ ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
+ }
+ unregister_netdevice_many(&dev_kill_list);
+ rtnl_unlock();
+
ops = saved_ops;
list_for_each_entry_continue_reverse(ops, &pernet_list, list)
ops_exit_list(ops, &net_exit_list);
@@ -573,6 +583,7 @@ static void cleanup_net(struct work_struct *work)
struct net *net, *tmp, *last;
struct llist_node *net_kill_list;
LIST_HEAD(net_exit_list);
+ LIST_HEAD(dev_kill_list);
/* Atomically snapshot the list of namespaces to cleanup */
net_kill_list = llist_del_all(&cleanup_list);
@@ -613,6 +624,14 @@ static void cleanup_net(struct work_struct *work)
*/
synchronize_rcu();
+ rtnl_lock();
+ list_for_each_entry_reverse(ops, &pernet_list, list) {
+ if (ops->exit_batch_rtnl)
+ ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
+ }
+ unregister_netdevice_many(&dev_kill_list);
+ rtnl_unlock();
+
/* Run all of the network namespace exit methods */
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_exit_list(ops, &net_exit_list);
@@ -1193,7 +1212,17 @@ static void free_exit_list(struct pernet_operations *ops, struct list_head *net_
{
ops_pre_exit_list(ops, net_exit_list);
synchronize_rcu();
+
+ if (ops->exit_batch_rtnl) {
+ LIST_HEAD(dev_kill_list);
+
+ rtnl_lock();
+ ops->exit_batch_rtnl(net_exit_list, &dev_kill_list);
+ unregister_netdevice_many(&dev_kill_list);
+ rtnl_unlock();
+ }
ops_exit_list(ops, net_exit_list);
+
ops_free_list(ops, net_exit_list);
}
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH v2 net-next 02/16] nexthop: convert nexthop_net_exit_batch to exit_batch_rtnl method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 01/16] net: add exit_batch_rtnl() method Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 03/16] net: convert default_device_exit_batch() " Eric Dumazet
` (13 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
exit_batch_rtnl() is called while RTNL is held.
This saves one rtnl_lock()/rtnl_unlock() pair.
We also need to create nexthop_net_exit()
to make sure net->nexthop.devhash is not freed too soon;
otherwise we would not be able to unregister netdevs
from exit_batch_rtnl() methods.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/nexthop.c | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index bbff68b5b5d4a1d835c9785fbe84f4cab32a1db0..7270a8631406c508eebf85c42eb29a5268d7d7cf 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3737,16 +3737,20 @@ void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
}
EXPORT_SYMBOL(nexthop_res_grp_activity_update);
-static void __net_exit nexthop_net_exit_batch(struct list_head *net_list)
+static void __net_exit nexthop_net_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list) {
+ ASSERT_RTNL();
+ list_for_each_entry(net, net_list, exit_list)
flush_all_nexthops(net);
- kfree(net->nexthop.devhash);
- }
- rtnl_unlock();
+}
+
+static void __net_exit nexthop_net_exit(struct net *net)
+{
+ kfree(net->nexthop.devhash);
+ net->nexthop.devhash = NULL;
}
static int __net_init nexthop_net_init(struct net *net)
@@ -3764,7 +3768,8 @@ static int __net_init nexthop_net_init(struct net *net)
static struct pernet_operations nexthop_net_ops = {
.init = nexthop_net_init,
- .exit_batch = nexthop_net_exit_batch,
+ .exit = nexthop_net_exit,
+ .exit_batch_rtnl = nexthop_net_exit_batch_rtnl,
};
static int __init nexthop_init(void)
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH v2 net-next 03/16] net: convert default_device_exit_batch() to exit_batch_rtnl method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 01/16] net: add exit_batch_rtnl() method Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 02/16] nexthop: convert nexthop_net_exit_batch to exit_batch_rtnl method Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 04/16] bareudp: use exit_batch_rtnl() method Eric Dumazet
` (12 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
exit_batch_rtnl() is called while RTNL is held,
and devices to be unregistered can be queued in the dev_kill_list.
This saves one rtnl_lock()/rtnl_unlock() pair,
and one unregister_netdevice_many() call.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/core/dev.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index b53b9c94de4008aa7e808d58618675425aff0f4c..86107a9c9dd09d5590578923018be56065fbd58c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11596,7 +11596,8 @@ static void __net_exit default_device_exit_net(struct net *net)
}
}
-static void __net_exit default_device_exit_batch(struct list_head *net_list)
+static void __net_exit default_device_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_kill_list)
{
/* At exit all network devices most be removed from a network
* namespace. Do this in the reverse order of registration.
@@ -11605,9 +11606,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
*/
struct net_device *dev;
struct net *net;
- LIST_HEAD(dev_kill_list);
- rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
default_device_exit_net(net);
cond_resched();
@@ -11616,17 +11615,15 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
- dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
+ dev->rtnl_link_ops->dellink(dev, dev_kill_list);
else
- unregister_netdevice_queue(dev, &dev_kill_list);
+ unregister_netdevice_queue(dev, dev_kill_list);
}
}
- unregister_netdevice_many(&dev_kill_list);
- rtnl_unlock();
}
static struct pernet_operations __net_initdata default_device_ops = {
- .exit_batch = default_device_exit_batch,
+ .exit_batch_rtnl = default_device_exit_batch_rtnl,
};
static void __init net_dev_struct_check(void)
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH v2 net-next 04/16] bareudp: use exit_batch_rtnl() method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
` (2 preceding siblings ...)
2024-02-02 17:39 ` [PATCH v2 net-next 03/16] net: convert default_device_exit_batch() " Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 05/16] bonding: " Eric Dumazet
` (11 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
exit_batch_rtnl() is called while RTNL is held,
and devices to be unregistered can be queued in the dev_kill_list.
This saves one rtnl_lock()/rtnl_unlock() pair,
and one unregister_netdevice_many() call.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
drivers/net/bareudp.c | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
index 31377bb1cc97cba08e02dc7d48761068627af3fb..4db6122c9b43032a36b98916bb4390e3d6f08f68 100644
--- a/drivers/net/bareudp.c
+++ b/drivers/net/bareudp.c
@@ -760,23 +760,18 @@ static void bareudp_destroy_tunnels(struct net *net, struct list_head *head)
unregister_netdevice_queue(bareudp->dev, head);
}
-static void __net_exit bareudp_exit_batch_net(struct list_head *net_list)
+static void __net_exit bareudp_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_kill_list)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
list_for_each_entry(net, net_list, exit_list)
- bareudp_destroy_tunnels(net, &list);
-
- /* unregister the devices gathered above */
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ bareudp_destroy_tunnels(net, dev_kill_list);
}
static struct pernet_operations bareudp_net_ops = {
.init = bareudp_init_net,
- .exit_batch = bareudp_exit_batch_net,
+ .exit_batch_rtnl = bareudp_exit_batch_rtnl,
.id = &bareudp_net_id,
.size = sizeof(struct bareudp_net),
};
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH v2 net-next 05/16] bonding: use exit_batch_rtnl() method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
` (3 preceding siblings ...)
2024-02-02 17:39 ` [PATCH v2 net-next 04/16] bareudp: use exit_batch_rtnl() method Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 19:34 ` Jay Vosburgh
2024-02-02 17:39 ` [PATCH v2 net-next 06/16] geneve: " Eric Dumazet
` (10 subsequent siblings)
15 siblings, 1 reply; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet, Jay Vosburgh,
Andy Gospodarek
exit_batch_rtnl() is called while RTNL is held,
and devices to be unregistered can be queued in the dev_kill_list.
This saves one rtnl_lock()/rtnl_unlock() pair,
and one unregister_netdevice_many() call.
v2: Added bond_net_pre_exit() method to make sure bond_destroy_sysfs()
is called before we unregister the devices in bond_net_exit_batch_rtnl
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/netdev/170688415193.5216.10499830272732622816@kwain/
Cc: Antoine Tenart <atenart@kernel.org>
Cc: Jay Vosburgh <j.vosburgh@gmail.com>
Cc: Andy Gospodarek <andy@greyhouse.net>
---
drivers/net/bonding/bond_main.c | 37 +++++++++++++++++++++++----------
1 file changed, 26 insertions(+), 11 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 4e0600c7b050f21c82a8862e224bb055e95d5039..a5e3d000ebd85c09beba379a2e6a7f69a0fd4c88 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -6415,28 +6415,41 @@ static int __net_init bond_net_init(struct net *net)
return 0;
}
-static void __net_exit bond_net_exit_batch(struct list_head *net_list)
+/* According to commit 69b0216ac255 ("bonding: fix bonding_masters
+ * race condition in bond unloading") we need to remove sysfs files
+ * before we remove our devices (done later in bond_net_exit_batch_rtnl())
+ */
+static void __net_exit bond_net_pre_exit(struct net *net)
+{
+ struct bond_net *bn = net_generic(net, bond_net_id);
+
+ bond_destroy_sysfs(bn);
+}
+
+static void __net_exit bond_net_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_kill_list)
{
struct bond_net *bn;
struct net *net;
- LIST_HEAD(list);
-
- list_for_each_entry(net, net_list, exit_list) {
- bn = net_generic(net, bond_net_id);
- bond_destroy_sysfs(bn);
- }
/* Kill off any bonds created after unregistering bond rtnl ops */
- rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
struct bonding *bond, *tmp_bond;
bn = net_generic(net, bond_net_id);
list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
- unregister_netdevice_queue(bond->dev, &list);
+ unregister_netdevice_queue(bond->dev, dev_kill_list);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
+}
+
+/* According to commit 23fa5c2caae0 ("bonding: destroy proc directory
+ * only after all bonds are gone") bond_destroy_proc_dir() is called
+ * after bond_net_exit_batch_rtnl() has completed.
+ */
+static void __net_exit bond_net_exit_batch(struct list_head *net_list)
+{
+ struct bond_net *bn;
+ struct net *net;
list_for_each_entry(net, net_list, exit_list) {
bn = net_generic(net, bond_net_id);
@@ -6446,6 +6459,8 @@ static void __net_exit bond_net_exit_batch(struct list_head *net_list)
static struct pernet_operations bond_net_ops = {
.init = bond_net_init,
+ .pre_exit = bond_net_pre_exit,
+ .exit_batch_rtnl = bond_net_exit_batch_rtnl,
.exit_batch = bond_net_exit_batch,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread* Re: [PATCH v2 net-next 05/16] bonding: use exit_batch_rtnl() method
2024-02-02 17:39 ` [PATCH v2 net-next 05/16] bonding: " Eric Dumazet
@ 2024-02-02 19:34 ` Jay Vosburgh
0 siblings, 0 replies; 20+ messages in thread
From: Jay Vosburgh @ 2024-02-02 19:34 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Antoine Tenart,
netdev, eric.dumazet, Andy Gospodarek
Eric Dumazet <edumazet@google.com> wrote:
>exit_batch_rtnl() is called while RTNL is held,
>and devices to be unregistered can be queued in the dev_kill_list.
>
>This saves one rtnl_lock()/rtnl_unlock() pair,
>and one unregister_netdevice_many() call.
>
>v2: Added bond_net_pre_exit() method to make sure bond_destroy_sysfs()
> is called before we unregister the devices in bond_net_exit_batch_rtnl
>
>Signed-off-by: Eric Dumazet <edumazet@google.com>
>Link: https://lore.kernel.org/netdev/170688415193.5216.10499830272732622816@kwain/
>Cc: Antoine Tenart <atenart@kernel.org>
>Cc: Jay Vosburgh <j.vosburgh@gmail.com>
>Cc: Andy Gospodarek <andy@greyhouse.net>
Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
>---
> drivers/net/bonding/bond_main.c | 37 +++++++++++++++++++++++----------
> 1 file changed, 26 insertions(+), 11 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 4e0600c7b050f21c82a8862e224bb055e95d5039..a5e3d000ebd85c09beba379a2e6a7f69a0fd4c88 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -6415,28 +6415,41 @@ static int __net_init bond_net_init(struct net *net)
> return 0;
> }
>
>-static void __net_exit bond_net_exit_batch(struct list_head *net_list)
>+/* According to commit 69b0216ac255 ("bonding: fix bonding_masters
>+ * race condition in bond unloading") we need to remove sysfs files
>+ * before we remove our devices (done later in bond_net_exit_batch_rtnl())
>+ */
>+static void __net_exit bond_net_pre_exit(struct net *net)
>+{
>+ struct bond_net *bn = net_generic(net, bond_net_id);
>+
>+ bond_destroy_sysfs(bn);
>+}
>+
>+static void __net_exit bond_net_exit_batch_rtnl(struct list_head *net_list,
>+ struct list_head *dev_kill_list)
> {
> struct bond_net *bn;
> struct net *net;
>- LIST_HEAD(list);
>-
>- list_for_each_entry(net, net_list, exit_list) {
>- bn = net_generic(net, bond_net_id);
>- bond_destroy_sysfs(bn);
>- }
>
> /* Kill off any bonds created after unregistering bond rtnl ops */
>- rtnl_lock();
> list_for_each_entry(net, net_list, exit_list) {
> struct bonding *bond, *tmp_bond;
>
> bn = net_generic(net, bond_net_id);
> list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
>- unregister_netdevice_queue(bond->dev, &list);
>+ unregister_netdevice_queue(bond->dev, dev_kill_list);
> }
>- unregister_netdevice_many(&list);
>- rtnl_unlock();
>+}
>+
>+/* According to commit 23fa5c2caae0 ("bonding: destroy proc directory
>+ * only after all bonds are gone") bond_destroy_proc_dir() is called
>+ * after bond_net_exit_batch_rtnl() has completed.
>+ */
>+static void __net_exit bond_net_exit_batch(struct list_head *net_list)
>+{
>+ struct bond_net *bn;
>+ struct net *net;
>
> list_for_each_entry(net, net_list, exit_list) {
> bn = net_generic(net, bond_net_id);
>@@ -6446,6 +6459,8 @@ static void __net_exit bond_net_exit_batch(struct list_head *net_list)
>
> static struct pernet_operations bond_net_ops = {
> .init = bond_net_init,
>+ .pre_exit = bond_net_pre_exit,
>+ .exit_batch_rtnl = bond_net_exit_batch_rtnl,
> .exit_batch = bond_net_exit_batch,
> .id = &bond_net_id,
> .size = sizeof(struct bond_net),
>--
>2.43.0.594.gd9cf4e227d-goog
>
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v2 net-next 06/16] geneve: use exit_batch_rtnl() method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
` (4 preceding siblings ...)
2024-02-02 17:39 ` [PATCH v2 net-next 05/16] bonding: " Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 07/16] gtp: " Eric Dumazet
` (9 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
exit_batch_rtnl() is called while RTNL is held,
and devices to be unregistered can be queued in the dev_kill_list.
This saves one rtnl_lock()/rtnl_unlock() pair,
and one unregister_netdevice_many() call.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
drivers/net/geneve.c | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 32c51c244153bd760b9f58001906c04c8b0f37ff..f31fc52ef397dfe0eba854385f783fbcad7e870f 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1900,18 +1900,13 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
}
}
-static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
+static void __net_exit geneve_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
list_for_each_entry(net, net_list, exit_list)
- geneve_destroy_tunnels(net, &list);
-
- /* unregister the devices gathered above */
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ geneve_destroy_tunnels(net, dev_to_kill);
list_for_each_entry(net, net_list, exit_list) {
const struct geneve_net *gn = net_generic(net, geneve_net_id);
@@ -1922,7 +1917,7 @@ static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
static struct pernet_operations geneve_net_ops = {
.init = geneve_init_net,
- .exit_batch = geneve_exit_batch_net,
+ .exit_batch_rtnl = geneve_exit_batch_rtnl,
.id = &geneve_net_id,
.size = sizeof(struct geneve_net),
};
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH v2 net-next 07/16] gtp: use exit_batch_rtnl() method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
` (5 preceding siblings ...)
2024-02-02 17:39 ` [PATCH v2 net-next 06/16] geneve: " Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 08/16] ipv4: add __unregister_nexthop_notifier() Eric Dumazet
` (8 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
exit_batch_rtnl() is called while RTNL is held,
and devices to be unregistered can be queued in the dev_kill_list.
This saves one rtnl_lock()/rtnl_unlock() pair per netns
and one unregister_netdevice_many() call per netns.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
drivers/net/gtp.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index b1919278e931f4e9fb6b2d2ec2feb2193b2cda61..62c601d9f7528d456dc6695814bf01a4d756d2da 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -1876,23 +1876,23 @@ static int __net_init gtp_net_init(struct net *net)
return 0;
}
-static void __net_exit gtp_net_exit(struct net *net)
+static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
- struct gtp_net *gn = net_generic(net, gtp_net_id);
- struct gtp_dev *gtp;
- LIST_HEAD(list);
+ struct net *net;
- rtnl_lock();
- list_for_each_entry(gtp, &gn->gtp_dev_list, list)
- gtp_dellink(gtp->dev, &list);
+ list_for_each_entry(net, net_list, exit_list) {
+ struct gtp_net *gn = net_generic(net, gtp_net_id);
+ struct gtp_dev *gtp;
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ list_for_each_entry(gtp, &gn->gtp_dev_list, list)
+ gtp_dellink(gtp->dev, dev_to_kill);
+ }
}
static struct pernet_operations gtp_net_ops = {
.init = gtp_net_init,
- .exit = gtp_net_exit,
+ .exit_batch_rtnl = gtp_net_exit_batch_rtnl,
.id = &gtp_net_id,
.size = sizeof(struct gtp_net),
};
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH v2 net-next 08/16] ipv4: add __unregister_nexthop_notifier()
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
` (6 preceding siblings ...)
2024-02-02 17:39 ` [PATCH v2 net-next 07/16] gtp: " Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 09/16] vxlan: use exit_batch_rtnl() method Eric Dumazet
` (7 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
unregister_nexthop_notifier() assumes the caller does not hold rtnl.
In the following patch we need to call it from a context
that already holds rtnl.
Add __unregister_nexthop_notifier().
unregister_nexthop_notifier() becomes a wrapper.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/nexthop.h | 1 +
net/ipv4/nexthop.c | 19 +++++++++++++------
2 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index d92046a4a078250eec528f3cb2c3ab557decad03..6647ad509faa02a9a13d58f3405c4a540abc5077 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -218,6 +218,7 @@ struct nh_notifier_info {
int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack);
+int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 7270a8631406c508eebf85c42eb29a5268d7d7cf..70509da4f0806d25b3707835c08888d5e57b782e 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3631,17 +3631,24 @@ int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
}
EXPORT_SYMBOL(register_nexthop_notifier);
-int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
+int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
{
int err;
- rtnl_lock();
err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
nb);
- if (err)
- goto unlock;
- nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
-unlock:
+ if (!err)
+ nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
+ return err;
+}
+EXPORT_SYMBOL(__unregister_nexthop_notifier);
+
+int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
+{
+ int err;
+
+ rtnl_lock();
+ err = __unregister_nexthop_notifier(net, nb);
rtnl_unlock();
return err;
}
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH v2 net-next 09/16] vxlan: use exit_batch_rtnl() method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
` (7 preceding siblings ...)
2024-02-02 17:39 ` [PATCH v2 net-next 08/16] ipv4: add __unregister_nexthop_notifier() Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 10/16] ip6_gre: " Eric Dumazet
` (6 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
exit_batch_rtnl() is called while RTNL is held,
and devices to be unregistered can be queued in the dev_kill_list.
This saves one rtnl_lock()/rtnl_unlock() pair per netns
and one unregister_netdevice_many() call.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
drivers/net/vxlan/vxlan_core.c | 21 ++++++++++++---------
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 16106e088c6301d3aaa47dd73985107945735b6e..df664de4b2b6cc361363b804e7ad531d59e2cdfa 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -4846,23 +4846,25 @@ static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
}
-static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
+static void __net_exit vxlan_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- unsigned int h;
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list) {
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- unregister_nexthop_notifier(net, &vn->nexthop_notifier_block);
+ __unregister_nexthop_notifier(net, &vn->nexthop_notifier_block);
+
+ vxlan_destroy_tunnels(net, dev_to_kill);
}
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list)
- vxlan_destroy_tunnels(net, &list);
+}
- unregister_netdevice_many(&list);
- rtnl_unlock();
+static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
+{
+ struct net *net;
+ unsigned int h;
list_for_each_entry(net, net_list, exit_list) {
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
@@ -4875,6 +4877,7 @@ static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
static struct pernet_operations vxlan_net_ops = {
.init = vxlan_init_net,
.exit_batch = vxlan_exit_batch_net,
+ .exit_batch_rtnl = vxlan_exit_batch_rtnl,
.id = &vxlan_net_id,
.size = sizeof(struct vxlan_net),
};
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH v2 net-next 10/16] ip6_gre: use exit_batch_rtnl() method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
` (8 preceding siblings ...)
2024-02-02 17:39 ` [PATCH v2 net-next 09/16] vxlan: use exit_batch_rtnl() method Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 11/16] ip6_tunnel: " Eric Dumazet
` (5 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
exit_batch_rtnl() is called while RTNL is held,
and devices to be unregistered can be queued in the dev_kill_list.
This saves one rtnl_lock()/rtnl_unlock() pair
and one unregister_netdevice_many() call.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv6/ip6_gre.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 070d87abf7c0284aa23043391aab080534e144a7..428f03e9da45ac323aa357b5a9d299fb7f3d3a5b 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1632,21 +1632,19 @@ static int __net_init ip6gre_init_net(struct net *net)
return err;
}
-static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
+static void __net_exit ip6gre_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- ip6gre_destroy_tunnels(net, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ ip6gre_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations ip6gre_net_ops = {
.init = ip6gre_init_net,
- .exit_batch = ip6gre_exit_batch_net,
+ .exit_batch_rtnl = ip6gre_exit_batch_rtnl,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
};
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH v2 net-next 11/16] ip6_tunnel: use exit_batch_rtnl() method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
` (9 preceding siblings ...)
2024-02-02 17:39 ` [PATCH v2 net-next 10/16] ip6_gre: " Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 12/16] ip6_vti: " Eric Dumazet
` (4 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
exit_batch_rtnl() is called while RTNL is held,
and devices to be unregistered can be queued in the dev_kill_list.
This saves one rtnl_lock()/rtnl_unlock() pair
and one unregister_netdevice_many() call.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv6/ip6_tunnel.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9bbabf750a21e251d4e8f9e3059c707505f5ce32..bfb0a6c601c119cc38901998c47d0c98be047d90 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -2282,21 +2282,19 @@ static int __net_init ip6_tnl_init_net(struct net *net)
return err;
}
-static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
+static void __net_exit ip6_tnl_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- ip6_tnl_destroy_tunnels(net, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ ip6_tnl_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations ip6_tnl_net_ops = {
.init = ip6_tnl_init_net,
- .exit_batch = ip6_tnl_exit_batch_net,
+ .exit_batch_rtnl = ip6_tnl_exit_batch_rtnl,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
};
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v2 net-next 12/16] ip6_vti: use exit_batch_rtnl() method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
` (10 preceding siblings ...)
2024-02-02 17:39 ` [PATCH v2 net-next 11/16] ip6_tunnel: " Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 13/16] sit: " Eric Dumazet
` (3 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
exit_batch_rtnl() is called while RTNL is held,
and devices to be unregistered can be queued in the dev_kill_list.
This saves one rtnl_lock()/rtnl_unlock() pair
and one unregister_netdevice_many() call.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv6/ip6_vti.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index e550240c85e1c9f2fe2b835e903de28e1f08b3bc..cfe1b1ad4d85d303597784d5eeb3077383978d95 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1174,24 +1174,22 @@ static int __net_init vti6_init_net(struct net *net)
return err;
}
-static void __net_exit vti6_exit_batch_net(struct list_head *net_list)
+static void __net_exit vti6_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct vti6_net *ip6n;
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list) {
ip6n = net_generic(net, vti6_net_id);
- vti6_destroy_tunnels(ip6n, &list);
+ vti6_destroy_tunnels(ip6n, dev_to_kill);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
static struct pernet_operations vti6_net_ops = {
.init = vti6_init_net,
- .exit_batch = vti6_exit_batch_net,
+ .exit_batch_rtnl = vti6_exit_batch_rtnl,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
};
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v2 net-next 13/16] sit: use exit_batch_rtnl() method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
` (11 preceding siblings ...)
2024-02-02 17:39 ` [PATCH v2 net-next 12/16] ip6_vti: " Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 17:39 ` [PATCH v2 net-next 14/16] ip_tunnel: " Eric Dumazet
` (2 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
exit_batch_rtnl() is called while RTNL is held,
and devices to be unregistered can be queued in the dev_kill_list.
This saves one rtnl_lock()/rtnl_unlock() pair
and one unregister_netdevice_many() call.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv6/sit.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index cc24cefdb85c0944c03c019b1c4214302d18e2c8..61b2b71fa8bedea6d185348ff781356652434b33 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1875,22 +1875,19 @@ static int __net_init sit_init_net(struct net *net)
return err;
}
-static void __net_exit sit_exit_batch_net(struct list_head *net_list)
+static void __net_exit sit_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
- LIST_HEAD(list);
struct net *net;
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- sit_destroy_tunnels(net, &list);
-
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ sit_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations sit_net_ops = {
.init = sit_init_net,
- .exit_batch = sit_exit_batch_net,
+ .exit_batch_rtnl = sit_exit_batch_rtnl,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
};
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v2 net-next 14/16] ip_tunnel: use exit_batch_rtnl() method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
` (12 preceding siblings ...)
2024-02-02 17:39 ` [PATCH v2 net-next 13/16] sit: " Eric Dumazet
@ 2024-02-02 17:39 ` Eric Dumazet
2024-02-02 17:40 ` [PATCH v2 net-next 15/16] bridge: " Eric Dumazet
2024-02-02 17:40 ` [PATCH v2 net-next 16/16] xfrm: interface: " Eric Dumazet
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:39 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
exit_batch_rtnl() is called while RTNL is held,
and devices to be unregistered can be queued in the dev_kill_list.
This saves one rtnl_lock()/rtnl_unlock() pair
and one unregister_netdevice_many() call.
This patch takes care of ipip, ip_vti, and ip_gre tunnels.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/ip_tunnels.h | 3 ++-
net/ipv4/ip_gre.c | 24 +++++++++++++++---------
net/ipv4/ip_tunnel.c | 10 ++++------
net/ipv4/ip_vti.c | 8 +++++---
net/ipv4/ipip.c | 8 +++++---
5 files changed, 31 insertions(+), 22 deletions(-)
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 2d746f4c9a0a4792bc16971c107d598190897433..5cd64bb2104df389250fb3c518ba00a3826c53f7 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -284,7 +284,8 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname);
void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
- struct rtnl_link_ops *ops);
+ struct rtnl_link_ops *ops,
+ struct list_head *dev_to_kill);
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 5169c3c72cffe49cef613e69889d139db867ff74..aad5125b7a65ecc770f1b962ac5b417bd931e3ba 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1025,14 +1025,16 @@ static int __net_init ipgre_init_net(struct net *net)
return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
-static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
+ ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops,
+ dev_to_kill);
}
static struct pernet_operations ipgre_net_ops = {
.init = ipgre_init_net,
- .exit_batch = ipgre_exit_batch_net,
+ .exit_batch_rtnl = ipgre_exit_batch_rtnl,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1697,14 +1699,16 @@ static int __net_init ipgre_tap_init_net(struct net *net)
return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
-static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
+ ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops,
+ dev_to_kill);
}
static struct pernet_operations ipgre_tap_net_ops = {
.init = ipgre_tap_init_net,
- .exit_batch = ipgre_tap_exit_batch_net,
+ .exit_batch_rtnl = ipgre_tap_exit_batch_rtnl,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1715,14 +1719,16 @@ static int __net_init erspan_init_net(struct net *net)
&erspan_link_ops, "erspan0");
}
-static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
+static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
+ ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops,
+ dev_to_kill);
}
static struct pernet_operations erspan_net_ops = {
.init = erspan_init_net,
- .exit_batch = erspan_exit_batch_net,
+ .exit_batch_rtnl = erspan_exit_batch_rtnl,
.id = &erspan_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index beeae624c412d752bd5ee5d459a88f57640445e9..00da0b80320fb514bca58de7cd13894ab49a2ca6 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1130,19 +1130,17 @@ static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
}
void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
- struct rtnl_link_ops *ops)
+ struct rtnl_link_ops *ops,
+ struct list_head *dev_to_kill)
{
struct ip_tunnel_net *itn;
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list) {
itn = net_generic(net, id);
- ip_tunnel_destroy(net, itn, &list, ops);
+ ip_tunnel_destroy(net, itn, dev_to_kill, ops);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 9ab9b3ebe0cd1a9e95f489d98c5a3d89c7c0edf6..fb1f52d2131128a39ab5bf0482359b7b75989fb6 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -510,14 +510,16 @@ static int __net_init vti_init_net(struct net *net)
return 0;
}
-static void __net_exit vti_exit_batch_net(struct list_head *list_net)
+static void __net_exit vti_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops);
+ ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops,
+ dev_to_kill);
}
static struct pernet_operations vti_net_ops = {
.init = vti_init_net,
- .exit_batch = vti_exit_batch_net,
+ .exit_batch_rtnl = vti_exit_batch_rtnl,
.id = &vti_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 27b8f83c6ea200314f41a29ecfea494b9ddef2ca..0151eea06cc50bec4ae64f08ca6a7161e3cbf9ae 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -592,14 +592,16 @@ static int __net_init ipip_init_net(struct net *net)
return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
}
-static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipip_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
+ ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops,
+ dev_to_kill);
}
static struct pernet_operations ipip_net_ops = {
.init = ipip_init_net,
- .exit_batch = ipip_exit_batch_net,
+ .exit_batch_rtnl = ipip_exit_batch_rtnl,
.id = &ipip_net_id,
.size = sizeof(struct ip_tunnel_net),
};
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v2 net-next 15/16] bridge: use exit_batch_rtnl() method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
` (13 preceding siblings ...)
2024-02-02 17:39 ` [PATCH v2 net-next 14/16] ip_tunnel: " Eric Dumazet
@ 2024-02-02 17:40 ` Eric Dumazet
2024-02-04 5:10 ` Jakub Kicinski
2024-02-02 17:40 ` [PATCH v2 net-next 16/16] xfrm: interface: " Eric Dumazet
15 siblings, 1 reply; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:40 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
exit_batch_rtnl() is called while RTNL is held,
and devices to be unregistered can be queued in the dev_kill_list.
This saves one rtnl_lock()/rtnl_unlock() pair per netns
and one unregister_netdevice_many() call.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/bridge/br.c | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/net/bridge/br.c b/net/bridge/br.c
index ac19b797dbece972f236211b9b286c298315df25..2cab878e0a39c99c10952be7d5c732a40c754655 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -356,26 +356,21 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
clear_bit(opt, &br->options);
}
-static void __net_exit br_net_exit_batch(struct list_head *net_list)
+static void __net_exit br_net_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net_device *dev;
struct net *net;
- LIST_HEAD(list);
-
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
for_each_netdev(net, dev)
if (netif_is_bridge_master(dev))
- br_dev_delete(dev, &list);
-
- unregister_netdevice_many(&list);
-
- rtnl_unlock();
+ br_dev_delete(dev, dev_to_kill);
}
static struct pernet_operations br_net_ops = {
- .exit_batch = br_net_exit_batch,
+ .exit_batch_rtnl = br_net_exit_batch_rtnl,
};
static const struct stp_proto br_stp_proto = {
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v2 net-next 15/16] bridge: use exit_batch_rtnl() method
2024-02-02 17:40 ` [PATCH v2 net-next 15/16] bridge: " Eric Dumazet
@ 2024-02-04 5:10 ` Jakub Kicinski
2024-02-04 10:15 ` Eric Dumazet
0 siblings, 1 reply; 20+ messages in thread
From: Jakub Kicinski @ 2024-02-04 5:10 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Paolo Abeni, Antoine Tenart, netdev,
eric.dumazet
On Fri, 2 Feb 2024 17:40:00 +0000 Eric Dumazet wrote:
> exit_batch_rtnl() is called while RTNL is held,
> and devices to be unregistered can be queued in the dev_kill_list.
>
> This saves one rtnl_lock()/rtnl_unlock() pair per netns
> and one unregister_netdevice_many() call.
This one appears to cause a lot of crashes in the selftests:
https://netdev.bots.linux.dev/contest.html?branch=net-next-2024-02-03--21-00&pw-n=0&pass=0
Example crash:
https://netdev-2.bots.linux.dev/vmksft-bonding/results/449900/vm-crash-thr0-2
--
pw-bot: cr
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v2 net-next 15/16] bridge: use exit_batch_rtnl() method
2024-02-04 5:10 ` Jakub Kicinski
@ 2024-02-04 10:15 ` Eric Dumazet
0 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-04 10:15 UTC (permalink / raw)
To: Jakub Kicinski
Cc: David S . Miller, Paolo Abeni, Antoine Tenart, netdev,
eric.dumazet
On Sun, Feb 4, 2024 at 6:10 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Fri, 2 Feb 2024 17:40:00 +0000 Eric Dumazet wrote:
> > exit_batch_rtnl() is called while RTNL is held,
> > and devices to be unregistered can be queued in the dev_kill_list.
> >
> > This saves one rtnl_lock()/rtnl_unlock() pair per netns
> > and one unregister_netdevice_many() call.
>
> This one appears to cause a lot of crashes in the selftests:
>
> https://netdev.bots.linux.dev/contest.html?branch=net-next-2024-02-03--21-00&pw-n=0&pass=0
>
> Example crash:
>
> https://netdev-2.bots.linux.dev/vmksft-bonding/results/449900/vm-crash-thr0-2
> --
> pw-bot: cr
Hi Jakub, thanks for letting me know.
It seems default_device_exit_batch_rtnl() is called before
br_net_exit_batch_rtnl().
We call the br_dev_delete() function twice.
unregister_netdevice_queue() is called twice.
So the real issue is with patch "net: convert
default_device_exit_batch() to exit_batch_rtnl method".
We depended on the fact that the rtnl_lock()/rtnl_unlock() pairs were
committing small batches of device removals.
I will rework this patch and move it to the last patch in the series.
(use list_empty(&dev->unreg_list) to detect whether a device is already
queued for removal)
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v2 net-next 16/16] xfrm: interface: use exit_batch_rtnl() method
2024-02-02 17:39 [PATCH v2 net-next 00/16] net: more factorization in cleanup_net() paths Eric Dumazet
` (14 preceding siblings ...)
2024-02-02 17:40 ` [PATCH v2 net-next 15/16] bridge: " Eric Dumazet
@ 2024-02-02 17:40 ` Eric Dumazet
15 siblings, 0 replies; 20+ messages in thread
From: Eric Dumazet @ 2024-02-02 17:40 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Antoine Tenart, netdev, eric.dumazet, Eric Dumazet
exit_batch_rtnl() is called while RTNL is held,
and devices to be unregistered can be queued in the dev_kill_list.
This saves one rtnl_lock()/rtnl_unlock() pair per netns
and one unregister_netdevice_many() call.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/xfrm/xfrm_interface_core.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 21d50d75c26088063538d9b9da5cba93db181a1f..dafefef3cf51a79fd6701a8b78c3f8fcfd10615d 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -957,12 +957,12 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
.get_link_net = xfrmi_get_link_net,
};
-static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
+static void __net_exit xfrmi_exit_batch_rtnl(struct list_head *net_exit_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_exit_list, exit_list) {
struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
struct xfrm_if __rcu **xip;
@@ -973,18 +973,16 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
for (xip = &xfrmn->xfrmi[i];
(xi = rtnl_dereference(*xip)) != NULL;
xip = &xi->next)
- unregister_netdevice_queue(xi->dev, &list);
+ unregister_netdevice_queue(xi->dev, dev_to_kill);
}
xi = rtnl_dereference(xfrmn->collect_md_xfrmi);
if (xi)
- unregister_netdevice_queue(xi->dev, &list);
+ unregister_netdevice_queue(xi->dev, dev_to_kill);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
static struct pernet_operations xfrmi_net_ops = {
- .exit_batch = xfrmi_exit_batch_net,
+ .exit_batch_rtnl = xfrmi_exit_batch_rtnl,
.id = &xfrmi_net_id,
.size = sizeof(struct xfrmi_net),
};
--
2.43.0.594.gd9cf4e227d-goog
^ permalink raw reply related [flat|nested] 20+ messages in thread