* [PATCH] net: factorize rt_do_flush for batch device unregistering
@ 2009-11-16 21:08 Octavian Purdila
2009-11-16 21:32 ` Eric Dumazet
0 siblings, 1 reply; 4+ messages in thread
From: Octavian Purdila @ 2009-11-16 21:08 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet
Tests performed with per device sysctl/sysfs entries disabled:
$ insmod /lib/modules/dummy.ko numdummies=8000
$ time rmmod dummy
Without the patch: With the patch:
real 0m 3.65s real 0m 0.27s
user 0m 0.00s user 0m 0.00s
sys 0m 3.42s sys 0m 0.24s
Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
---
net/core/dev.c | 28 ++++++++++++++++++++++++++--
net/ipv4/fib_frontend.c | 13 ++++++++-----
2 files changed, 34 insertions(+), 7 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 4b24d79..b0a14f0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4720,7 +4720,8 @@ static void net_set_todo(struct net_device *dev)
static void rollback_registered_many(struct list_head *head)
{
- struct net_device *dev;
+ struct net_device *dev, *aux, *fdev;
+ LIST_HEAD(rt_flush_list);
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
@@ -4778,8 +4779,28 @@ static void rollback_registered_many(struct list_head *head)
synchronize_net();
- list_for_each_entry(dev, head, unreg_list)
+ /* flush route cache by resending one NETDEV_UNREGISTER per namespace */
+ list_for_each_entry_safe(dev, aux, head, unreg_list) {
+ int needs_flush = 1;
+ list_for_each_entry(fdev, &rt_flush_list, unreg_list) {
+ if (dev_net(dev) == dev_net(fdev)) {
+ needs_flush = 0;
+ dev_put(dev);
+ break;
+ }
+ }
+ if (needs_flush) {
+ list_del(&dev->unreg_list);
+ list_add(&dev->unreg_list, &rt_flush_list);
+ }
+ }
+
+ list_for_each_entry_safe(dev, aux, &rt_flush_list, unreg_list) {
+ list_del_init(&dev->unreg_list);
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+ list_add(&dev->unreg_list, head);
dev_put(dev);
+ }
}
static void rollback_registered(struct net_device *dev)
@@ -5374,6 +5395,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
* unregister_netdevice_many - unregister many devices
* @head: list of devices
*
+ * WARNING: This function modifies the list. It may change the order of the
+ * elements in the list. However, you can assume it does not add or delete
+ * elements to/from the list.
*/
void unregister_netdevice_many(struct list_head *head)
{
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 816e218..1972760 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -895,11 +895,11 @@ static void nl_fib_lookup_exit(struct net *net)
net->ipv4.fibnl = NULL;
}
-static void fib_disable_ip(struct net_device *dev, int force)
+static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
if (fib_sync_down_dev(dev, force))
fib_flush(dev_net(dev));
- rt_cache_flush(dev_net(dev), 0);
+ rt_cache_flush(dev_net(dev), delay);
arp_ifdown(dev);
}
@@ -922,7 +922,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
/* Last address was deleted from this interface.
Disable IP.
*/
- fib_disable_ip(dev, 1);
+ fib_disable_ip(dev, 1, 0);
} else {
rt_cache_flush(dev_net(dev), -1);
}
@@ -937,7 +937,10 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
struct in_device *in_dev = __in_dev_get_rtnl(dev);
if (event == NETDEV_UNREGISTER) {
- fib_disable_ip(dev, 2);
+ /* if this event is part of a batch then don't flush the cache
+ * now; we will receive another event at the end of the batch */
+ int rt_flush = list_empty(&dev->unreg_list) ? 0 : -1;
+ fib_disable_ip(dev, 2, rt_flush);
return NOTIFY_DONE;
}
@@ -955,7 +958,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
rt_cache_flush(dev_net(dev), -1);
break;
case NETDEV_DOWN:
- fib_disable_ip(dev, 0);
+ fib_disable_ip(dev, 0, 0);
break;
case NETDEV_CHANGEMTU:
case NETDEV_CHANGE:
--
1.5.6.5
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] net: factorize rt_do_flush for batch device unregistering
2009-11-16 21:08 [PATCH] net: factorize rt_do_flush for batch device unregistering Octavian Purdila
@ 2009-11-16 21:32 ` Eric Dumazet
2009-11-16 22:03 ` Octavian Purdila
0 siblings, 1 reply; 4+ messages in thread
From: Eric Dumazet @ 2009-11-16 21:32 UTC (permalink / raw)
To: Octavian Purdila; +Cc: netdev
Octavian Purdila a écrit :
> Tests performed with per device sysctl/sysfs entries disabled:
>
> $ insmod /lib/modules/dummy.ko numdummies=8000
> $ time rmmod dummy
>
> Without the patch: With the patch:
> real 0m 3.65s real 0m 0.27s
> user 0m 0.00s user 0m 0.00s
> sys 0m 3.42s sys 0m 0.24s
>
> Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
> ---
> net/core/dev.c | 28 ++++++++++++++++++++++++++--
> net/ipv4/fib_frontend.c | 13 ++++++++-----
> 2 files changed, 34 insertions(+), 7 deletions(-)
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 4b24d79..b0a14f0 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -4720,7 +4720,8 @@ static void net_set_todo(struct net_device *dev)
>
> static void rollback_registered_many(struct list_head *head)
> {
> - struct net_device *dev;
> + struct net_device *dev, *aux, *fdev;
> + LIST_HEAD(rt_flush_list);
>
> BUG_ON(dev_boot_phase);
> ASSERT_RTNL();
> @@ -4778,8 +4779,28 @@ static void rollback_registered_many(struct list_head *head)
>
> synchronize_net();
>
> - list_for_each_entry(dev, head, unreg_list)
> + /* flush route cache by resending one NETDEV_UNREGISTER per namespace */
> + list_for_each_entry_safe(dev, aux, head, unreg_list) {
> + int needs_flush = 1;
> + list_for_each_entry(fdev, &rt_flush_list, unreg_list) {
> + if (dev_net(dev) == dev_net(fdev)) {
> + needs_flush = 0;
> + dev_put(dev);
> + break;
> + }
> + }
> + if (needs_flush) {
> + list_del(&dev->unreg_list);
> + list_add(&dev->unreg_list, &rt_flush_list);
list_move ...
> + }
> + }
> +
> + list_for_each_entry_safe(dev, aux, &rt_flush_list, unreg_list) {
> + list_del_init(&dev->unreg_list);
> + call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
> + list_add(&dev->unreg_list, head);
> dev_put(dev);
> + }
> }
>
> static void rollback_registered(struct net_device *dev)
> @@ -5374,6 +5395,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
> * unregister_netdevice_many - unregister many devices
> * @head: list of devices
> *
> + * WARNING: This function modifies the list. It may change the order of the
> + * elements in the list. However, you can assume it does not add or delete
> + * elements to/from the list.
Sorry I dont understand this comment
> */
> void unregister_netdevice_many(struct list_head *head)
> {
> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
> index 816e218..1972760 100644
> --- a/net/ipv4/fib_frontend.c
> +++ b/net/ipv4/fib_frontend.c
> @@ -895,11 +895,11 @@ static void nl_fib_lookup_exit(struct net *net)
> net->ipv4.fibnl = NULL;
> }
>
> -static void fib_disable_ip(struct net_device *dev, int force)
> +static void fib_disable_ip(struct net_device *dev, int force, int delay)
> {
> if (fib_sync_down_dev(dev, force))
> fib_flush(dev_net(dev));
> - rt_cache_flush(dev_net(dev), 0);
> + rt_cache_flush(dev_net(dev), delay);
> arp_ifdown(dev);
> }
>
> @@ -922,7 +922,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
> /* Last address was deleted from this interface.
> Disable IP.
> */
> - fib_disable_ip(dev, 1);
> + fib_disable_ip(dev, 1, 0);
> } else {
> rt_cache_flush(dev_net(dev), -1);
> }
> @@ -937,7 +937,10 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
> struct in_device *in_dev = __in_dev_get_rtnl(dev);
>
> if (event == NETDEV_UNREGISTER) {
> - fib_disable_ip(dev, 2);
> + /* if this event is part of a batch then don't flush the cache
> + * now; we will receive another event at the end of the batch */
> + int rt_flush = list_empty(&dev->unreg_list) ? 0 : -1;
hmm... a bit ugly...
> + fib_disable_ip(dev, 2, rt_flush);
> return NOTIFY_DONE;
> }
>
> @@ -955,7 +958,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
> rt_cache_flush(dev_net(dev), -1);
> break;
> case NETDEV_DOWN:
> - fib_disable_ip(dev, 0);
> + fib_disable_ip(dev, 0, 0);
> break;
> case NETDEV_CHANGEMTU:
> case NETDEV_CHANGE:
Are you sure you want to overload NETDEV_UNREGISTER ?
Maybe it would be cleaner to add a new value, NETDEV_UNREGISTER_PERNET or something
for the final loop...
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] net: factorize rt_do_flush for batch device unregistering
2009-11-16 21:32 ` Eric Dumazet
@ 2009-11-16 22:03 ` Octavian Purdila
2009-11-16 22:15 ` Eric Dumazet
0 siblings, 1 reply; 4+ messages in thread
From: Octavian Purdila @ 2009-11-16 22:03 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev
On Monday 16 November 2009 23:32:55 you wrote:
> > @@ -5374,6 +5395,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
> > * unregister_netdevice_many - unregister many devices
> > * @head: list of devices
> > *
> > + * WARNING: This function modifies the list. It may change the order of
> > the + * elements in the list. However, you can assume it does not add or
> > delete + * elements to/from the list.
>
> Sorry I dont understand this comment
>
The list passed to unregister_netdevice_many(), as the "head" parameter, may
be altered, e.g. order may change between the elements.
That is because we temporarily move the items from the list to the
rt_flush_list for the flush. When we add the items back they may not be added in
the same place.
Perhaps the confusion comes from the fact that I did not specify which list?
(i.e. head)
> > @@ -937,7 +937,10 @@ static int fib_netdev_event(struct notifier_block
> > *this, unsigned long event, vo struct in_device *in_dev =
> > __in_dev_get_rtnl(dev);
> >
> > if (event == NETDEV_UNREGISTER) {
> > - fib_disable_ip(dev, 2);
> > + /* if this event is part of a batch then don't flush the cache
> > + * now; we will receive another event at the end of the batch */
> > + int rt_flush = list_empty(&dev->unreg_list) ? 0 : -1;
>
> hmm... a bit ugly...
>
Would it be better if I added a dev_is_batch_unregister() instead?
Or add a new device flag to explicitly signal the batch unregister?
> > + fib_disable_ip(dev, 2, rt_flush);
> > return NOTIFY_DONE;
> > }
> >
> > @@ -955,7 +958,7 @@ static int fib_netdev_event(struct notifier_block
> > *this, unsigned long event, vo rt_cache_flush(dev_net(dev), -1);
> > break;
> > case NETDEV_DOWN:
> > - fib_disable_ip(dev, 0);
> > + fib_disable_ip(dev, 0, 0);
> > break;
> > case NETDEV_CHANGEMTU:
> > case NETDEV_CHANGE:
>
> Are you sure you want to overload NETDEV_UNREGISTER ?
>
> Maybe it would be cleaner to add a new value, NETDEV_UNREGISTER_PERNET or
> something for the final loop...
>
Hmm, I think that will allow us to get rid of the ugly test: never flush the
cache for NETDEV_UNREGISTER, only flush it for NETDEV_UNREGISTER_PERNET.
We just need to make sure to add NETDEV_UNREGISTER_PERNET in other places
where NETDEV_UNREGISTER is called.
I'll try this in the next patch. Thanks for reviewing.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] net: factorize rt_do_flush for batch device unregistering
2009-11-16 22:03 ` Octavian Purdila
@ 2009-11-16 22:15 ` Eric Dumazet
0 siblings, 0 replies; 4+ messages in thread
From: Eric Dumazet @ 2009-11-16 22:15 UTC (permalink / raw)
To: Octavian Purdila; +Cc: netdev
Octavian Purdila a écrit :
> On Monday 16 November 2009 23:32:55 you wrote:
>
>>> @@ -5374,6 +5395,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
>>> * unregister_netdevice_many - unregister many devices
>>> * @head: list of devices
>>> *
>>> + * WARNING: This function modifies the list. It may change the order of
>>> the + * elements in the list. However, you can assume it does not add or
>>> delete + * elements to/from the list.
>> Sorry I dont understand this comment
>>
>
> The list passed to unregister_netdevice_many(), as the "head" parameter, may
> be altered, e.g. order may change between the elements.
>
> That is because we temporarily move the items from the list to the
> rt_flush_list for the flush. When we add the items back they may not be added in
> the same place.
>
Ah, I get it now. The confusion is that the comment makes more sense for
rollback_registered_many(), because when reading unregister_netdevice_many()
it is clear it doesn't change the list...
void unregister_netdevice_many(struct list_head *head)
{
struct net_device *dev;
if (!list_empty(head)) {
rollback_registered_many(head);
list_for_each_entry(dev, head, unreg_list)
net_set_todo(dev);
}
}
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2009-11-16 22:15 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-16 21:08 [PATCH] net: factorize rt_do_flush for batch device unregistering Octavian Purdila
2009-11-16 21:32 ` Eric Dumazet
2009-11-16 22:03 ` Octavian Purdila
2009-11-16 22:15 ` Eric Dumazet
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).