From: Eric Dumazet <eric.dumazet@gmail.com>
To: Octavian Purdila <opurdila@ixiacom.com>
Cc: netdev@vger.kernel.org
Subject: Re: [PATCH] net: factorize rt_do_flush for batch device unregistering
Date: Mon, 16 Nov 2009 22:32:55 +0100 [thread overview]
Message-ID: <4B01C507.9050908@gmail.com> (raw)
In-Reply-To: <200911162308.59730.opurdila@ixiacom.com>
Octavian Purdila a écrit :
> Tests performed with per device sysctl/sysfs entries disabled:
>
> $ insmod /lib/modules/dummy.ko numdummies=8000
> $ time rmmod dummy
>
> Without the patch: With the patch:
> real 0m 3.65s real 0m 0.27s
> user 0m 0.00s user 0m 0.00s
> sys 0m 3.42s sys 0m 0.24s
>
> Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
> ---
> net/core/dev.c | 28 ++++++++++++++++++++++++++--
> net/ipv4/fib_frontend.c | 13 ++++++++-----
> 2 files changed, 34 insertions(+), 7 deletions(-)
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 4b24d79..b0a14f0 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -4720,7 +4720,8 @@ static void net_set_todo(struct net_device *dev)
>
> static void rollback_registered_many(struct list_head *head)
> {
> - struct net_device *dev;
> + struct net_device *dev, *aux, *fdev;
> + LIST_HEAD(rt_flush_list);
>
> BUG_ON(dev_boot_phase);
> ASSERT_RTNL();
> @@ -4778,8 +4779,28 @@ static void rollback_registered_many(struct list_head *head)
>
> synchronize_net();
>
> - list_for_each_entry(dev, head, unreg_list)
> + /* flush route cache by resending one NETDEV_UNREGISTER per namespace */
> + list_for_each_entry_safe(dev, aux, head, unreg_list) {
> + int needs_flush = 1;
> + list_for_each_entry(fdev, &rt_flush_list, unreg_list) {
> + if (dev_net(dev) == dev_net(fdev)) {
> + needs_flush = 0;
> + dev_put(dev);
> + break;
> + }
> + }
> + if (needs_flush) {
> + list_del(&dev->unreg_list);
> + list_add(&dev->unreg_list, &rt_flush_list);
Use list_move() here instead of list_del() followed by list_add().
> + }
> + }
> +
> + list_for_each_entry_safe(dev, aux, &rt_flush_list, unreg_list) {
> + list_del_init(&dev->unreg_list);
> + call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
> + list_add(&dev->unreg_list, head);
> dev_put(dev);
> + }
> }
>
> static void rollback_registered(struct net_device *dev)
> @@ -5374,6 +5395,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
> * unregister_netdevice_many - unregister many devices
> * @head: list of devices
> *
> + * WARNING: This function modifies the list. It may change the order of the
> + * elements in the list. However, you can assume it does not add or delete
> + * elements to/from the list.
Sorry, I don't understand this comment.
> */
> void unregister_netdevice_many(struct list_head *head)
> {
> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
> index 816e218..1972760 100644
> --- a/net/ipv4/fib_frontend.c
> +++ b/net/ipv4/fib_frontend.c
> @@ -895,11 +895,11 @@ static void nl_fib_lookup_exit(struct net *net)
> net->ipv4.fibnl = NULL;
> }
>
> -static void fib_disable_ip(struct net_device *dev, int force)
> +static void fib_disable_ip(struct net_device *dev, int force, int delay)
> {
> if (fib_sync_down_dev(dev, force))
> fib_flush(dev_net(dev));
> - rt_cache_flush(dev_net(dev), 0);
> + rt_cache_flush(dev_net(dev), delay);
> arp_ifdown(dev);
> }
>
> @@ -922,7 +922,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
> /* Last address was deleted from this interface.
> Disable IP.
> */
> - fib_disable_ip(dev, 1);
> + fib_disable_ip(dev, 1, 0);
> } else {
> rt_cache_flush(dev_net(dev), -1);
> }
> @@ -937,7 +937,10 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
> struct in_device *in_dev = __in_dev_get_rtnl(dev);
>
> if (event == NETDEV_UNREGISTER) {
> - fib_disable_ip(dev, 2);
> + /* if this event is part of a batch then don't flush the cache
> + * now; we will receive another event at the end of the batch */
> + int rt_flush = list_empty(&dev->unreg_list) ? 0 : -1;
hmm... a bit ugly...
> + fib_disable_ip(dev, 2, rt_flush);
> return NOTIFY_DONE;
> }
>
> @@ -955,7 +958,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
> rt_cache_flush(dev_net(dev), -1);
> break;
> case NETDEV_DOWN:
> - fib_disable_ip(dev, 0);
> + fib_disable_ip(dev, 0, 0);
> break;
> case NETDEV_CHANGEMTU:
> case NETDEV_CHANGE:
Are you sure you want to overload NETDEV_UNREGISTER?
Maybe it would be cleaner to add a new value, NETDEV_UNREGISTER_PERNET or something,
for the final loop.
next prev parent reply other threads:[~2009-11-16 21:32 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-11-16 21:08 [PATCH] net: factorize rt_do_flush for batch device unregistering Octavian Purdila
2009-11-16 21:32 ` Eric Dumazet [this message]
2009-11-16 22:03 ` Octavian Purdila
2009-11-16 22:15 ` Eric Dumazet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4B01C507.9050908@gmail.com \
--to=eric.dumazet@gmail.com \
--cc=netdev@vger.kernel.org \
--cc=opurdila@ixiacom.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).