* [PATCH net-next-2.6] net: add dev_close_many
@ 2010-12-13 14:18 Octavian Purdila
2010-12-13 16:52 ` Eric Dumazet
2010-12-13 17:54 ` David Miller
0 siblings, 2 replies; 9+ messages in thread
From: Octavian Purdila @ 2010-12-13 14:18 UTC (permalink / raw)
To: netdev; +Cc: Lucian Adrian Grijincu, Vlad Dogaru, Octavian Purdila
Add dev_close_many and dev_deactivate_many to factorize another
expensive sync-rcu operation in the netdevice unregister path.
$ modprobe dummy numdummies=10000
$ ip link set dev dummy* up
$ time rmmod dummy
Without the patch With the patch
real 0m 24.63s real 0m 5.15s
user 0m 0.00s user 0m 0.00s
sys 0m 6.05s sys 0m 5.14s
Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
---
include/net/sch_generic.h | 1 +
net/core/dev.c | 121 ++++++++++++++++++++++++++++----------------
net/sched/sch_generic.c | 29 ++++++++---
3 files changed, 100 insertions(+), 51 deletions(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ea1f8a8..786cc39 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -321,6 +321,7 @@ extern void dev_init_scheduler(struct net_device *dev);
extern void dev_shutdown(struct net_device *dev);
extern void dev_activate(struct net_device *dev);
extern void dev_deactivate(struct net_device *dev);
+extern void dev_deactivate_many(struct list_head *head);
extern struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
struct Qdisc *qdisc);
extern void qdisc_reset(struct Qdisc *qdisc);
diff --git a/net/core/dev.c b/net/core/dev.c
index d28b3a0..7cab19f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1222,51 +1222,88 @@ int dev_open(struct net_device *dev)
}
EXPORT_SYMBOL(dev_open);
-static int __dev_close(struct net_device *dev)
+static int __dev_close_many(struct list_head *head)
{
- const struct net_device_ops *ops = dev->netdev_ops;
+ struct net_device *dev;
- ASSERT_RTNL();
- might_sleep();
+ list_for_each_entry(dev, head, unreg_list) {
+ ASSERT_RTNL();
+ might_sleep();
- /*
- * Tell people we are going down, so that they can
- * prepare to death, when device is still operating.
- */
- call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
+ /*
+ * Tell people we are going down, so that they can
+ * prepare to death, when device is still operating.
+ */
+ call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
- clear_bit(__LINK_STATE_START, &dev->state);
+ clear_bit(__LINK_STATE_START, &dev->state);
- /* Synchronize to scheduled poll. We cannot touch poll list,
- * it can be even on different cpu. So just clear netif_running().
- *
- * dev->stop() will invoke napi_disable() on all of it's
- * napi_struct instances on this device.
- */
- smp_mb__after_clear_bit(); /* Commit netif_running(). */
+ /* Synchronize to scheduled poll. We cannot touch poll list, it
+ * can be even on different cpu. So just clear netif_running().
+ *
+ * dev->stop() will invoke napi_disable() on all of it's
+ * napi_struct instances on this device.
+ */
+ smp_mb__after_clear_bit(); /* Commit netif_running(). */
+ }
- dev_deactivate(dev);
+ dev_deactivate_many(head);
- /*
- * Call the device specific close. This cannot fail.
- * Only if device is UP
- *
- * We allow it to be called even after a DETACH hot-plug
- * event.
- */
- if (ops->ndo_stop)
- ops->ndo_stop(dev);
+ list_for_each_entry(dev, head, unreg_list) {
+ const struct net_device_ops *ops = dev->netdev_ops;
- /*
- * Device is now down.
- */
+ /*
+ * Call the device specific close. This cannot fail.
+ * Only if device is UP
+ *
+ * We allow it to be called even after a DETACH hot-plug
+ * event.
+ */
+ if (ops->ndo_stop)
+ ops->ndo_stop(dev);
+
+ /*
+ * Device is now down.
+ */
+
+ dev->flags &= ~IFF_UP;
+
+ /*
+ * Shutdown NET_DMA
+ */
+ net_dmaengine_put();
+ }
- dev->flags &= ~IFF_UP;
+ return 0;
+}
+
+static int __dev_close(struct net_device *dev)
+{
+ LIST_HEAD(single);
+
+ list_add(&dev->unreg_list, &single);
+ return __dev_close_many(&single);
+}
+
+int dev_close_many(struct list_head *head)
+{
+ struct net_device *dev, *tmp;
+
+ list_for_each_entry_safe(dev, tmp, head, unreg_list)
+ if (!(dev->flags & IFF_UP)) {
+ list_del(&dev->unreg_list);
+ continue;
+ }
+
+ __dev_close_many(head);
/*
- * Shutdown NET_DMA
+ * Tell people we are down
*/
- net_dmaengine_put();
+ list_for_each_entry(dev, head, unreg_list) {
+ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+ call_netdevice_notifiers(NETDEV_DOWN, dev);
+ }
return 0;
}
@@ -1282,16 +1319,10 @@ static int __dev_close(struct net_device *dev)
*/
int dev_close(struct net_device *dev)
{
- if (!(dev->flags & IFF_UP))
- return 0;
-
- __dev_close(dev);
+ LIST_HEAD(single);
- /*
- * Tell people we are down
- */
- rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
- call_netdevice_notifiers(NETDEV_DOWN, dev);
+ list_add(&dev->unreg_list, &single);
+ dev_close_many(&single);
return 0;
}
@@ -4958,10 +4989,12 @@ static void rollback_registered_many(struct list_head *head)
}
BUG_ON(dev->reg_state != NETREG_REGISTERED);
+ }
- /* If device is running, close it first. */
- dev_close(dev);
+ /* If device is running, close it first. */
+ dev_close_many(head);
+ list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
unlist_netdevice(dev);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 0918834..34dc598 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -810,20 +810,35 @@ static bool some_qdisc_is_busy(struct net_device *dev)
return false;
}
-void dev_deactivate(struct net_device *dev)
+void dev_deactivate_many(struct list_head *head)
{
- netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
- if (dev_ingress_queue(dev))
- dev_deactivate_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
+ struct net_device *dev;
- dev_watchdog_down(dev);
+ list_for_each_entry(dev, head, unreg_list) {
+ netdev_for_each_tx_queue(dev, dev_deactivate_queue,
+ &noop_qdisc);
+ if (dev_ingress_queue(dev))
+ dev_deactivate_queue(dev, dev_ingress_queue(dev),
+ &noop_qdisc);
+
+ dev_watchdog_down(dev);
+ }
/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
synchronize_rcu();
/* Wait for outstanding qdisc_run calls. */
- while (some_qdisc_is_busy(dev))
- yield();
+ list_for_each_entry(dev, head, unreg_list)
+ while (some_qdisc_is_busy(dev))
+ yield();
+}
+
+void dev_deactivate(struct net_device *dev)
+{
+ LIST_HEAD(single);
+
+ list_add(&dev->unreg_list, &single);
+ dev_deactivate_many(&single);
}
static void dev_init_scheduler_queue(struct net_device *dev,
--
1.7.1
* Re: [PATCH net-next-2.6] net: add dev_close_many
2010-12-13 14:18 [PATCH net-next-2.6] net: add dev_close_many Octavian Purdila
@ 2010-12-13 16:52 ` Eric Dumazet
2010-12-13 17:23 ` Octavian Purdila
2010-12-13 17:54 ` David Miller
1 sibling, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2010-12-13 16:52 UTC (permalink / raw)
To: Octavian Purdila; +Cc: netdev, Lucian Adrian Grijincu, Vlad Dogaru
On Monday 13 December 2010 at 16:18 +0200, Octavian Purdila wrote:
> Add dev_close_many and dev_deactivate_many to factorize another
> expensive sync-rcu operation in the netdevice unregister path.
>
> $ modprobe dummy numdummies=10000
> $ ip link set dev dummy* up
> $ time rmmod dummy
>
> Without the patch With the patch
>
> real 0m 24.63s real 0m 5.15s
> user 0m 0.00s user 0m 0.00s
> sys 0m 6.05s sys 0m 5.14s
>
> Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
> ---
Hmm, I think this solves the "rmmod dummy" case, but not the "dismantle
devices one by one" case, which is the general one (on heavy-duty tunnel/ppp
servers).
I think we could use a kernel thread (a workqueue presumably), handling
3 lists of devices to be dismantled, respecting one RCU grace period (or
rcu_barrier()) before the transfer of an item from one list to the
following one.
This way, each device removal could post a device to this kernel thread
and return to the user immediately. The time RTNL is held would be reduced
(calls to synchronize_rcu() would be done with RTNL not held).
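A very rough sketch of the idea (all names here are hypothetical, not existing
kernel API, and the three staged lists are collapsed into a single work item
for brevity; the point is only that the grace periods are paid by the worker,
outside RTNL):
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
static LIST_HEAD(dismantle_list);
static DEFINE_SPINLOCK(dismantle_lock);
static void net_dismantle_workfn(struct work_struct *work)
{
        LIST_HEAD(batch);
        spin_lock_bh(&dismantle_lock);
        list_splice_init(&dismantle_list, &batch);
        spin_unlock_bh(&dismantle_lock);
        if (list_empty(&batch))
                return;
        synchronize_rcu();      /* list 1 -> list 2: readers are done */
        rcu_barrier();          /* list 2 -> list 3: pending call_rcu() done */
        /* final per-device teardown of everything on @batch goes here */
}
static DECLARE_WORK(net_dismantle_work, net_dismantle_workfn);
/* caller side: post the device and return to the user immediately */
static void net_dismantle_device(struct net_device *dev)
{
        spin_lock_bh(&dismantle_lock);
        list_add_tail(&dev->unreg_list, &dismantle_list);
        spin_unlock_bh(&dismantle_lock);
        schedule_work(&net_dismantle_work);
}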
* Re: [PATCH net-next-2.6] net: add dev_close_many
2010-12-13 16:52 ` Eric Dumazet
@ 2010-12-13 17:23 ` Octavian Purdila
2010-12-13 17:32 ` Stephen Hemminger
0 siblings, 1 reply; 9+ messages in thread
From: Octavian Purdila @ 2010-12-13 17:23 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev, Lucian Adrian Grijincu, Vlad Dogaru
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Monday 13 December 2010, 18:52:25
> Hmm, I think this solves the "rmmod dummy" case, but not the "dismantle
> devices one by one" case, which is the general one (on heavy-duty tunnel/ppp
> servers).
>
> I think we could use a kernel thread (a workqueue presumably), handling
> 3 lists of devices to be dismantled, respecting one RCU grace period (or
> rcu_barrier()) before the transfer of an item from one list to the
> following one.
>
> This way, each device removal could post a device to this kernel thread
> and return to the user immediately. The time RTNL is held would be reduced
> (calls to synchronize_rcu() would be done with RTNL not held).
We also run into the case where we have to dismantle the interfaces one by
one, but we address it by gathering the requests in userspace and then doing
a single unregister_netdevice_many operation.
I like the kernel thread / workqueue idea. But we would still need
unregister_netdevice_many and dev_close_many, right? We would put the device
on the unregister list in unregister_netdevice and call
unregister_netdevice_many from the kernel thread.
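Just to make the batching concrete, on the kernel side it boils down to
something like the following sketch (kill_two() and the two devices are
placeholders; caller holds RTNL):
static void kill_two(struct net_device *dev1, struct net_device *dev2)
{
        LIST_HEAD(kill_list);
        unregister_netdevice_queue(dev1, &kill_list);   /* only queues */
        unregister_netdevice_queue(dev2, &kill_list);
        unregister_netdevice_many(&kill_list);          /* one batched teardown:
                                                         * grace periods paid
                                                         * once per list, not
                                                         * once per device */
}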
* Re: [PATCH net-next-2.6] net: add dev_close_many
2010-12-13 17:23 ` Octavian Purdila
@ 2010-12-13 17:32 ` Stephen Hemminger
2010-12-13 17:52 ` Octavian Purdila
0 siblings, 1 reply; 9+ messages in thread
From: Stephen Hemminger @ 2010-12-13 17:32 UTC (permalink / raw)
To: Octavian Purdila
Cc: Eric Dumazet, netdev, Lucian Adrian Grijincu, Vlad Dogaru
On Mon, 13 Dec 2010 19:23:26 +0200
Octavian Purdila <opurdila@ixiacom.com> wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Monday 13 December 2010, 18:52:25
>
> > Hmm, I think this solves the "rmmod dummy" case, but not the "dismantle
> > devices one by one" case, which is the general one (on heavy-duty
> > tunnel/ppp servers).
> >
> > I think we could use a kernel thread (a workqueue presumably), handling
> > 3 lists of devices to be dismantled, respecting one RCU grace period (or
> > rcu_barrier()) before the transfer of an item from one list to the
> > following one.
> >
> > This way, each device removal could post a device to this kernel thread
> > and return to the user immediately. The time RTNL is held would be reduced
> > (calls to synchronize_rcu() would be done with RTNL not held).
>
> We also run into the case where we have to dismantle the interfaces one by
> one, but we address it by gathering the requests in userspace and then doing
> a single unregister_netdevice_many operation.
>
> I like the kernel thread / workqueue idea. But we would still need
> unregister_netdevice_many and dev_close_many, right? We would put the device
> on the unregister list in unregister_netdevice and call
> unregister_netdevice_many from the kernel thread.
With a message-based interface, there shouldn't be a need for this.
Just have one thread sending requests in user space, and one receiving
the ACKs.
* Re: [PATCH net-next-2.6] net: add dev_close_many
2010-12-13 17:32 ` Stephen Hemminger
@ 2010-12-13 17:52 ` Octavian Purdila
2010-12-13 18:04 ` Stephen Hemminger
0 siblings, 1 reply; 9+ messages in thread
From: Octavian Purdila @ 2010-12-13 17:52 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Eric Dumazet, netdev, Lucian Adrian Grijincu, Vlad Dogaru
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Monday 13 December 2010, 19:32:21
> With a message-based interface, there shouldn't be a need for this.
> Just have one thread sending requests in user space, and one receiving
> the ACKs.
Sorry, you lost me here :) Do you mean there is no need for the kernel
thread / workqueue, or not even for dev_close_many?
* Re: [PATCH net-next-2.6] net: add dev_close_many
2010-12-13 14:18 [PATCH net-next-2.6] net: add dev_close_many Octavian Purdila
2010-12-13 16:52 ` Eric Dumazet
@ 2010-12-13 17:54 ` David Miller
1 sibling, 0 replies; 9+ messages in thread
From: David Miller @ 2010-12-13 17:54 UTC (permalink / raw)
To: opurdila; +Cc: netdev, lucian.grijincu, ddvlad
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Mon, 13 Dec 2010 16:18:23 +0200
> -static int __dev_close(struct net_device *dev)
> +static int __dev_close_many(struct list_head *head)
> {
> - const struct net_device_ops *ops = dev->netdev_ops;
> + struct net_device *dev;
>
> - ASSERT_RTNL();
> - might_sleep();
> + list_for_each_entry(dev, head, unreg_list) {
> + ASSERT_RTNL();
> + might_sleep();
It doesn't make any sense to put these insertions into this loop since
they are testing top-level invariants that must be provided by the
caller.
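In other words, a v2 sketch would check them once, before the loop, roughly
like this (the rest of the function stays as in the patch above):
static int __dev_close_many(struct list_head *head)
{
        struct net_device *dev;
        ASSERT_RTNL();
        might_sleep();
        list_for_each_entry(dev, head, unreg_list) {
                call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
                clear_bit(__LINK_STATE_START, &dev->state);
                smp_mb__after_clear_bit();      /* commit netif_running() */
        }
        dev_deactivate_many(head);
        /* ... second loop calling ndo_stop() etc., unchanged from the
         * patch above ... */
        return 0;
}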
* Re: [PATCH net-next-2.6] net: add dev_close_many
2010-12-13 17:52 ` Octavian Purdila
@ 2010-12-13 18:04 ` Stephen Hemminger
2010-12-13 20:54 ` Octavian Purdila
0 siblings, 1 reply; 9+ messages in thread
From: Stephen Hemminger @ 2010-12-13 18:04 UTC (permalink / raw)
To: Octavian Purdila
Cc: Eric Dumazet, netdev, Lucian Adrian Grijincu, Vlad Dogaru
On Mon, 13 Dec 2010 19:52:39 +0200
Octavian Purdila <opurdila@ixiacom.com> wrote:
> From: Stephen Hemminger <shemminger@vyatta.com>
> Date: Monday 13 December 2010, 19:32:21
>
> > With a message-based interface, there shouldn't be a need for this.
> > Just have one thread sending requests in user space, and one receiving
> > the ACKs.
>
> Sorry, you lost me here :) Do you mean there is no need for the kernel
> thread / workqueue, or not even for dev_close_many?
I assume the need for dev_close_many is coming from a user space application?
I expect that for this kind of special need, you would be better off not
using the normal iproute utilities and instead having a custom device manager
that speaks netlink directly.
Rather than doing a synchronous send-request-and-wait-for-ack, the utility
could use a sender thread and a collector thread.
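Roughly this pattern — an untested sketch, with made-up ifindex values,
a fixed request count, and no real error handling or ACK parsing:
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
static void send_dellink(int fd, int ifindex, unsigned int seq)
{
        struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
        struct {
                struct nlmsghdr nlh;
                struct ifinfomsg ifi;
        } req;
        memset(&req, 0, sizeof(req));
        req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifi));
        req.nlh.nlmsg_type = RTM_DELLINK;
        req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
        req.nlh.nlmsg_seq = seq;
        req.ifi.ifi_family = AF_UNSPEC;
        req.ifi.ifi_index = ifindex;
        sendto(fd, &req, req.nlh.nlmsg_len, 0,
               (struct sockaddr *)&kernel, sizeof(kernel));
}
int main(void)
{
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
        char buf[8192];
        unsigned int i, n = 100;
        for (i = 0; i < n; i++)         /* "sender": fire all requests */
                send_dellink(fd, 1000 + i, i);
        for (i = 0; i < n; i++)         /* "collector": drain the ACKs;
                                         * a real tool would parse the
                                         * NLMSG_ERROR messages */
                recv(fd, buf, sizeof(buf), 0);
        close(fd);
        return 0;
}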
--
* Re: [PATCH net-next-2.6] net: add dev_close_many
2010-12-13 18:04 ` Stephen Hemminger
@ 2010-12-13 20:54 ` Octavian Purdila
2010-12-13 23:34 ` Stephen Hemminger
0 siblings, 1 reply; 9+ messages in thread
From: Octavian Purdila @ 2010-12-13 20:54 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Eric Dumazet, netdev, Lucian Adrian Grijincu, Vlad Dogaru
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Monday 13 December 2010, 20:04:47
> I assume the need for dev_close_many is coming from a user space
> application?
>
> I expect that for this kind of special need, you would be better off not
> using the normal iproute utilities and instead have a custom device manager
> that is doing netlink directly.
>
> Rather than doing synchronous send request and wait for ack. The utility
> could use a sender and collector thread.
Actually, we need dev_close_many in order to speed up
unregister_netdevice_many, since in the unregister_netdevice_many path there
is still one more sync-rcu operation that has not been factorized.
I will prepare a v2 to address David's comment, and I will also be more
explicit in the commit message.
* Re: [PATCH net-next-2.6] net: add dev_close_many
2010-12-13 20:54 ` Octavian Purdila
@ 2010-12-13 23:34 ` Stephen Hemminger
0 siblings, 0 replies; 9+ messages in thread
From: Stephen Hemminger @ 2010-12-13 23:34 UTC (permalink / raw)
To: Octavian Purdila
Cc: Eric Dumazet, netdev, Lucian Adrian Grijincu, Vlad Dogaru
On Mon, 13 Dec 2010 22:54:24 +0200
Octavian Purdila <opurdila@ixiacom.com> wrote:
> From: Stephen Hemminger <shemminger@vyatta.com>
> Date: Monday 13 December 2010, 20:04:47
>
> > I assume the need for dev_close_many is coming from a user space
> > application?
> >
> > I expect that for this kind of special need, you would be better off not
> > using the normal iproute utilities and instead having a custom device
> > manager that speaks netlink directly.
> >
> > Rather than doing a synchronous send-request-and-wait-for-ack, the utility
> > could use a sender thread and a collector thread.
>
> Actually, we need dev_close_many in order to speed up
> unregister_netdevice_many, since in the unregister_netdevice_many path there
> is still one more sync-rcu operation that has not been factorized.
>
> I will prepare a v2 to address David's comment, and I will also be more
> explicit in the commit message.
That makes sense.
--