* [PATCH net-next 1/3] net: Flush all skbs related to an unregistered device
2015-05-19 23:24 [PATCH net-next 0/3] Unserialize netdev_run_todo Baptiste Covolato
@ 2015-05-19 23:24 ` Baptiste Covolato
2015-05-21 21:02 ` David Miller
2015-05-19 23:24 ` [PATCH net-next 2/3] net: Inline netdev_wait_allrefs inside netdev_run_todo Baptiste Covolato
2015-05-19 23:24 ` [PATCH net-next 3/3] net: Make netdev_run_todo call notifiers in parallel Baptiste Covolato
2 siblings, 1 reply; 7+ messages in thread
From: Baptiste Covolato @ 2015-05-19 23:24 UTC (permalink / raw)
To: David S. Miller, netdev
Cc: Francesco Ruggeri, Eric Mowat, Adrien Schildknecht
Update flush_backlog to flush all packets in the backlog queue belonging
to a device being unregistered. Accordingly on_each_cpu no longer needs
to pass a device to flush_backlog since it handles any device in the
NETREG_UNREGISTERED state.
Signed-off-by: Baptiste Covolato <baptiste@arista.com>
Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
---
net/core/dev.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 0e7afef..db59d18 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4005,13 +4005,12 @@ EXPORT_SYMBOL(netif_receive_skb_sk);
*/
static void flush_backlog(void *arg)
{
- struct net_device *dev = arg;
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
struct sk_buff *skb, *tmp;
rps_lock(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
- if (skb->dev == dev) {
+ if (skb->dev->reg_state == NETREG_UNREGISTERED) {
__skb_unlink(skb, &sd->input_pkt_queue);
kfree_skb(skb);
input_queue_head_incr(sd);
@@ -4020,7 +4019,7 @@ static void flush_backlog(void *arg)
rps_unlock(sd);
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
- if (skb->dev == dev) {
+ if (skb->dev->reg_state == NETREG_UNREGISTERED) {
__skb_unlink(skb, &sd->process_queue);
kfree_skb(skb);
input_queue_head_incr(sd);
@@ -6790,7 +6789,7 @@ void netdev_run_todo(void)
dev->reg_state = NETREG_UNREGISTERED;
- on_each_cpu(flush_backlog, dev, 1);
+ on_each_cpu(flush_backlog, NULL, 1);
netdev_wait_allrefs(dev);
--
2.4.1
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH net-next 2/3] net: Inline netdev_wait_allrefs inside netdev_run_todo
2015-05-19 23:24 [PATCH net-next 0/3] Unserialize netdev_run_todo Baptiste Covolato
2015-05-19 23:24 ` [PATCH net-next 1/3] net: Flush all skbs related to an unregistered device Baptiste Covolato
@ 2015-05-19 23:24 ` Baptiste Covolato
2015-05-19 23:24 ` [PATCH net-next 3/3] net: Make netdev_run_todo call notifiers in parallel Baptiste Covolato
2 siblings, 0 replies; 7+ messages in thread
From: Baptiste Covolato @ 2015-05-19 23:24 UTC (permalink / raw)
To: David S. Miller, netdev
Cc: Francesco Ruggeri, Eric Mowat, Adrien Schildknecht
Make netdev_wait_allrefs part of netdev_run_todo
Signed-off-by: Baptiste Covolato <baptiste@arista.com>
Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
---
net/core/dev.c | 110 ++++++++++++++++++++++++---------------------------------
1 file changed, 47 insertions(+), 63 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index db59d18..9b0814b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6671,68 +6671,6 @@ int netdev_refcnt_read(const struct net_device *dev)
}
EXPORT_SYMBOL(netdev_refcnt_read);
-/**
- * netdev_wait_allrefs - wait until all references are gone.
- * @dev: target net_device
- *
- * This is called when unregistering network devices.
- *
- * Any protocol or device that holds a reference should register
- * for netdevice notification, and cleanup and put back the
- * reference if they receive an UNREGISTER event.
- * We can get stuck here if buggy protocols don't correctly
- * call dev_put.
- */
-static void netdev_wait_allrefs(struct net_device *dev)
-{
- unsigned long rebroadcast_time, warning_time;
- int refcnt;
-
- linkwatch_forget_dev(dev);
-
- rebroadcast_time = warning_time = jiffies;
- refcnt = netdev_refcnt_read(dev);
-
- while (refcnt != 0) {
- if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
- rtnl_lock();
-
- /* Rebroadcast unregister notification */
- call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
-
- __rtnl_unlock();
- rcu_barrier();
- rtnl_lock();
-
- call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
- &dev->state)) {
- /* We must not have linkwatch events
- * pending on unregister. If this
- * happens, we simply run the queue
- * unscheduled, resulting in a noop
- * for this device.
- */
- linkwatch_run_queue();
- }
-
- __rtnl_unlock();
-
- rebroadcast_time = jiffies;
- }
-
- msleep(250);
-
- refcnt = netdev_refcnt_read(dev);
-
- if (time_after(jiffies, warning_time + 10 * HZ)) {
- pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
- dev->name, refcnt);
- warning_time = jiffies;
- }
- }
-}
-
/* The sequence is:
*
* rtnl_lock();
@@ -6760,6 +6698,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
void netdev_run_todo(void)
{
struct list_head list;
+ unsigned long rebroadcast_time, warning_time;
/* Snapshot list, allow later requests */
list_replace_init(&net_todo_list, &list);
@@ -6772,6 +6711,7 @@ void netdev_run_todo(void)
rcu_barrier();
while (!list_empty(&list)) {
+ int refcnt;
struct net_device *dev
= list_first_entry(&list, struct net_device, todo_list);
list_del(&dev->todo_list);
@@ -6791,7 +6731,51 @@ void netdev_run_todo(void)
on_each_cpu(flush_backlog, NULL, 1);
- netdev_wait_allrefs(dev);
+ linkwatch_forget_dev(dev);
+
+ rebroadcast_time = warning_time = jiffies;
+ refcnt = netdev_refcnt_read(dev);
+
+ while (refcnt != 0) {
+ if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
+ rtnl_lock();
+
+ /* Rebroadcast unregister notification */
+ call_netdevice_notifiers(NETDEV_UNREGISTER,
+ dev);
+
+ __rtnl_unlock();
+ rcu_barrier();
+ rtnl_lock();
+
+ call_netdevice_notifiers(
+ NETDEV_UNREGISTER_FINAL, dev);
+ if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
+ &dev->state)) {
+ /* We must not have linkwatch events
+ * pending on unregister. If this
+ * happens, we simply run the queue
+ * unscheduled, resulting in a noop
+ * for this device.
+ */
+ linkwatch_run_queue();
+ }
+
+ __rtnl_unlock();
+
+ rebroadcast_time = jiffies;
+ }
+
+ msleep(250);
+
+ refcnt = netdev_refcnt_read(dev);
+
+ if (time_after(jiffies, warning_time + 10 * HZ)) {
+ pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
+ dev->name, refcnt);
+ warning_time = jiffies;
+ }
+ }
/* paranoia */
BUG_ON(netdev_refcnt_read(dev));
--
2.4.1
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH net-next 3/3] net: Make netdev_run_todo call notifiers in parallel.
2015-05-19 23:24 [PATCH net-next 0/3] Unserialize netdev_run_todo Baptiste Covolato
2015-05-19 23:24 ` [PATCH net-next 1/3] net: Flush all skbs related to an unregistered device Baptiste Covolato
2015-05-19 23:24 ` [PATCH net-next 2/3] net: Inline netdev_wait_allrefs inside netdev_run_todo Baptiste Covolato
@ 2015-05-19 23:24 ` Baptiste Covolato
2 siblings, 0 replies; 7+ messages in thread
From: Baptiste Covolato @ 2015-05-19 23:24 UTC (permalink / raw)
To: David S. Miller, netdev
Cc: Francesco Ruggeri, Eric Mowat, Adrien Schildknecht
In the case of unregister_netdevice_many, a queue of devices is
deleted but the final notifications are processed serially, with the next
one waiting until the previous device has been completely destroyed. This
patch allows netdev_run_todo to send all notifications at once, reducing
the total processing time for a large list.
Signed-off-by: Baptiste Covolato <baptiste@arista.com>
Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
---
net/core/dev.c | 146 ++++++++++++++++++++++++++++++++-------------------------
1 file changed, 83 insertions(+), 63 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 9b0814b..00c512e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6698,105 +6698,125 @@ EXPORT_SYMBOL(netdev_refcnt_read);
void netdev_run_todo(void)
{
struct list_head list;
+ struct net_device *dev, *n;
unsigned long rebroadcast_time, warning_time;
+ LIST_HEAD(cleanup_list);
/* Snapshot list, allow later requests */
list_replace_init(&net_todo_list, &list);
__rtnl_unlock();
-
/* Wait for rcu callbacks to finish before next phase */
- if (!list_empty(&list))
- rcu_barrier();
+ if (list_empty(&list))
+ return;
- while (!list_empty(&list)) {
- int refcnt;
- struct net_device *dev
- = list_first_entry(&list, struct net_device, todo_list);
- list_del(&dev->todo_list);
+ rcu_barrier();
- rtnl_lock();
+ rtnl_lock();
+ list_for_each_entry(dev, &list, todo_list)
call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- __rtnl_unlock();
+ __rtnl_unlock();
+ list_for_each_entry_safe(dev, n, &list, todo_list) {
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
pr_err("network todo '%s' but state %d\n",
dev->name, dev->reg_state);
dump_stack();
+ list_del(&dev->todo_list);
continue;
}
dev->reg_state = NETREG_UNREGISTERED;
+ }
- on_each_cpu(flush_backlog, NULL, 1);
+ on_each_cpu(flush_backlog, NULL, 1);
+ list_for_each_entry(dev, &list, todo_list)
linkwatch_forget_dev(dev);
- rebroadcast_time = warning_time = jiffies;
- refcnt = netdev_refcnt_read(dev);
-
- while (refcnt != 0) {
- if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
- rtnl_lock();
-
- /* Rebroadcast unregister notification */
- call_netdevice_notifiers(NETDEV_UNREGISTER,
- dev);
-
- __rtnl_unlock();
- rcu_barrier();
- rtnl_lock();
-
- call_netdevice_notifiers(
- NETDEV_UNREGISTER_FINAL, dev);
- if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
- &dev->state)) {
- /* We must not have linkwatch events
- * pending on unregister. If this
- * happens, we simply run the queue
- * unscheduled, resulting in a noop
- * for this device.
- */
- linkwatch_run_queue();
- }
+ rebroadcast_time = warning_time = jiffies;
+again:
+ list_for_each_entry_safe(dev, n, &list, todo_list) {
+ if (netdev_refcnt_read(dev) == 0)
+ list_move(&dev->todo_list, &cleanup_list);
+ }
+
+ if (!list_empty(&cleanup_list)) {
+ list_for_each_entry(dev, &cleanup_list, todo_list) {
+ /* paranoia */
+ BUG_ON(netdev_refcnt_read(dev));
+ BUG_ON(!list_empty(&dev->ptype_all));
+ BUG_ON(!list_empty(&dev->ptype_specific));
+ WARN_ON(rcu_access_pointer(dev->ip_ptr));
+ WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+ WARN_ON(dev->dn_ptr);
+
+ if (dev->destructor)
+ dev->destructor(dev);
+ }
+
+ /* Report some network devices have been unregistered */
+ rtnl_lock();
+ list_for_each_entry(dev, &cleanup_list, todo_list)
+ dev_net(dev)->dev_unreg_count--;
+ __rtnl_unlock();
+ wake_up(&netdev_unregistering_wq);
- __rtnl_unlock();
+ list_for_each_entry_safe(dev, n, &cleanup_list, todo_list) {
+ list_del(&dev->todo_list);
- rebroadcast_time = jiffies;
- }
+ /* Free network devices */
+ kobject_put(&dev->dev.kobj);
+ }
+ }
- msleep(250);
+ /* No more interface to delete */
+ if (list_empty(&list))
+ return;
- refcnt = netdev_refcnt_read(dev);
+ if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
+ rtnl_lock();
- if (time_after(jiffies, warning_time + 10 * HZ)) {
- pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
- dev->name, refcnt);
- warning_time = jiffies;
- }
+ list_for_each_entry(dev, &list, todo_list) {
+ /* Rebroadcast unregister notification */
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
}
- /* paranoia */
- BUG_ON(netdev_refcnt_read(dev));
- BUG_ON(!list_empty(&dev->ptype_all));
- BUG_ON(!list_empty(&dev->ptype_specific));
- WARN_ON(rcu_access_pointer(dev->ip_ptr));
- WARN_ON(rcu_access_pointer(dev->ip6_ptr));
- WARN_ON(dev->dn_ptr);
+ __rtnl_unlock();
+ rcu_barrier();
+ rtnl_lock();
- if (dev->destructor)
- dev->destructor(dev);
+ list_for_each_entry(dev, &list, todo_list) {
+ call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
+ if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
+ &dev->state)) {
+ /* We must not have linkwatch events
+ * pending on unregister. If this
+ * happens, we simply run the queue
+ * unscheduled, resulting in a noop
+ * for this device.
+ */
+ linkwatch_run_queue();
+ }
+ }
- /* Report a network device has been unregistered */
- rtnl_lock();
- dev_net(dev)->dev_unreg_count--;
__rtnl_unlock();
- wake_up(&netdev_unregistering_wq);
- /* Free network device */
- kobject_put(&dev->dev.kobj);
+ rebroadcast_time = jiffies;
+ }
+
+ msleep(250);
+
+ if (time_after(jiffies, warning_time + 10 * HZ)) {
+ list_for_each_entry(dev, &list, todo_list) {
+ pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
+ dev->name, netdev_refcnt_read(dev));
+ }
+ warning_time = jiffies;
}
+
+ goto again;
}
/* Convert net_device_stats to rtnl_link_stats64. They have the same
--
2.4.1
^ permalink raw reply related [flat|nested] 7+ messages in thread