* [PATCH net-next] net: flush the softnet backlog in process context
@ 2016-08-25 13:58 Paolo Abeni
  2016-08-25 14:32 ` Eric Dumazet
  2016-08-26 18:51 ` David Miller
From: Paolo Abeni @ 2016-08-25 13:58 UTC
  To: netdev; +Cc: David S. Miller, Eric Dumazet, Tom Herbert, Hannes Frederic Sowa

Currently in process_backlog(), the process_queue dequeuing is
performed with local IRQs disabled, to protect against
flush_backlog(), which runs in hard IRQ context.

This patch moves the flush operation to a work queue and runs the
callback with bottom halves disabled, to protect the process_queue
against concurrent dequeuing. Since process_queue is now always
manipulated in bottom half context, and the work queue callback
cannot preempt the backlog processing on the same CPU, the IRQ
disable/enable pair around the dequeue operation is removed.

To keep the flush time as low as possible, the flush works are
scheduled on all online CPUs simultaneously, using the high-priority
workqueue and statically allocated, per-CPU work structs.
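
As a rough userspace model of the schedule-all-then-wait-all pattern
described above, using POSIX threads in place of the per-CPU works
and pthread_join() in place of flush_work(), with all names being
hypothetical stand-ins:

#include <pthread.h>
#include <stdio.h>

#define NWORKERS 4

static void *flush_one(void *arg)
{
        /* stand-in for one per-CPU backlog flush */
        printf("flushing backlog on worker %ld\n", (long)arg);
        return NULL;
}

int main(void)
{
        pthread_t tid[NWORKERS];
        long i;

        /* phase 1: kick every worker, without waiting in between */
        for (i = 0; i < NWORKERS; i++)
                pthread_create(&tid[i], NULL, flush_one, (void *)i);

        /* phase 2: only now wait for all of them to complete */
        for (i = 0; i < NWORKERS; i++)
                pthread_join(tid[i], NULL);
        return 0;
}

Starting every worker before waiting on any of them makes the total
wall-clock cost roughly that of the slowest single flush, rather than
the sum of all per-CPU flushes.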

Overall, this change trades a slightly longer device destruction
time for slightly better packet reinjection performance.

Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
rfc -> v1: rebased

 net/core/dev.c | 72 ++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 50 insertions(+), 22 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index a75df86..7feae74 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4292,15 +4292,25 @@ int netif_receive_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
-/* Network device is going away, flush any packets still pending
- * Called with irqs disabled.
- */
-static void flush_backlog(void *arg)
+struct flush_work {
+	struct net_device *dev;
+	struct work_struct work;
+};
+
+DEFINE_PER_CPU(struct flush_work, flush_works);
+
+/* Network device is going away, flush any packets still pending */
+static void flush_backlog(struct work_struct *work)
 {
-	struct net_device *dev = arg;
-	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+	struct flush_work *flush = container_of(work, typeof(*flush), work);
+	struct net_device *dev = flush->dev;
 	struct sk_buff *skb, *tmp;
+	struct softnet_data *sd;
+
+	local_bh_disable();
+	sd = this_cpu_ptr(&softnet_data);
 
+	local_irq_disable();
 	rps_lock(sd);
 	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
 		if (skb->dev == dev) {
@@ -4310,6 +4320,7 @@ static void flush_backlog(void *arg)
 		}
 	}
 	rps_unlock(sd);
+	local_irq_enable();
 
 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
 		if (skb->dev == dev) {
@@ -4318,6 +4329,27 @@ static void flush_backlog(void *arg)
 			input_queue_head_incr(sd);
 		}
 	}
+	local_bh_enable();
+}
+
+static void flush_all_backlogs(struct net_device *dev)
+{
+	unsigned int cpu;
+
+	get_online_cpus();
+
+	for_each_online_cpu(cpu) {
+		struct flush_work *flush = per_cpu_ptr(&flush_works, cpu);
+
+		INIT_WORK(&flush->work, flush_backlog);
+		flush->dev = dev;
+		queue_work_on(cpu, system_highpri_wq, &flush->work);
+	}
+
+	for_each_online_cpu(cpu)
+		flush_work(&per_cpu_ptr(&flush_works, cpu)->work);
+
+	put_online_cpus();
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -4805,8 +4837,9 @@ static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
 
 static int process_backlog(struct napi_struct *napi, int quota)
 {
-	int work = 0;
 	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
+	bool again = true;
+	int work = 0;
 
 	/* Check if we have pending ipi, its better to send them now,
 	 * not waiting net_rx_action() end.
@@ -4817,23 +4850,20 @@ static int process_backlog(struct napi_struct *napi, int quota)
 	}
 
 	napi->weight = weight_p;
-	local_irq_disable();
-	while (1) {
+	while (again) {
 		struct sk_buff *skb;
 
 		while ((skb = __skb_dequeue(&sd->process_queue))) {
 			rcu_read_lock();
-			local_irq_enable();
 			__netif_receive_skb(skb);
 			rcu_read_unlock();
-			local_irq_disable();
 			input_queue_head_incr(sd);
-			if (++work >= quota) {
-				local_irq_enable();
+			if (++work >= quota)
 				return work;
-			}
+
 		}
 
+		local_irq_disable();
 		rps_lock(sd);
 		if (skb_queue_empty(&sd->input_pkt_queue)) {
 			/*
@@ -4845,16 +4875,14 @@ static int process_backlog(struct napi_struct *napi, int quota)
 			 * and we dont need an smp_mb() memory barrier.
 			 */
 			napi->state = 0;
-			rps_unlock(sd);
-
-			break;
+			again = false;
+		} else {
+			skb_queue_splice_tail_init(&sd->input_pkt_queue,
+						   &sd->process_queue);
 		}
-
-		skb_queue_splice_tail_init(&sd->input_pkt_queue,
-					   &sd->process_queue);
 		rps_unlock(sd);
+		local_irq_enable();
 	}
-	local_irq_enable();
 
 	return work;
 }
@@ -6707,7 +6735,7 @@ static void rollback_registered_many(struct list_head *head)
 		unlist_netdevice(dev);
 
 		dev->reg_state = NETREG_UNREGISTERING;
-		on_each_cpu(flush_backlog, dev, 1);
+		flush_all_backlogs(dev);
 	}
 
 	synchronize_net();
-- 
1.8.3.1


* Re: [PATCH net-next] net: flush the softnet backlog in process context
  2016-08-25 13:58 [PATCH net-next] net: flush the softnet backlog in process context Paolo Abeni
@ 2016-08-25 14:32 ` Eric Dumazet
  2016-08-25 14:47   ` Eric Dumazet
  2016-08-26 18:51 ` David Miller
From: Eric Dumazet @ 2016-08-25 14:32 UTC
  To: Paolo Abeni
  Cc: netdev, David S. Miller, Eric Dumazet, Tom Herbert,
	Hannes Frederic Sowa

On Thu, 2016-08-25 at 15:58 +0200, Paolo Abeni wrote:
> Currently in process_backlog(), the process_queue dequeuing is
> performed with local IRQs disabled, to protect against
> flush_backlog(), which runs in hard IRQ context.

Acked-by: Eric Dumazet <edumazet@google.com>

> @@ -6707,7 +6735,7 @@ static void rollback_registered_many(struct list_head *head)
>  		unlist_netdevice(dev);
>  
>  		dev->reg_state = NETREG_UNREGISTERING;
> -		on_each_cpu(flush_backlog, dev, 1);
> +		flush_all_backlogs(dev);
>  	}
>  
>  	synchronize_net();

In a future patch, we could change this so that we kick
flush_all_backlogs() once for all devices, instead of one device at a
time.

We would no longer pass @dev as a parameter, and would simply look at
skb->dev->reg_state to decide whether to remove packets from the
queues in flush_backlog().

Batching matters for some setups using hundreds of devices and
suddenly removing them all in one go.
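
A userspace sketch of that batching idea: mark every dying device
first, then make a single pass over the queue that drops any packet
whose device is marked. The struct names mimic the kernel's, but
everything below is a simplified, hypothetical model:

#include <stdbool.h>
#include <stdio.h>

struct net_device {
        bool unregistering;     /* models reg_state == NETREG_UNREGISTERING */
};

struct sk_buff {
        struct net_device *dev;
        struct sk_buff *next;
};

/* one pass: unlink every queued packet whose device is going away */
static struct sk_buff *flush_marked(struct sk_buff *queue, int *dropped)
{
        struct sk_buff **pp = &queue, *skb;

        while ((skb = *pp) != NULL) {
                if (skb->dev->unregistering) {
                        *pp = skb->next;        /* kfree_skb() in the kernel */
                        (*dropped)++;
                } else {
                        pp = &skb->next;
                }
        }
        return queue;
}

int main(void)
{
        struct net_device dying = { true }, alive = { false };
        struct sk_buff s2 = { &alive, NULL };
        struct sk_buff s1 = { &dying, &s2 };
        int dropped = 0;

        flush_marked(&s1, &dropped);
        printf("dropped %d packet(s)\n", dropped);
        return 0;
}

One traversal then covers any number of dying devices, which is what
testing skb->dev->reg_state instead of a single dev pointer buys.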

Thanks.


* Re: [PATCH net-next] net: flush the softnet backlog in process context
  2016-08-25 14:32 ` Eric Dumazet
@ 2016-08-25 14:47   ` Eric Dumazet
  2016-08-25 14:58     ` Paolo Abeni
From: Eric Dumazet @ 2016-08-25 14:47 UTC
  To: Paolo Abeni
  Cc: netdev, David S. Miller, Eric Dumazet, Tom Herbert,
	Hannes Frederic Sowa

On Thu, 2016-08-25 at 07:32 -0700, Eric Dumazet wrote:

> In a future patch, we could change this so that we kick
> flush_all_backlogs() once for all devices, instead of one device at a
> time.
> 
> We would no longer pass @dev as a parameter, and would simply look at
> skb->dev->reg_state to decide whether to remove packets from the
> queues in flush_backlog().

This would be something like :

diff --git a/net/core/dev.c b/net/core/dev.c
index 7feae74ca928..793ace2c600f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4293,7 +4293,6 @@ int netif_receive_skb(struct sk_buff *skb)
 EXPORT_SYMBOL(netif_receive_skb);
 
 struct flush_work {
-	struct net_device *dev;
 	struct work_struct work;
 };
 
@@ -4303,7 +4302,6 @@ DEFINE_PER_CPU(struct flush_work, flush_works);
 static void flush_backlog(struct work_struct *work)
 {
 	struct flush_work *flush = container_of(work, typeof(*flush), work);
-	struct net_device *dev = flush->dev;
 	struct sk_buff *skb, *tmp;
 	struct softnet_data *sd;
 
@@ -4313,7 +4311,7 @@ static void flush_backlog(struct work_struct *work)
 	local_irq_disable();
 	rps_lock(sd);
 	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
-		if (skb->dev == dev) {
+		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
 			__skb_unlink(skb, &sd->input_pkt_queue);
 			kfree_skb(skb);
 			input_queue_head_incr(sd);
@@ -4323,7 +4321,7 @@ static void flush_backlog(struct work_struct *work)
 	local_irq_enable();
 
 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
-		if (skb->dev == dev) {
+		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
 			__skb_unlink(skb, &sd->process_queue);
 			kfree_skb(skb);
 			input_queue_head_incr(sd);
@@ -4332,7 +4330,7 @@ static void flush_backlog(struct work_struct *work)
 	local_bh_enable();
 }
 
-static void flush_all_backlogs(struct net_device *dev)
+static void flush_all_backlogs(void)
 {
 	unsigned int cpu;
 
@@ -4342,7 +4340,6 @@ static void flush_all_backlogs(struct net_device *dev)
 		struct flush_work *flush = per_cpu_ptr(&flush_works, cpu);
 
 		INIT_WORK(&flush->work, flush_backlog);
-		flush->dev = dev;
 		queue_work_on(cpu, system_highpri_wq, &flush->work);
 	}
 
@@ -6735,8 +6732,8 @@ static void rollback_registered_many(struct list_head *head)
 		unlist_netdevice(dev);
 
 		dev->reg_state = NETREG_UNREGISTERING;
-		flush_all_backlogs(dev);
 	}
+	flush_all_backlogs();
 
 	synchronize_net();
 


* Re: [PATCH net-next] net: flush the softnet backlog in process context
  2016-08-25 14:47   ` Eric Dumazet
@ 2016-08-25 14:58     ` Paolo Abeni
  2016-08-25 16:25       ` Eric Dumazet
From: Paolo Abeni @ 2016-08-25 14:58 UTC
  To: Eric Dumazet; +Cc: netdev, David S. Miller, Tom Herbert, Hannes Frederic Sowa

On Thu, 2016-08-25 at 07:47 -0700, Eric Dumazet wrote:
> On Thu, 2016-08-25 at 07:32 -0700, Eric Dumazet wrote:
> 
> > In a future patch, we could change this so that we kick
> > flush_all_backlogs() once for all devices, instead of one device at a
> > time.
> > 
> > We would no longer pass @dev as a parameter, and would simply look at
> > skb->dev->reg_state to decide whether to remove packets from the
> > queues in flush_backlog().
> 
> This would be something like :

This is actually a nice cleanup. I hope to test it later.

> diff --git a/net/core/dev.c b/net/core/dev.c
> index 7feae74ca928..793ace2c600f 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -4293,7 +4293,6 @@ int netif_receive_skb(struct sk_buff *skb)
>  EXPORT_SYMBOL(netif_receive_skb);
>  
>  struct flush_work {
> -	struct net_device *dev;
>  	struct work_struct work;
>  };

With 'dev' removed, I think we can use a plain 'work_struct' directly
and avoid the 'container_of' usage in flush_backlog().
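
For reference, container_of() only walks back from a pointer to an
embedded member to the enclosing struct, so once 'dev' is gone the
wrapper holds nothing worth recovering. A minimal userspace sketch of
the mechanism (hypothetical layout, simplified macro without the
kernel's type checking):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct { int pending; };

struct flush_work {
        void *dev;              /* the extra payload that forced the wrapper */
        struct work_struct work;
};

int main(void)
{
        struct flush_work fw = { .dev = &fw, .work = { 0 } };
        struct work_struct *w = &fw.work;       /* what the callback receives */

        /* recover the enclosing struct from the embedded member */
        struct flush_work *back = container_of(w, struct flush_work, work);

        printf("recovered: %s\n", back == &fw ? "yes" : "no");
        return 0;
}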

Paolo


* Re: [PATCH net-next] net: flush the softnet backlog in process context
  2016-08-25 14:58     ` Paolo Abeni
@ 2016-08-25 16:25       ` Eric Dumazet
From: Eric Dumazet @ 2016-08-25 16:25 UTC
  To: Paolo Abeni; +Cc: netdev, David S. Miller, Tom Herbert, Hannes Frederic Sowa

On Thu, 2016-08-25 at 16:58 +0200, Paolo Abeni wrote:
> On Thu, 2016-08-25 at 07:47 -0700, Eric Dumazet wrote:
> > On Thu, 2016-08-25 at 07:32 -0700, Eric Dumazet wrote:
> > 
> > > In a future patch, we could change this so that we kick
> > > flush_all_backlogs() once for all devices, instead of one device at a
> > > time.
> > > 
> > > We would no longer pass @dev as a parameter, and would simply look at
> > > skb->dev->reg_state to decide whether to remove packets from the
> > > queues in flush_backlog().
> > 
> > This would be something like :
> 
> This is actually a nice cleanup. I hope to test it later.
> 
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index 7feae74ca928..793ace2c600f 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -4293,7 +4293,6 @@ int netif_receive_skb(struct sk_buff *skb)
> >  EXPORT_SYMBOL(netif_receive_skb);
> >  
> >  struct flush_work {
> > -	struct net_device *dev;
> >  	struct work_struct work;
> >  };
> 
> With 'dev' removal, I think we can use directly 'work_struct'  and avoid
> 'container_of' usage in flush_backlog().

Sure. Here is the patch I will propose:

diff --git a/net/core/dev.c b/net/core/dev.c
index 7feae74ca928..996107f041a3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4292,18 +4292,11 @@ int netif_receive_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
-struct flush_work {
-	struct net_device *dev;
-	struct work_struct work;
-};
-
-DEFINE_PER_CPU(struct flush_work, flush_works);
+DEFINE_PER_CPU(struct work_struct, flush_works);
 
 /* Network device is going away, flush any packets still pending */
 static void flush_backlog(struct work_struct *work)
 {
-	struct flush_work *flush = container_of(work, typeof(*flush), work);
-	struct net_device *dev = flush->dev;
 	struct sk_buff *skb, *tmp;
 	struct softnet_data *sd;
 
@@ -4313,7 +4306,7 @@ static void flush_backlog(struct work_struct *work)
 	local_irq_disable();
 	rps_lock(sd);
 	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
-		if (skb->dev == dev) {
+		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
 			__skb_unlink(skb, &sd->input_pkt_queue);
 			kfree_skb(skb);
 			input_queue_head_incr(sd);
@@ -4323,7 +4316,7 @@ static void flush_backlog(struct work_struct *work)
 	local_irq_enable();
 
 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
-		if (skb->dev == dev) {
+		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
 			__skb_unlink(skb, &sd->process_queue);
 			kfree_skb(skb);
 			input_queue_head_incr(sd);
@@ -4332,22 +4325,18 @@ static void flush_backlog(struct work_struct *work)
 	local_bh_enable();
 }
 
-static void flush_all_backlogs(struct net_device *dev)
+static void flush_all_backlogs(void)
 {
 	unsigned int cpu;
 
 	get_online_cpus();
 
-	for_each_online_cpu(cpu) {
-		struct flush_work *flush = per_cpu_ptr(&flush_works, cpu);
-
-		INIT_WORK(&flush->work, flush_backlog);
-		flush->dev = dev;
-		queue_work_on(cpu, system_highpri_wq, &flush->work);
-	}
+	for_each_online_cpu(cpu)
+		queue_work_on(cpu, system_highpri_wq,
+			      per_cpu_ptr(&flush_works, cpu));
 
 	for_each_online_cpu(cpu)
-		flush_work(&per_cpu_ptr(&flush_works, cpu)->work);
+		flush_work(per_cpu_ptr(&flush_works, cpu));
 
 	put_online_cpus();
 }
@@ -6735,8 +6724,8 @@ static void rollback_registered_many(struct list_head *head)
 		unlist_netdevice(dev);
 
 		dev->reg_state = NETREG_UNREGISTERING;
-		flush_all_backlogs(dev);
 	}
+	flush_all_backlogs();
 
 	synchronize_net();
 
@@ -8301,8 +8290,11 @@ static int __init net_dev_init(void)
 	 */
 
 	for_each_possible_cpu(i) {
+		struct work_struct *flush = per_cpu_ptr(&flush_works, i);
 		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
+		INIT_WORK(flush, flush_backlog);
+
 		skb_queue_head_init(&sd->input_pkt_queue);
 		skb_queue_head_init(&sd->process_queue);
 		INIT_LIST_HEAD(&sd->poll_list);


* Re: [PATCH net-next] net: flush the softnet backlog in process context
  2016-08-25 13:58 [PATCH net-next] net: flush the softnet backlog in process context Paolo Abeni
  2016-08-25 14:32 ` Eric Dumazet
@ 2016-08-26 18:51 ` David Miller
From: David Miller @ 2016-08-26 18:51 UTC
  To: pabeni; +Cc: netdev, edumazet, tom, hannes

From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 25 Aug 2016 15:58:44 +0200

> Currently in process_backlog(), the process_queue dequeuing is
> performed with local IRQs disabled, to protect against
> flush_backlog(), which runs in hard IRQ context.
> 
> This patch moves the flush operation to a work queue and runs the
> callback with bottom halves disabled, to protect the process_queue
> against concurrent dequeuing. Since process_queue is now always
> manipulated in bottom half context, and the work queue callback
> cannot preempt the backlog processing on the same CPU, the IRQ
> disable/enable pair around the dequeue operation is removed.
> 
> To keep the flush time as low as possible, the flush works are
> scheduled on all online CPUs simultaneously, using the high-priority
> workqueue and statically allocated, per-CPU work structs.
> 
> Overall, this change trades a slightly longer device destruction
> time for slightly better packet reinjection performance.
> 
> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>

Applied, thanks.

