* [PATCH v2] net: batch skb dequeueing from softnet input_pkt_queue @ 2010-04-13 15:38 Changli Gao 2010-04-13 8:08 ` Eric Dumazet 0 siblings, 1 reply; 11+ messages in thread From: Changli Gao @ 2010-04-13 15:38 UTC (permalink / raw) To: David S. Miller; +Cc: Eric Dumazet, netdev, Changli Gao batch skb dequeueing from softnet input_pkt_queue batch skb dequeueing from softnet input_pkt_queue to reduce potential lock contention and irq disabling/enabling. Signed-off-by: Changli Gao <xiaosuo@gmail.com> ---- include/linux/netdevice.h | 2 ++ net/core/dev.c | 45 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d1a21b5..bc7a0d7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1335,6 +1335,8 @@ struct softnet_data { struct call_single_data csd ____cacheline_aligned_in_smp; #endif struct sk_buff_head input_pkt_queue; + struct sk_buff_head processing_queue; + volatile bool flush_processing_queue; struct napi_struct backlog; }; diff --git a/net/core/dev.c b/net/core/dev.c index a10a216..ac24293 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2324,6 +2324,11 @@ static void trigger_softirq(void *data) } #endif /* CONFIG_SMP */ +static inline u32 softnet_input_qlen(struct softnet_data *queue) +{ + return queue->input_pkt_queue.qlen + queue->processing_queue.qlen; +} + /* * enqueue_to_backlog is called to queue an skb to a per CPU backlog * queue (may be a remote CPU queue). 
@@ -2339,8 +2344,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu) __get_cpu_var(netdev_rx_stat).total++; rps_lock(queue); - if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { - if (queue->input_pkt_queue.qlen) { + if (softnet_input_qlen(queue) <= netdev_max_backlog) { + if (softnet_input_qlen(queue)) { enqueue: __skb_queue_tail(&queue->input_pkt_queue, skb); rps_unlock(queue); @@ -2803,6 +2808,7 @@ static void flush_backlog(void *arg) __skb_unlink(skb, &queue->input_pkt_queue); kfree_skb(skb); } + queue->flush_processing_queue = true; rps_unlock(queue); } @@ -3112,14 +3118,23 @@ static int process_backlog(struct napi_struct *napi, int quota) struct softnet_data *queue = &__get_cpu_var(softnet_data); unsigned long start_time = jiffies; + if (queue->flush_processing_queue) { + struct sk_buff *skb; + + queue->flush_processing_queue = false; + while ((skb = __skb_dequeue(&queue->processing_queue))) + kfree_skb(skb); + } + napi->weight = weight_p; do { struct sk_buff *skb; local_irq_disable(); rps_lock(queue); - skb = __skb_dequeue(&queue->input_pkt_queue); - if (!skb) { + skb_queue_splice_tail_init(&queue->input_pkt_queue, + &queue->processing_queue); + if (skb_queue_empty(&queue->processing_queue)) { __napi_complete(napi); rps_unlock(queue); local_irq_enable(); @@ -3128,9 +3143,22 @@ static int process_backlog(struct napi_struct *napi, int quota) rps_unlock(queue); local_irq_enable(); - __netif_receive_skb(skb); - } while (++work < quota && jiffies == start_time); + while ((skb = __skb_dequeue(&queue->processing_queue))) { + __netif_receive_skb(skb); + if (++work < quota && jiffies == start_time) + continue; + if (!queue->flush_processing_queue) + goto out; + queue->flush_processing_queue = false; + while ((skb = __skb_dequeue(&queue->processing_queue))) { + __netif_receive_skb(skb); + ++work; + } + goto out; + } + } while (1); +out: return work; } @@ -5487,6 +5515,9 @@ static int dev_cpu_callback(struct notifier_block *nfb, 
raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); + while ((skb = __skb_dequeue(&oldsd->processing_queue))) + netif_rx(skb); + /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) netif_rx(skb); @@ -5709,6 +5740,8 @@ static int __init net_dev_init(void) queue = &per_cpu(softnet_data, i); skb_queue_head_init(&queue->input_pkt_queue); + skb_queue_head_init(&queue->processing_queue); + queue->flush_processing_queue = false; queue->completion_queue = NULL; INIT_LIST_HEAD(&queue->poll_list); ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v2] net: batch skb dequeueing from softnet input_pkt_queue 2010-04-13 15:38 [PATCH v2] net: batch skb dequeueing from softnet input_pkt_queue Changli Gao @ 2010-04-13 8:08 ` Eric Dumazet 2010-04-13 9:50 ` Changli Gao 0 siblings, 1 reply; 11+ messages in thread From: Eric Dumazet @ 2010-04-13 8:08 UTC (permalink / raw) To: Changli Gao; +Cc: David S. Miller, netdev Le mardi 13 avril 2010 à 23:38 +0800, Changli Gao a écrit : > batch skb dequeueing from softnet input_pkt_queue > > batch skb dequeueing from softnet input_pkt_queue to reduce potential lock > contention and irq disabling/enabling. > Very interesting idea, but implementation is too complex, and probably buggy, in a area that too few people understand today. Could you keep it as simple as possible ? > Signed-off-by: Changli Gao <xiaosuo@gmail.com> > ---- > include/linux/netdevice.h | 2 ++ > net/core/dev.c | 45 +++++++++++++++++++++++++++++++++++++++------ > 2 files changed, 41 insertions(+), 6 deletions(-) > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index d1a21b5..bc7a0d7 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -1335,6 +1335,8 @@ struct softnet_data { > struct call_single_data csd ____cacheline_aligned_in_smp; > #endif > struct sk_buff_head input_pkt_queue; > + struct sk_buff_head processing_queue; Probably not necessary. > + volatile bool flush_processing_queue; Use of 'volatile' is strongly discouraged, I would say, forbidden. Its usually a sign of 'I dont exactly what memory ordering I need, so I throw a volatile just in case'. We live in a world full of RCU, read , write, full barriers. And these apis are well documented. 
> struct napi_struct backlog; > }; > > diff --git a/net/core/dev.c b/net/core/dev.c > index a10a216..ac24293 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -2324,6 +2324,11 @@ static void trigger_softirq(void *data) > } > #endif /* CONFIG_SMP */ > > +static inline u32 softnet_input_qlen(struct softnet_data *queue) > +{ > + return queue->input_pkt_queue.qlen + queue->processing_queue.qlen; > +} > + > /* > * enqueue_to_backlog is called to queue an skb to a per CPU backlog > * queue (may be a remote CPU queue). > @@ -2339,8 +2344,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu) > __get_cpu_var(netdev_rx_stat).total++; > > rps_lock(queue); > - if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { > - if (queue->input_pkt_queue.qlen) { > + if (softnet_input_qlen(queue) <= netdev_max_backlog) { > + if (softnet_input_qlen(queue)) { > enqueue: > __skb_queue_tail(&queue->input_pkt_queue, skb); > rps_unlock(queue); > @@ -2803,6 +2808,7 @@ static void flush_backlog(void *arg) > __skb_unlink(skb, &queue->input_pkt_queue); > kfree_skb(skb); > } > + queue->flush_processing_queue = true; Probably not necessary > rps_unlock(queue); > } > > @@ -3112,14 +3118,23 @@ static int process_backlog(struct napi_struct *napi, int quota) > struct softnet_data *queue = &__get_cpu_var(softnet_data); > unsigned long start_time = jiffies; > > + if (queue->flush_processing_queue) { Really... 
this is bloat IMHO > + struct sk_buff *skb; > + > + queue->flush_processing_queue = false; > + while ((skb = __skb_dequeue(&queue->processing_queue))) > + kfree_skb(skb); > + } > + > napi->weight = weight_p; > do { > struct sk_buff *skb; > > local_irq_disable(); > rps_lock(queue); > - skb = __skb_dequeue(&queue->input_pkt_queue); > - if (!skb) { > + skb_queue_splice_tail_init(&queue->input_pkt_queue, > + &queue->processing_queue); > + if (skb_queue_empty(&queue->processing_queue)) { > __napi_complete(napi); > rps_unlock(queue); > local_irq_enable(); > @@ -3128,9 +3143,22 @@ static int process_backlog(struct napi_struct *napi, int quota) > rps_unlock(queue); > local_irq_enable(); > > - __netif_receive_skb(skb); > - } while (++work < quota && jiffies == start_time); > + while ((skb = __skb_dequeue(&queue->processing_queue))) { > + __netif_receive_skb(skb); > + if (++work < quota && jiffies == start_time) > + continue; > + if (!queue->flush_processing_queue) > + goto out; > + queue->flush_processing_queue = false; once again, ... so much code for a unlikely event... > + while ((skb = __skb_dequeue(&queue->processing_queue))) { > + __netif_receive_skb(skb); > + ++work; > + } > + goto out; > + } > + } while (1); > > +out: > return work; > } > > @@ -5487,6 +5515,9 @@ static int dev_cpu_callback(struct notifier_block *nfb, > raise_softirq_irqoff(NET_TX_SOFTIRQ); > local_irq_enable(); > > + while ((skb = __skb_dequeue(&oldsd->processing_queue))) > + netif_rx(skb); > + > /* Process offline CPU's input_pkt_queue */ > while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) > netif_rx(skb); > @@ -5709,6 +5740,8 @@ static int __init net_dev_init(void) > > queue = &per_cpu(softnet_data, i); > skb_queue_head_init(&queue->input_pkt_queue); > + skb_queue_head_init(&queue->processing_queue); > + queue->flush_processing_queue = false; > queue->completion_queue = NULL; > INIT_LIST_HEAD(&queue->poll_list); > I advise to keep it simple. 
My suggestion would be to limit this patch only to process_backlog(). Really if you touch other areas, there is too much risk. Perform sort of skb_queue_splice_tail_init() into a local (stack) queue, but the trick is to not touch input_pkt_queue.qlen, so that we dont slow down enqueue_to_backlog(). Process at most 'quota' skbs (or jiffies limit). relock queue. input_pkt_queue.qlen -= number_of_handled_skbs; In the unlikely event we have unprocessed skbs in local queue, re-insert the remaining skbs at head of input_pkt_queue. Consider if input_pkt_queue.qlen is 0 or not, to call __napi_complete(napi); or not :) ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2] net: batch skb dequeueing from softnet input_pkt_queue 2010-04-13 8:08 ` Eric Dumazet @ 2010-04-13 9:50 ` Changli Gao 2010-04-13 10:19 ` Eric Dumazet 2010-04-13 15:52 ` Paul E. McKenney 0 siblings, 2 replies; 11+ messages in thread From: Changli Gao @ 2010-04-13 9:50 UTC (permalink / raw) To: Eric Dumazet; +Cc: David S. Miller, netdev On Tue, Apr 13, 2010 at 4:08 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > Probably not necessary. > >> + volatile bool flush_processing_queue; > > Use of 'volatile' is strongly discouraged, I would say, forbidden. > volatile is used to avoid compiler optimization. > Its usually a sign of 'I dont exactly what memory ordering I need, so I > throw a volatile just in case'. We live in a world full of RCU, read , > write, full barriers. And these apis are well documented. > There isn't memory accessing order problem. >> @@ -2803,6 +2808,7 @@ static void flush_backlog(void *arg) >> __skb_unlink(skb, &queue->input_pkt_queue); >> kfree_skb(skb); >> } >> + queue->flush_processing_queue = true; > > Probably not necessary > If flush_backlog() is called when there are still packets in processing_queue, there maybe some packets refer to the netdev gone, if we remove this line. >> rps_unlock(queue); >> } >> >> @@ -3112,14 +3118,23 @@ static int process_backlog(struct napi_struct *napi, int quota) >> struct softnet_data *queue = &__get_cpu_var(softnet_data); >> unsigned long start_time = jiffies; >> >> + if (queue->flush_processing_queue) { > > Really... this is bloat IMHO Any better idea? > >> > > I advise to keep it simple. > > My suggestion would be to limit this patch only to process_backlog(). > > Really if you touch other areas, there is too much risk. > > Perform sort of skb_queue_splice_tail_init() into a local (stack) queue, > but the trick is to not touch input_pkt_queue.qlen, so that we dont slow > down enqueue_to_backlog(). > > Process at most 'quota' skbs (or jiffies limit). > > relock queue. 
> input_pkt_queue.qlen -= number_of_handled_skbs; > Oh no, in order to let later packets in as soon as possible, we have to update qlen immediately. -- Regards, Changli Gao(xiaosuo@gmail.com) ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2] net: batch skb dequeueing from softnet input_pkt_queue 2010-04-13 9:50 ` Changli Gao @ 2010-04-13 10:19 ` Eric Dumazet 2010-04-13 10:23 ` Eric Dumazet 2010-04-13 12:53 ` Changli Gao 2010-04-13 15:52 ` Paul E. McKenney 1 sibling, 2 replies; 11+ messages in thread From: Eric Dumazet @ 2010-04-13 10:19 UTC (permalink / raw) To: Changli Gao; +Cc: David S. Miller, netdev Le mardi 13 avril 2010 à 17:50 +0800, Changli Gao a écrit : > On Tue, Apr 13, 2010 at 4:08 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > > > Probably not necessary. > > > >> + volatile bool flush_processing_queue; > > > > Use of 'volatile' is strongly discouraged, I would say, forbidden. > > > > volatile is used to avoid compiler optimization. volatile might be used on special macros only, not to guard a variable. volatile was pre SMP days. We need something better defined these days. > > > Its usually a sign of 'I dont exactly what memory ordering I need, so I > > throw a volatile just in case'. We live in a world full of RCU, read , > > write, full barriers. And these apis are well documented. > > > > There isn't memory accessing order problem. > Really ? If you persist using this volatile thing, then you'll have to convince Mr Linus Torvald himself. Prepare your best guns, because in fact you'll have to convince about one hundred people that know for sure how volatile is weak. > >> @@ -2803,6 +2808,7 @@ static void flush_backlog(void *arg) > >> __skb_unlink(skb, &queue->input_pkt_queue); > >> kfree_skb(skb); > >> } > >> + queue->flush_processing_queue = true; > > > > Probably not necessary > > > > If flush_backlog() is called when there are still packets in > processing_queue, there maybe some packets refer to the netdev gone, > if we remove this line. We dont need this "processing_queue". Once you remove it, there is no extra work to perform. 
> > >> rps_unlock(queue); > >> } > >> > >> @@ -3112,14 +3118,23 @@ static int process_backlog(struct napi_struct *napi, int quota) > >> struct softnet_data *queue = &__get_cpu_var(softnet_data); > >> unsigned long start_time = jiffies; > >> > >> + if (queue->flush_processing_queue) { > > > > Really... this is bloat IMHO > > > Any better idea? > Yes, I have explained it to you. > > > >> > > > > I advise to keep it simple. > > > > My suggestion would be to limit this patch only to process_backlog(). > > > > Really if you touch other areas, there is too much risk. > > > > Perform sort of skb_queue_splice_tail_init() into a local (stack) queue, > > but the trick is to not touch input_pkt_queue.qlen, so that we dont slow > > down enqueue_to_backlog(). > > > > Process at most 'quota' skbs (or jiffies limit). > > > > relock queue. > > input_pkt_queue.qlen -= number_of_handled_skbs; > > > > Oh no, in order to let latter packets in as soon as possible, we have > to update qlen immediately. > Absolutely not. You missed something apparently. You pay the price at each packet enqueue, because you have to compute the sum of two lengthes, and guess what, if you do this you have a cache line miss in one of the operand. Your patch as is is suboptimal. Remember : this batch mode should not change packet queueing at all, only speed it because of less cache line misses. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2] net: batch skb dequeueing from softnet input_pkt_queue 2010-04-13 10:19 ` Eric Dumazet @ 2010-04-13 10:23 ` Eric Dumazet 2010-04-13 12:53 ` Changli Gao 1 sibling, 0 replies; 11+ messages in thread From: Eric Dumazet @ 2010-04-13 10:23 UTC (permalink / raw) To: Changli Gao; +Cc: David S. Miller, netdev Le mardi 13 avril 2010 à 12:19 +0200, Eric Dumazet a écrit : > Le mardi 13 avril 2010 à 17:50 +0800, Changli Gao a écrit : > > On Tue, Apr 13, 2010 at 4:08 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > > Use of 'volatile' is strongly discouraged, I would say, forbidden. > > > > > > > volatile is used to avoid compiler optimization. > > volatile might be used on special macros only, not to guard a variable. > volatile was pre SMP days. We need something better defined these days. > > > > > > Its usually a sign of 'I dont exactly what memory ordering I need, so I > > > throw a volatile just in case'. We live in a world full of RCU, read , > > > write, full barriers. And these apis are well documented. > > > > > > > There isn't memory accessing order problem. > > > > Really ? If you persist using this volatile thing, then you'll have to > convince Mr Linus Torvald himself. Prepare your best guns, because in > fact you'll have to convince about one hundred people that know for sure > how volatile is weak. $ cat Documentation/volatile-considered-harmful.txt Why the "volatile" type class should not be used ------------------------------------------------ C programmers have often taken volatile to mean that the variable could be changed outside of the current thread of execution; as a result, they are sometimes tempted to use it in kernel code when shared data structures are being used. In other words, they have been known to treat volatile types as a sort of easy atomic variable, which they are not. The use of volatile in kernel code is almost never correct; this document describes why. 
The key point to understand with regard to volatile is that its purpose is to suppress optimization, which is almost never what one really wants to do. In the kernel, one must protect shared data structures against unwanted concurrent access, which is very much a different task. The process of protecting against unwanted concurrency will also avoid almost all optimization-related problems in a more efficient way. Like volatile, the kernel primitives which make concurrent access to data safe (spinlocks, mutexes, memory barriers, etc.) are designed to prevent unwanted optimization. If they are being used properly, there will be no need to use volatile as well. If volatile is still necessary, there is almost certainly a bug in the code somewhere. In properly-written kernel code, volatile can only serve to slow things down. Consider a typical block of kernel code: spin_lock(&the_lock); do_something_on(&shared_data); do_something_else_with(&shared_data); spin_unlock(&the_lock); If all the code follows the locking rules, the value of shared_data cannot change unexpectedly while the_lock is held. Any other code which might want to play with that data will be waiting on the lock. The spinlock primitives act as memory barriers - they are explicitly written to do so - meaning that data accesses will not be optimized across them. So the compiler might think it knows what will be in shared_data, but the spin_lock() call, since it acts as a memory barrier, will force it to forget anything it knows. There will be no optimization problems with accesses to that data. If shared_data were declared volatile, the locking would still be necessary. But the compiler would also be prevented from optimizing access to shared_data _within_ the critical section, when we know that nobody else can be working with it. While the lock is held, shared_data is not volatile. When dealing with shared data, proper locking makes volatile unnecessary - and potentially harmful. 
The volatile storage class was originally meant for memory-mapped I/O registers. Within the kernel, register accesses, too, should be protected by locks, but one also does not want the compiler "optimizing" register accesses within a critical section. But, within the kernel, I/O memory accesses are always done through accessor functions; accessing I/O memory directly through pointers is frowned upon and does not work on all architectures. Those accessors are written to prevent unwanted optimization, so, once again, volatile is unnecessary. Another situation where one might be tempted to use volatile is when the processor is busy-waiting on the value of a variable. The right way to perform a busy wait is: while (my_variable != what_i_want) cpu_relax(); The cpu_relax() call can lower CPU power consumption or yield to a hyperthreaded twin processor; it also happens to serve as a compiler barrier, so, once again, volatile is unnecessary. Of course, busy- waiting is generally an anti-social act to begin with. There are still a few rare situations where volatile makes sense in the kernel: - The above-mentioned accessor functions might use volatile on architectures where direct I/O memory access does work. Essentially, each accessor call becomes a little critical section on its own and ensures that the access happens as expected by the programmer. - Inline assembly code which changes memory, but which has no other visible side effects, risks being deleted by GCC. Adding the volatile keyword to asm statements will prevent this removal. - The jiffies variable is special in that it can have a different value every time it is referenced, but it can be read without any special locking. So jiffies can be volatile, but the addition of other variables of this type is strongly frowned upon. Jiffies is considered to be a "stupid legacy" issue (Linus's words) in this regard; fixing it would be more trouble than it is worth. 
- Pointers to data structures in coherent memory which might be modified by I/O devices can, sometimes, legitimately be volatile. A ring buffer used by a network adapter, where that adapter changes pointers to indicate which descriptors have been processed, is an example of this type of situation. For most code, none of the above justifications for volatile apply. As a result, the use of volatile is likely to be seen as a bug and will bring additional scrutiny to the code. Developers who are tempted to use volatile should take a step back and think about what they are truly trying to accomplish. Patches to remove volatile variables are generally welcome - as long as they come with a justification which shows that the concurrency issues have been properly thought through. NOTES ----- [1] http://lwn.net/Articles/233481/ [2] http://lwn.net/Articles/233482/ CREDITS ------- Original impetus and research by Randy Dunlap Written by Jonathan Corbet Improvements via comments from Satyam Sharma, Johannes Stezenbach, Jesper Juhl, Heikki Orsila, H. Peter Anvin, Philipp Hahn, and Stefan Richter. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2] net: batch skb dequeueing from softnet input_pkt_queue 2010-04-13 10:19 ` Eric Dumazet 2010-04-13 10:23 ` Eric Dumazet @ 2010-04-13 12:53 ` Changli Gao 2010-04-13 13:21 ` Eric Dumazet 1 sibling, 1 reply; 11+ messages in thread From: Changli Gao @ 2010-04-13 12:53 UTC (permalink / raw) To: Eric Dumazet; +Cc: David S. Miller, netdev On Tue, Apr 13, 2010 at 6:19 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > Le mardi 13 avril 2010 à 17:50 +0800, Changli Gao a écrit : >> On Tue, Apr 13, 2010 at 4:08 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: >> > >> > Probably not necessary. >> > >> >> + volatile bool flush_processing_queue; >> > >> > Use of 'volatile' is strongly discouraged, I would say, forbidden. >> > >> >> volatile is used to avoid compiler optimization. > > volatile might be used on special macros only, not to guard a variable. > volatile was pre SMP days. We need something better defined these days. > flush_processing_queue is only accessed on the same CPU, so no volatile is needed. I'll remove it in the next version. >> >> @@ -2803,6 +2808,7 @@ static void flush_backlog(void *arg) >> >> __skb_unlink(skb, &queue->input_pkt_queue); >> >> kfree_skb(skb); >> >> } >> >> + queue->flush_processing_queue = true; >> > >> > Probably not necessary >> > >> >> If flush_backlog() is called when there are still packets in >> processing_queue, there maybe some packets refer to the netdev gone, >> if we remove this line. > > We dont need this "processing_queue". Once you remove it, there is no > extra work to perform. OK. If we make processing_queue is a stack variable. When quota or jiffies limit is reached, we have to splice processing_queue back to input_pkt_queue. If flush_backlog() is called before the processing_queue is spliced, there will still packets which refer to the NIC going. Then these packets are queued to input_pkt_queue. When process_backlog() is called again, the dev field of these skbs are wild... Oh, my GOD. 
When RPS is enabled, if flush_backlog(eth0) is called on CPU1 when a skb0(eth0) is dequeued from CPU0's softnet and isn't queued to CPU1's softnet, what will happen? > >> > >> >> >> > >> > I advise to keep it simple. >> > >> > My suggestion would be to limit this patch only to process_backlog(). >> > >> > Really if you touch other areas, there is too much risk. >> > >> > Perform sort of skb_queue_splice_tail_init() into a local (stack) queue, >> > but the trick is to not touch input_pkt_queue.qlen, so that we dont slow >> > down enqueue_to_backlog(). >> > >> > Process at most 'quota' skbs (or jiffies limit). >> > >> > relock queue. >> > input_pkt_queue.qlen -= number_of_handled_skbs; >> > >> >> Oh no, in order to let latter packets in as soon as possible, we have >> to update qlen immediately. >> > > Absolutely not. You missed something apparently. > > You pay the price at each packet enqueue, because you have to compute > the sum of two lengthes, and guess what, if you do this you have a cache > line miss in one of the operand. Your patch as is is suboptimal. > > Remember : this batch mode should not change packet queueing at all, > only speed it because of less cache line misses. > WoW, is it really so expensive? -- Regards, Changli Gao(xiaosuo@gmail.com) ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2] net: batch skb dequeueing from softnet input_pkt_queue 2010-04-13 12:53 ` Changli Gao @ 2010-04-13 13:21 ` Eric Dumazet 2010-04-13 13:38 ` Changli Gao 0 siblings, 1 reply; 11+ messages in thread From: Eric Dumazet @ 2010-04-13 13:21 UTC (permalink / raw) To: Changli Gao; +Cc: David S. Miller, netdev Le mardi 13 avril 2010 à 20:53 +0800, Changli Gao a écrit : > OK. If we make processing_queue is a stack variable. When quota or > jiffies limit is reached, we have to splice processing_queue back to > input_pkt_queue. If flush_backlog() is called before the > processing_queue is spliced, there will still packets which refer to > the NIC going. Then these packets are queued to input_pkt_queue. When > process_backlog() is called again, the dev field of these skbs are > wild... > This is a problem of cooperation between flush_backlog() and process_backlog(). Dont allow flush_backlog() to return if process_backlog() is running. Exactly as before, but lock acquisition done in flush_backlog() should be a bit smarter. > Oh, my GOD. When RPS is enabled, if flush_backlog(eth0) is called on > CPU1 when a skb0(eth0) is dequeued from CPU0's softnet and isn't > queued to CPU1's softnet, what will happen? > I am a bit lost here. flush_backlog() drops skbs, not requeue them. > > > > Absolutely not. You missed something apparently. > > > > You pay the price at each packet enqueue, because you have to compute > > the sum of two lengthes, and guess what, if you do this you have a cache > > line miss in one of the operand. Your patch as is is suboptimal. > > > > Remember : this batch mode should not change packet queueing at all, > > only speed it because of less cache line misses. > > > > WoW, is it really so expensive? > Yes. Whole point of your idea is to remove cache line misses. They cost much more than a spinlock/unlock pair ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2] net: batch skb dequeueing from softnet input_pkt_queue 2010-04-13 13:21 ` Eric Dumazet @ 2010-04-13 13:38 ` Changli Gao 2010-04-13 14:37 ` Eric Dumazet 0 siblings, 1 reply; 11+ messages in thread From: Changli Gao @ 2010-04-13 13:38 UTC (permalink / raw) To: Eric Dumazet; +Cc: David S. Miller, netdev On Tue, Apr 13, 2010 at 9:21 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > This is a problem of cooperation between flush_backlog() and > process_backlog(). Dont allow flush_backlog() to return if > process_backlog() is running. Exactly as before, but lock acquisition > done in flush_backlog() should be a bit smarter. > flush_backlog() is called in IRQ context. Unless you disable irq in process_backlog(), you can't block flush_backlog(). > >> Oh, my GOD. When RPS is enabled, if flush_backlog(eth0) is called on >> CPU1 when a skb0(eth0) is dequeued from CPU0's softnet and isn't >> queued to CPU1's softnet, what will happen? >> > > I am a bit lost here. flush_backlog() drops skbs, not requeue them. > I mean flush_backlog() don't drop all the packets, whose dev point to a special net_device and can't be processed before the net_device disappears. -- Regards, Changli Gao(xiaosuo@gmail.com) ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2] net: batch skb dequeueing from softnet input_pkt_queue 2010-04-13 13:38 ` Changli Gao @ 2010-04-13 14:37 ` Eric Dumazet 0 siblings, 0 replies; 11+ messages in thread From: Eric Dumazet @ 2010-04-13 14:37 UTC (permalink / raw) To: Changli Gao; +Cc: David S. Miller, netdev Le mardi 13 avril 2010 à 21:38 +0800, Changli Gao a écrit : > On Tue, Apr 13, 2010 at 9:21 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > > > This is a problem of cooperation between flush_backlog() and > > process_backlog(). Dont allow flush_backlog() to return if > > process_backlog() is running. Exactly as before, but lock acquisition > > done in flush_backlog() should be a bit smarter. > > > > flush_backlog() is called in IRQ context. Unless you disable irq in > process_backlog(), you can't block flush_backlog(). > There is nothing preventing flush_backlog() to be done differently you know. It was done like that because it was the most simple thing to do given the (basic) constraints. Now if the constraints change, implementation might change too. It is slow path (in most setups) and some extra work to keep fast path really fast is ok. netdevice are dismantled and we respect an RCU grace period before freeing. process_backlog() is done inside a rcu lock, so everything is possible. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2] net: batch skb dequeueing from softnet input_pkt_queue 2010-04-13 9:50 ` Changli Gao 2010-04-13 10:19 ` Eric Dumazet @ 2010-04-13 15:52 ` Paul E. McKenney 2010-04-13 22:43 ` Changli Gao 1 sibling, 1 reply; 11+ messages in thread From: Paul E. McKenney @ 2010-04-13 15:52 UTC (permalink / raw) To: Changli Gao; +Cc: Eric Dumazet, David S. Miller, netdev On Tue, Apr 13, 2010 at 05:50:29PM +0800, Changli Gao wrote: > On Tue, Apr 13, 2010 at 4:08 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > > > Probably not necessary. > > > >> + volatile bool flush_processing_queue; > > > > Use of 'volatile' is strongly discouraged, I would say, forbidden. > > volatile is used to avoid compiler optimization. Would it be reasonable to use ACCESS_ONCE() where this variable is used? Thanx, Paul > > Its usually a sign of 'I dont exactly what memory ordering I need, so I > > throw a volatile just in case'. We live in a world full of RCU, read , > > write, full barriers. And these apis are well documented. > > > > There isn't memory accessing order problem. > > >> @@ -2803,6 +2808,7 @@ static void flush_backlog(void *arg) > >> __skb_unlink(skb, &queue->input_pkt_queue); > >> kfree_skb(skb); > >> } > >> + queue->flush_processing_queue = true; > > > > Probably not necessary > > > > If flush_backlog() is called when there are still packets in > processing_queue, there maybe some packets refer to the netdev gone, > if we remove this line. > > >> rps_unlock(queue); > >> } > >> > >> @@ -3112,14 +3118,23 @@ static int process_backlog(struct napi_struct *napi, int quota) > >> struct softnet_data *queue = &__get_cpu_var(softnet_data); > >> unsigned long start_time = jiffies; > >> > >> + if (queue->flush_processing_queue) { > > > > Really... this is bloat IMHO > > > Any better idea? > > > > >> > > > > I advise to keep it simple. > > > > My suggestion would be to limit this patch only to process_backlog(). > > > > Really if you touch other areas, there is too much risk. 
> > > > Perform sort of skb_queue_splice_tail_init() into a local (stack) queue, > > but the trick is to not touch input_pkt_queue.qlen, so that we dont slow > > down enqueue_to_backlog(). > > > > Process at most 'quota' skbs (or jiffies limit). > > > > relock queue. > > input_pkt_queue.qlen -= number_of_handled_skbs; > > > > Oh no, in order to let latter packets in as soon as possible, we have > to update qlen immediately. > > -- > Regards, > Changli Gao(xiaosuo@gmail.com) > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2] net: batch skb dequeueing from softnet input_pkt_queue 2010-04-13 15:52 ` Paul E. McKenney @ 2010-04-13 22:43 ` Changli Gao 0 siblings, 0 replies; 11+ messages in thread From: Changli Gao @ 2010-04-13 22:43 UTC (permalink / raw) To: paulmck; +Cc: Eric Dumazet, David S. Miller, netdev On Tue, Apr 13, 2010 at 11:52 PM, Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote: > On Tue, Apr 13, 2010 at 05:50:29PM +0800, Changli Gao wrote: >> On Tue, Apr 13, 2010 at 4:08 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: >> > >> > Probably not necessary. >> > >> >> + volatile bool flush_processing_queue; >> > >> > Use of 'volatile' is strongly discouraged, I would say, forbidden. >> >> volatile is used to avoid compiler optimization. > > Would it be reasonable to use ACCESS_ONCE() where this variable is used? Oh, thanks. ACCESS_ONCE() is just what I need. -- Regards, Changli Gao(xiaosuo@gmail.com) ^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2010-04-13 22:43 UTC | newest] Thread overview: 11+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2010-04-13 15:38 [PATCH v2] net: batch skb dequeueing from softnet input_pkt_queue Changli Gao 2010-04-13 8:08 ` Eric Dumazet 2010-04-13 9:50 ` Changli Gao 2010-04-13 10:19 ` Eric Dumazet 2010-04-13 10:23 ` Eric Dumazet 2010-04-13 12:53 ` Changli Gao 2010-04-13 13:21 ` Eric Dumazet 2010-04-13 13:38 ` Changli Gao 2010-04-13 14:37 ` Eric Dumazet 2010-04-13 15:52 ` Paul E. McKenney 2010-04-13 22:43 ` Changli Gao
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).