netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
       [not found] <20040725235927.B30025@electric-eye.fr.zoreil.com>
@ 2004-07-26 15:15 ` Pasi Sjoholm
  2004-07-26 16:37   ` Robert Olsson
  2004-07-26 16:55   ` Stephen Hemminger
  0 siblings, 2 replies; 32+ messages in thread
From: Pasi Sjoholm @ 2004-07-26 15:15 UTC (permalink / raw)
  To: Francois Romieu; +Cc: H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad

On Sun, 25 Jul 2004, Francois Romieu wrote:

> Pasi Sjoholm <ptsjohol@cc.jyu.fi> :
> [...]
> > I haven't been able to reproduce this with normal www-browsing or 
> > ssh-connections but it's always reproducible when my eth0 is under heavy 
> > load.
> I guess it can be reproduced even if the binary (nvidia ?) module is never
> loaded after boot, right ?

After 24 hours of hard working I can answer yes to this question. 
Now I can reproduce this from 2 to 15 minutes with 2 cp-processes from 
samba->workstation->nfs, some software building with make -j 3, playing 
some mp3 via nfs to notice when kernel goes down.. =) and so on..

After that ksoftirqd takes almost all the cpu-time and the network is not 
working at all.

First I made some debugging with:

--
readprofile -r
sleep 10
readprofile -n -v -m /boot/System.map-2.6.7-mm7| sort -n +2 | tail -40
--

I tried this tree different times and here are the results:

-1-
c0114020 unshare_files                                 1   0.0104
c0134400 vma_prio_tree_add                             1   0.0057
c01f1c50 fast_clear_page                               1   0.0104
c01f2190 __copy_to_user_ll                             7   0.0625
c017dbf0 write_profile                                 9   0.1406
c02d1f50 net_rx_action                                10   0.0368
00000000 total                                        29   0.0000
-1-

-2-
c0111c80 finish_task_switch                            1   0.0069
c011f9e0 get_signal_to_deliver                         1   0.0012
c01364c0 __kmalloc                                     1   0.0078
c013ad60 copy_page_range                               1   0.0017
c0141370 page_remove_rmap                              1   0.0069
c014a9f0 do_sync_write                                 1   0.0057
c014aaa0 vfs_write                                     1   0.0033
c0195840 start_this_handle                             1   0.0010
c01f1c20 add_timer_randomness                          1   0.0035
c023f4e0 idedisk_release                               1   0.0052
c017dbf0 write_profile                                 5   0.0781
c01f1f30 extract_entropy                               5   0.0060
c02d1f90 rt_may_expire                                14   0.0972
00000000 total                                        34   0.0000
-2-

-3-
c01105b0 do_page_fault                                 1   0.0007
c012d920 find_lock_page                                1   0.0045
c0131130 bad_page                                      1   0.0063
c01ec430 slow_copy_page                                1   0.0208
c01ec2f0 fast_copy_page                                3   0.0117
c017dcf0 kclist_add                                    4   0.0625
c01ec900 copy_from_user                                4   0.0312
c02c5f00 dev_ifname                                    6   0.0341
00000000 total                                        21   0.0000
-3-

There are quite few ticks in total-rows at least when you are comparing to 
normal operation of kernel: 

--
c017dcf0 kclist_add                                    9   0.1406
c0119100 __do_softirq                                268   2.0938
c0101eb0 default_idle                               9396 195.7500
00000000 total                                      9779   0.0043
--

Also made some debugging with patch which Andrew Morton gave to brad
(http://seclists.org/lists/linux-kernel/2004/Mar/2295.html)

--cut--
diff -puN include/linux/kernel.h~proc-sys-debug include/linux/kernel.h
--- 25/include/linux/kernel.h~proc-sys-debug 2004-02-25 23:12:56.000000000 
-0800+++ 25-akpm/include/linux/kernel.h 2004-02-25 23:12:57.000000000 
-0800
@@ -214,6 +214,8 @@ extern void dump_stack(void);
  1; \
 })

+extern int proc_sys_debug[8];
+
 #endif /* __KERNEL__ */

 #define SI_LOAD_SHIFT 16
diff -puN -L kernel/ksyms.c /dev/null /dev/null
diff -puN kernel/sysctl.c~proc-sys-debug kernel/sysctl.c
--- 25/kernel/sysctl.c~proc-sys-debug 2004-02-25 23:12:56.000000000 -0800
+++ 25-akpm/kernel/sysctl.c 2004-02-25 23:21:37.000000000 -0800
@@ -849,7 +849,26 @@ static ctl_table fs_table[] = {
  { .ctl_name = 0 }
 };

+int proc_sys_debug[8];
+EXPORT_SYMBOL(proc_sys_debug);
+
 static ctl_table debug_table[] = {
+ {1, "0", &proc_sys_debug[0], sizeof(int), 0644, NULL,
+  &proc_dointvec_minmax, &sysctl_intvec, NULL, NULL, NULL},
+ {2, "1", &proc_sys_debug[1], sizeof(int), 0644, NULL,
+  &proc_dointvec_minmax, &sysctl_intvec, NULL, NULL, NULL},
+ {3, "2", &proc_sys_debug[2], sizeof(int), 0644, NULL,
+  &proc_dointvec_minmax, &sysctl_intvec, NULL, NULL, NULL},
+ {4, "3", &proc_sys_debug[3], sizeof(int), 0644, NULL,
+  &proc_dointvec_minmax, &sysctl_intvec, NULL, NULL, NULL},
+ {5, "4", &proc_sys_debug[4], sizeof(int), 0644, NULL,
+  &proc_dointvec_minmax, &sysctl_intvec, NULL, NULL, NULL},
+ {6, "5", &proc_sys_debug[5], sizeof(int), 0644, NULL,
+  &proc_dointvec_minmax, &sysctl_intvec, NULL, NULL, NULL},
+ {7, "6", &proc_sys_debug[6], sizeof(int), 0644, NULL,
+  &proc_dointvec_minmax, &sysctl_intvec, NULL, NULL, NULL},
+ {8, "7", &proc_sys_debug[7], sizeof(int), 0644, NULL,
+  &proc_dointvec_minmax, &sysctl_intvec, NULL, NULL, NULL},
  { .ctl_name = 0 }
 };


_
 kernel/softirq.c |    7 ++++++-
 1 files changed, 6 insertions(+), 1 deletion(-)

diff -puN kernel/softirq.c~a kernel/softirq.c
--- 25/kernel/softirq.c~a 2004-03-11 02:05:20.000000000 -0800
+++ 25-akpm/kernel/softirq.c 2004-03-11 02:06:02.000000000 -0800
@@ -54,8 +54,13 @@ static inline void wakeup_softirqd(void)
  /* Interrupts are disabled: no need to stop preemption */
  struct task_struct *tsk = __get_cpu_var(ksoftirqd);

- if (tsk && tsk->state != TASK_RUNNING)
+ if (tsk && tsk->state != TASK_RUNNING) {
+  if (proc_sys_debug[0]) {
+   printk("%s wakes ksoftirqd\n", current->comm);
+   dump_stack();
+  }
   wake_up_process(tsk);
+ }
 }
--cut--

and running:

 dmesg -c
 echo 1 > /proc/sys/debug/0 ; sleep 5; echo 0 > /proc/sys/debug/0
 dmesg -s 1000000 > /tmp/foo

but it resulted nothing in /tmp/foo and ksoftirqd was eating my cpu time.

SysRq: Show regs gave this:

--
Pid: 2, comm:          ksoftirqd/0
EIP: 0060:[<e0871224>] CPU: 0
EIP is at rtl8139_poll+0xb4/0x100 [8139too]
 EFLAGS: 00000247    Not tainted  (2.6.7-mm7)
EAX: ffffe000 EBX: 00000040 ECX: df4824f8 EDX: c0441978
ESI: df482400 EDI: e0868000 EBP: dff85f80 DS: 007b ES: 007b
CR0: 8005003b CR2: b7c5a000 CR3: 1fafd000 CR4: 000006d0
 [<c0119580>] ksoftirqd+0x0/0xc0
 [<c02c5f3a>] net_rx_action+0x6a/0x110
 [<c01191a9>] __do_softirq+0xa9/0xb0
 [<c01191d7>] do_softirq+0x27/0x30
 [<c01195e8>] ksoftirqd+0x68/0xc0
 [<c01277e5>] kthread+0xa5/0xb0
 [<c0127740>] kthread+0x0/0xb0
 [<c0102111>] kernel_thread_helper+0x5/0x14
--

I'm not a kernel expert but it would seem that ksoftirqd is in some sort a 
loop because I didn't get any "printk("%s wakes ksoftirqd\n", 
current->comm);"-lines.

And rtl8139_poll-function in rtl-8139-drivers is a function made directly 
for NAPI's poll(). But for me is hard to say what's wrong... I will try 
other 8139-card later today if it has a same problem.

> [...]
> > Same thing for me also, except for me it's interrupt 10 (CMI8738-MC6, 
> > eth0), so it's pointing more and more something rtl-8139 related.
> Possible. Which 8139 module do you use ?

8139too.
 
> It would be nice if this thread could be fed to netdev@oss.sgi.com.

Included in cc.

I haven't changed anything else in my hardware but just upgraded my 
10Mbps hub to 100Mbps switch. I guess 10Mbps wasn't generating too many 
interrupts. =)

Hope this helps..

--
Pasi Sjöholm

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-26 15:15 ` ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related) Pasi Sjoholm
@ 2004-07-26 16:37   ` Robert Olsson
  2004-07-26 21:46     ` Pasi Sjoholm
  2004-07-26 16:55   ` Stephen Hemminger
  1 sibling, 1 reply; 32+ messages in thread
From: Robert Olsson @ 2004-07-26 16:37 UTC (permalink / raw)
  To: Pasi Sjoholm
  Cc: Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad


Pasi Sjoholm writes:

 > Pid: 2, comm:          ksoftirqd/0
 > EIP: 0060:[<e0871224>] CPU: 0
 > EIP is at rtl8139_poll+0xb4/0x100 [8139too]
 >  EFLAGS: 00000247    Not tainted  (2.6.7-mm7)
 > EAX: ffffe000 EBX: 00000040 ECX: df4824f8 EDX: c0441978
 > ESI: df482400 EDI: e0868000 EBP: dff85f80 DS: 007b ES: 007b
 > CR0: 8005003b CR2: b7c5a000 CR3: 1fafd000 CR4: 000006d0
 >  [<c0119580>] ksoftirqd+0x0/0xc0
 >  [<c02c5f3a>] net_rx_action+0x6a/0x110
 >  [<c01191a9>] __do_softirq+0xa9/0xb0
 >  [<c01191d7>] do_softirq+0x27/0x30
 >  [<c01195e8>] ksoftirqd+0x68/0xc0
 >  [<c01277e5>] kthread+0xa5/0xb0
 >  [<c0127740>] kthread+0x0/0xb0
 >  [<c0102111>] kernel_thread_helper+0x5/0x14
 > --
 > 
 > I'm not a kernel expert but it would seem that ksoftirqd is in some sort a 
 > loop because I didn't get any "printk("%s wakes ksoftirqd\n", 
 > current->comm);"-lines.

 Hello!
 This looks very much like the problem we see when doing route DoS testing
 with Alexey.

 In summary: High softirq loads can totally kill userland. The reason is that 
 do_softirq() is run from many places hard interrupts, local_bh_enable etc 
 and bypasses the ksoftirqd protection. It just been discussed at OLS with
 Andrea and Dipankar and others. Current RCU suffers from this problem as well.

 I've experimented some code to defer softirq's to ksoftirqd after a time as 
 well as deferring all softirq's to ksoftirqd. Andrea had some ideas as well 
 as Ingo.

 Cheers.
						--ro
 

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-26 15:15 ` ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related) Pasi Sjoholm
  2004-07-26 16:37   ` Robert Olsson
@ 2004-07-26 16:55   ` Stephen Hemminger
  2004-07-26 21:28     ` Pasi Sjoholm
  1 sibling, 1 reply; 32+ messages in thread
From: Stephen Hemminger @ 2004-07-26 16:55 UTC (permalink / raw)
  To: Pasi Sjoholm
  Cc: Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad

On Mon, 26 Jul 2004 18:15:34 +0300 (EEST)
Pasi Sjoholm <ptsjohol@cc.jyu.fi> wrote:

> On Sun, 25 Jul 2004, Francois Romieu wrote:
> 
> > Pasi Sjoholm <ptsjohol@cc.jyu.fi> :
> > [...]
> > > I haven't been able to reproduce this with normal www-browsing or 
> > > ssh-connections but it's always reproducible when my eth0 is under heavy 
> > > load.
> > I guess it can be reproduced even if the binary (nvidia ?) module is never
> > loaded after boot, right ?
> 
> After 24 hours of hard working I can answer yes to this question. 
> Now I can reproduce this from 2 to 15 minutes with 2 cp-processes from 
> samba->workstation->nfs, some software building with make -j 3, playing 
> some mp3 via nfs to notice when kernel goes down.. =) and so on..
> 
> After that ksoftirqd takes almost all the cpu-time and the network is not 
> working at all.

Is network traffic still coming in? or perhaps there is a network packet
that causes some soft irq to go into an infinite loop. The recent iptables bug
with ip options would be an example.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-26 16:55   ` Stephen Hemminger
@ 2004-07-26 21:28     ` Pasi Sjoholm
  0 siblings, 0 replies; 32+ messages in thread
From: Pasi Sjoholm @ 2004-07-26 21:28 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	Robert.Olsson

On Mon, 26 Jul 2004, Stephen Hemminger wrote:

>>>> I haven't been able to reproduce this with normal www-browsing or 
>>>> ssh-connections but it's always reproducible when my eth0 is under heavy 
>>>> load.
>>> I guess it can be reproduced even if the binary (nvidia ?) module is never
>>> loaded after boot, right ?

>> After 24 hours of hard working I can answer yes to this question. 
>> Now I can reproduce this from 2 to 15 minutes with 2 cp-processes from 
>> samba->workstation->nfs, some software building with make -j 3, playing 
>> some mp3 via nfs to notice when kernel goes down.. =) and so on..

>> After that ksoftirqd takes almost all the cpu-time and the network is not 
>> working at all.

> Is network traffic still coming in? 

At least tcpdump doesn't say anything, I can only see only arp-packets 
which my computer (not other computer's arp-packets) has made but no 
response to those.

> or perhaps there is a network packet that causes some soft irq to go 
> into an infinite loop. The recent iptables bug with ip options would be 
> an example.

That would make sense but Robert Olsson had some knowledge of this and 
said that this issue is probably the same that they have discussed in OSL.

--
Pasi Sjöholm

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-26 16:37   ` Robert Olsson
@ 2004-07-26 21:46     ` Pasi Sjoholm
  2004-07-26 22:38       ` Robert Olsson
  0 siblings, 1 reply; 32+ messages in thread
From: Pasi Sjoholm @ 2004-07-26 21:46 UTC (permalink / raw)
  To: Robert Olsson
  Cc: Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

On Mon, 26 Jul 2004, Robert Olsson wrote:

> Pasi Sjoholm writes:

>  > Pid: 2, comm:          ksoftirqd/0
>  > EIP: 0060:[<e0871224>] CPU: 0
>  > EIP is at rtl8139_poll+0xb4/0x100 [8139too]
>  >  EFLAGS: 00000247    Not tainted  (2.6.7-mm7)
>  > EAX: ffffe000 EBX: 00000040 ECX: df4824f8 EDX: c0441978
>  > ESI: df482400 EDI: e0868000 EBP: dff85f80 DS: 007b ES: 007b
>  > CR0: 8005003b CR2: b7c5a000 CR3: 1fafd000 CR4: 000006d0
>  >  [<c0119580>] ksoftirqd+0x0/0xc0
>  >  [<c02c5f3a>] net_rx_action+0x6a/0x110
>  >  [<c01191a9>] __do_softirq+0xa9/0xb0
>  >  [<c01191d7>] do_softirq+0x27/0x30
>  >  [<c01195e8>] ksoftirqd+0x68/0xc0
>  >  [<c01277e5>] kthread+0xa5/0xb0
>  >  [<c0127740>] kthread+0x0/0xb0
>  >  [<c0102111>] kernel_thread_helper+0x5/0x14
>  > --
>  > I'm not a kernel expert but it would seem that ksoftirqd is in some sort a 
>  > loop because I didn't get any "printk("%s wakes ksoftirqd\n", 
>  > current->comm);"-lines.

>  Hello!

Hur är läget Robert?-)

>  This looks very much like the problem we see when doing route DoS testing
>  with Alexey.

Hmm, at least it sounds like same problem and in both situations network 
interface is kept busy.

>  In summary: High softirq loads can totally kill userland. The reason is that 
>  do_softirq() is run from many places hard interrupts, local_bh_enable etc 
>  and bypasses the ksoftirqd protection. It just been discussed at OLS with
>  Andrea and Dipankar and others. Current RCU suffers from this problem as well.

Ok, this explanation makes sense and my point of view I think this is 
quite critical problem if you can "crash" linux kernel just sending enough 
packets to network interface for an example.

>  I've experimented some code to defer softirq's to ksoftirqd after a time as 
>  well as deferring all softirq's to ksoftirqd. Andrea had some ideas as well 
>  as Ingo.

I would be more than glad to help you in testing if you want to publish 
some patches. 

--
Pasi Sjöholm

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-26 21:46     ` Pasi Sjoholm
@ 2004-07-26 22:38       ` Robert Olsson
  2004-07-27  0:16         ` Pasi Sjoholm
  0 siblings, 1 reply; 32+ messages in thread
From: Robert Olsson @ 2004-07-26 22:38 UTC (permalink / raw)
  To: Pasi Sjoholm
  Cc: Robert Olsson, Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm,
	netdev, brad, shemminger


Pasi Sjoholm writes:

 > Hur är läget Robert?-)
   Tack bra!

 > >  In summary: High softirq loads can totally kill userland. The reason is that 
 > >  do_softirq() is run from many places hard interrupts, local_bh_enable etc 
 > >  and bypasses the ksoftirqd protection. It just been discussed at OLS with
 > >  Andrea and Dipankar and others. Current RCU suffers from this problem as well.
 > 
 > Ok, this explanation makes sense and my point of view I think this is 
 > quite critical problem if you can "crash" linux kernel just sending enough 
 > packets to network interface for an example.

 Well the packets also has to create hard softirq loads in practice this means route
 lookup or something else for normal traffic the RX_SOFIRQ is very well behaved
 and schedules itself to give other softirq's a chance to run also I'll guess you 
 have softirq's not only from the network. If you decrease your load a bit you come
 to more nomal operation?
 
 > I would be more than glad to help you in testing if you want to publish 
 > some patches. 

 Well maybe we should start to verify that this problem. Alexey did a litte program 
 doing gettimeofday to run to see how long user mode could be starved. Also note we 
 add new source of softirq's. 


#include <stdio.h> 
#include <sys/time.h>
#include <asm/types.h>

main()
{
	struct timeval tv, prev_tv;
	__s64 diff;
	__u32 i;
	__s32 maxlat = 50;

	  gettimeofday(&prev_tv, NULL);

	  printf("time control loop starting\n");

	  for (i=0;;i++) {
	      gettimeofday(&tv, NULL);
	      diff = (tv.tv_sec - prev_tv.tv_sec)*1000000 +
	             (tv.tv_usec - prev_tv.tv_usec);
	      if (diff > 1000000) 
		  printf("**%lld\n", diff);
	      prev_tv = tv;
	      if (diff > maxlat) {
		      maxlat = diff;
		      printf("new maxlat = %d\n", maxlat);
	      }
	      if(!(i % 1000000))
		      printf("timestamp diff = %lld, maxlat = %d\n", diff, maxlat);
	  }
}

Cheers.
							--ro

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-26 22:38       ` Robert Olsson
@ 2004-07-27  0:16         ` Pasi Sjoholm
  2004-07-27 11:10           ` Robert Olsson
  0 siblings, 1 reply; 32+ messages in thread
From: Pasi Sjoholm @ 2004-07-27  0:16 UTC (permalink / raw)
  To: Robert Olsson
  Cc: Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

On Tue, 27 Jul 2004, Robert Olsson wrote:

>>>  In summary: High softirq loads can totally kill userland. The reason is that 
>>>  do_softirq() is run from many places hard interrupts, local_bh_enable etc 
>>>  and bypasses the ksoftirqd protection. It just been discussed at OLS with
>>>  Andrea and Dipankar and others. Current RCU suffers from this problem as well.
>> Ok, this explanation makes sense and my point of view I think this is 
>> quite critical problem if you can "crash" linux kernel just sending enough 
>> packets to network interface for an example.
> 
>  Well the packets also has to create hard softirq loads in practice this means route
>  lookup or something else for normal traffic the RX_SOFIRQ is very well behaved
>  and schedules itself to give other softirq's a chance to run also I'll guess you 
>  have softirq's not only from the network. If you decrease your load a bit you come
>  to more nomal operation?

The system will be more responsive but won't ever come back to normal 
operation and if I use my computer just for some chatting/writing/browsing 
the ksoftirqd-problem won't ever happen. 

It is also very hard to reproduce with only high network load. I ran some 
test with 100Mbps load (RX and TX both) for 24hours without any problems 
but soon as I started to compile some stuff or doing something 
cpu-intensive the system would do crash-boom-bang.

>> I would be more than glad to help you in testing if you want to publish 
>> some patches. 
>  Well maybe we should start to verify that this problem. Alexey did a litte program 
>  doing gettimeofday to run to see how long user mode could be starved. Also note we 
>  add new source of softirq's. 

First run:

--
timestamp diff = 1, maxlat = 50963
timestamp diff = 0, maxlat = 50963
timestamp diff = 0, maxlat = 50963
timestamp diff = 1, maxlat = 50963
timestamp diff = 0, maxlat = 50963
timestamp diff = 0, maxlat = 50963
new maxlat = 124428
new maxlat = 511817
timestamp diff = 1, maxlat = 511817
new maxlat = 749913
new maxlat = 775142
**4581159
new maxlat = 4581159
**1704065
**1464153
timestamp diff = 1, maxlat = 4581159
**1448939
**1464013
**1021339
**1568588
**1861657
timestamp diff = 0, maxlat = 4581159
--

Second run:

--
timestamp diff = 0, maxlat = 832003
timestamp diff = 1, maxlat = 832003
timestamp diff = 0, maxlat = 832003
timestamp diff = 0, maxlat = 832003
timestamp diff = 0, maxlat = 832003
timestamp diff = 1, maxlat = 832003
**1793951
new maxlat = 1793951
timestamp diff = 0, maxlat = 1793951
**2579893
new maxlat = 2579893
timestamp diff = 0, maxlat = 2579893
timestamp diff = 0, maxlat = 2579893
timestamp diff = 0, maxlat = 2579893
**1004449
**3199625
new maxlat = 3199625
timestamp diff = 0, maxlat = 3199625
timestamp diff = 1, maxlat = 3199625
--

When the system has gone to this state you will have to wait quite a long 
time to receive a new line.

--
Pasi Sjöholm 

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-27  0:16         ` Pasi Sjoholm
@ 2004-07-27 11:10           ` Robert Olsson
  2004-07-27 14:20             ` Pasi Sjoholm
  0 siblings, 1 reply; 32+ messages in thread
From: Robert Olsson @ 2004-07-27 11:10 UTC (permalink / raw)
  To: Pasi Sjoholm
  Cc: Robert Olsson, Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm,
	netdev, brad, shemminger


Pasi Sjoholm writes:

 > First run:
 > timestamp diff = 0, maxlat = 4581159

 Yes you starved your user apps for ~5 sec. 
 
 Any idea where your load comes from? Pure NFS network load cannot be hard.
 
 Cheers.
						--ro

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-27 11:10           ` Robert Olsson
@ 2004-07-27 14:20             ` Pasi Sjoholm
  2004-07-27 20:24               ` Robert Olsson
  0 siblings, 1 reply; 32+ messages in thread
From: Pasi Sjoholm @ 2004-07-27 14:20 UTC (permalink / raw)
  To: Robert Olsson
  Cc: Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

On Tue, 27 Jul 2004, Robert Olsson wrote:

>  > First run:
>  > timestamp diff = 0, maxlat = 4581159
>  Yes you starved your user apps for ~5 sec. 
>  Any idea where your load comes from? Pure NFS network load cannot be hard.

Yeah, when the ksoftirqd is taking all the cpu it will be like that, but 
when the kernel is behaving normally the starving diff is between 0->1sec.

With two samba-server-> workstation -> nfs-server file copys on the diff 
is normally 0.02secs if I'm not doing anything else but If I will do 
something cpu-intensive like compiling the kernel with "make -j3", the 
ksoftirqd will be soon taking all the cpu-time but this requires that I 
will also have those network file transfers going on.

--
Pasi Sjöholm



  

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-27 14:20             ` Pasi Sjoholm
@ 2004-07-27 20:24               ` Robert Olsson
  2004-07-27 21:27                 ` Pasi Sjoholm
  0 siblings, 1 reply; 32+ messages in thread
From: Robert Olsson @ 2004-07-27 20:24 UTC (permalink / raw)
  To: Pasi Sjoholm
  Cc: Robert Olsson, Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm,
	netdev, brad, shemminger


Pasi Sjoholm writes:

 > Yeah, when the ksoftirqd is taking all the cpu it will be like that, but 
 > when the kernel is behaving normally the starving diff is between 0->1sec.

 Well ksoftirqd makes your kernel load just visible which is good and 
 ksofirqd gets accounted for this when softirq's get deferred to it.
 It may look like goes from 0 to 100% but thats probably not the case.
 The problem is we can starve userland at high loads. As said we were
 trying some way to cure this I may have some old patch if you like to try.

 Cheers.
						--ro
 

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-27 20:24               ` Robert Olsson
@ 2004-07-27 21:27                 ` Pasi Sjoholm
  2004-07-28 18:35                   ` Robert Olsson
  0 siblings, 1 reply; 32+ messages in thread
From: Pasi Sjoholm @ 2004-07-27 21:27 UTC (permalink / raw)
  To: Robert Olsson
  Cc: Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

On Tue, 27 Jul 2004, Robert Olsson wrote:

>  > Yeah, when the ksoftirqd is taking all the cpu it will be like that, but 
>  > when the kernel is behaving normally the starving diff is between 0->1sec.
>  Well ksoftirqd makes your kernel load just visible which is good and 
>  ksofirqd gets accounted for this when softirq's get deferred to it.
>  It may look like goes from 0 to 100% but thats probably not the case.
>  The problem is we can starve userland at high loads. As said we were
>  trying some way to cure this I may have some old patch if you like to try.

Ok, as I said before I'm willing to test your patches. 

It would be nice that one could use the full capacity of his/her computer.
This is not a big problem for everyday use for a workstation but prevents 
2.6-series to be used in production-enviroments in the servers.
But hey.. we need to do some work and maybe we will resolve this. =)

--
Pasi Sjöholm

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-27 21:27                 ` Pasi Sjoholm
@ 2004-07-28 18:35                   ` Robert Olsson
  2004-07-28 19:09                     ` James Morris
  2004-07-29  0:46                     ` Pasi Sjoholm
  0 siblings, 2 replies; 32+ messages in thread
From: Robert Olsson @ 2004-07-28 18:35 UTC (permalink / raw)
  To: Pasi Sjoholm
  Cc: Robert Olsson, Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm,
	netdev, brad, shemminger


Pasi Sjoholm writes:
 > On Tue, 27 Jul 2004, Robert Olsson wrote:

 > It would be nice that one could use the full capacity of his/her computer.
 > This is not a big problem for everyday use for a workstation but prevents 
 > 2.6-series to be used in production-enviroments in the servers.
 > But hey.. we need to do some work and maybe we will resolve this. =)

Well the 2.6 problem we ran into was due to the fact that RCU suffered
from this userland starvation. I think Dipankar is pushing a patch for RCU
this now. But it does not address userland starvation so if you your workload 
can give reproduceably results wrt starvation (Alexey's app) we can do some
tests. First I think should be collect data from current system and check 
that results a reproduceable. 

Below is a patch to monitor softirq's it uses fastroute stats in softnet_stat
you may have to hack it.

And maybe we should take the experiment disussions off the list.

Cheers.
						--ro


--- kernel/softirq.c.orig	2004-03-11 03:55:24.000000000 +0100
+++ kernel/softirq.c	2004-03-31 18:15:26.000000000 +0200
@@ -69,7 +69,13 @@
  */
 #define MAX_SOFTIRQ_RESTART 10
 
-asmlinkage void do_softirq(void)
+
+/* Use the fastroute stats. */
+
+#include <linux/netdevice.h>
+DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat);
+
+asmlinkage void do_softirq(int from)
 {
 	int max_restart = MAX_SOFTIRQ_RESTART;
 	__u32 pending;
@@ -84,9 +90,44 @@
 
 	if (pending) {
 		struct softirq_action *h;
+		struct task_struct *tsk = __get_cpu_var(ksoftirqd);
 
 		local_bh_disable();
+#if 0		
+		/* Avoid softirq's from DoS'ing user apps incl. RCU's etc */
+
+                if (unlikely(from != SIRQ_FROM_KSOFTIRQD && 
+			     tsk  &&
+			     sched_clock() - tsk->timestamp > 
+			     (unsigned long long) 2*1000000000 &&
+			     !(current->state & (TASK_DEAD | TASK_ZOMBIE)))) {
+			
+			set_tsk_need_resched(current);
+			local_irq_disable();
+			goto done;
+                }
+#endif
+
 restart:
+		switch (from) {
+			
+		case SIRQ_FROM_BH:
+			__get_cpu_var(netdev_rx_stat).fastroute_hit++;
+			break;
+			
+		case SIRQ_FROM_KSOFTIRQD:
+			__get_cpu_var(netdev_rx_stat).fastroute_success++;
+			break;
+
+		case SIRQ_FROM_IRQEXIT:
+			__get_cpu_var(netdev_rx_stat).fastroute_defer++;
+			break;
+  
+
+		default:
+			__get_cpu_var(netdev_rx_stat).fastroute_deferred_out++;
+			
+		}
 		/* Reset the pending bitmask before enabling irqs */
 		local_softirq_pending() = 0;
 
@@ -106,6 +147,7 @@
 		pending = local_softirq_pending();
 		if (pending && --max_restart)
 			goto restart;
+ done:
 		if (pending)
 			wakeup_softirqd();
 		__local_bh_enable();
@@ -122,7 +164,7 @@
 	WARN_ON(irqs_disabled());
 	if (unlikely(!in_interrupt() &&
 		     local_softirq_pending()))
-		invoke_softirq();
+		invoke_softirq(SIRQ_FROM_BH);
 	preempt_check_resched();
 }
 EXPORT_SYMBOL(local_bh_enable);
@@ -324,7 +366,7 @@
 		__set_current_state(TASK_RUNNING);
 
 		while (local_softirq_pending()) {
-			do_softirq();
+			do_softirq(SIRQ_FROM_KSOFTIRQD);
 			cond_resched();
 		}
 
--- include/linux/netdevice.h~	2004-03-11 03:55:44.000000000 +0100
+++ include/linux/netdevice.h	2004-03-31 12:24:57.000000000 +0200
@@ -669,7 +669,7 @@
 {
        int err = netif_rx(skb);
        if (softirq_pending(smp_processor_id()))
-               do_softirq();
+               do_softirq(SIRQ_FROM_NETIF_RX_NI);
        return err;
 }
 
--- include/linux/interrupt.h.orig	2004-03-31 18:24:03.000000000 +0200
+++ include/linux/interrupt.h	2004-03-31 18:19:28.000000000 +0200
@@ -92,7 +92,17 @@
 	void	*data;
 };
 
-asmlinkage void do_softirq(void);
+/* Softirq originator */
+enum 
+{
+	SIRQ_FROM_KSOFTIRQD=0,
+	SIRQ_FROM_IRQEXIT,
+	SIRQ_FROM_BH,
+	SIRQ_FROM_NETIF_RX_NI,
+	SIRQ_FROM_PKTGEN
+};
+
+asmlinkage void do_softirq(int from);
 extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
 extern void softirq_init(void);
 #define __raise_softirq_irqoff(nr) do { local_softirq_pending() |= 1UL << (nr); } while (0)
@@ -100,7 +110,7 @@
 extern void FASTCALL(raise_softirq(unsigned int nr));
 
 #ifndef invoke_softirq
-#define invoke_softirq() do_softirq()
+#define invoke_softirq(from) do_softirq(from)
 #endif
 
 
--- include/asm-i386/hardirq.h.orig	2004-03-11 03:55:37.000000000 +0100
+++ include/asm-i386/hardirq.h	2004-03-31 18:27:17.000000000 +0200
@@ -88,7 +88,7 @@
 do {									\
 		preempt_count() -= IRQ_EXIT_OFFSET;			\
 		if (!in_interrupt() && softirq_pending(smp_processor_id())) \
-			do_softirq();					\
+			do_softirq(SIRQ_FROM_IRQEXIT);			\
 		preempt_enable_no_resched();				\
 } while (0)
 
--- net/core/pktgen.c~	2004-03-11 03:55:36.000000000 +0100
+++ net/core/pktgen.c	2004-03-31 12:24:57.000000000 +0200
@@ -710,7 +710,7 @@
 				if (need_resched())
 					schedule();
 				else
-					do_softirq();
+					do_softirq(SIRQ_FROM_PKTGEN);
 			} while (netif_queue_stopped(odev));
 			idle = cycles() - idle_start;
 			info->idle_acc += idle;

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-28 18:35                   ` Robert Olsson
@ 2004-07-28 19:09                     ` James Morris
  2004-07-29  0:46                     ` Pasi Sjoholm
  1 sibling, 0 replies; 32+ messages in thread
From: James Morris @ 2004-07-28 19:09 UTC (permalink / raw)
  To: Robert Olsson
  Cc: Pasi Sjoholm, Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm,
	netdev, brad, shemminger

On Wed, 28 Jul 2004, Robert Olsson wrote:

> And maybe we should take the experiment disussions off the list.

This is what netdev is for :-)


- James
-- 
James Morris
<jmorris@redhat.com>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-28 18:35                   ` Robert Olsson
  2004-07-28 19:09                     ` James Morris
@ 2004-07-29  0:46                     ` Pasi Sjoholm
  2004-07-30  9:18                       ` Robert Olsson
  1 sibling, 1 reply; 32+ messages in thread
From: Pasi Sjoholm @ 2004-07-29  0:46 UTC (permalink / raw)
  To: Robert Olsson
  Cc: Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

On Wed, 28 Jul 2004, Robert Olsson wrote:

>> It would be nice that one could use the full capacity of his/her computer.
>> This is not a big problem for everyday use for a workstation but prevents 
>> 2.6-series to be used in production-enviroments in the servers.
>> But hey.. we need to do some work and maybe we will resolve this. =)
> this now. But it does not address userland starvation so if you your workload 
> can give reproduceably results wrt starvation (Alexey's app) we can do some
> tests. First I think should be collect data from current system and check 
> that results a reproduceable. 

It takes about 2 minutes to reproduce the symptoms so it's not a problem 
anymore when I know exactly what I have to do.
 
> Below is a patch to monitor softirq's it uses fastroute stats in softnet_stat
> you may have to hack it.

Ok, I had to do some modifications but here are the results:

while true; cat /proc/net/softnet_stat | tee -a log.txt; sleep 5; done

The first log is when running exact same patch you sent.

--
000401f1 00000000 00000000 00000000 000002ec 000000d8 00084026
0004495c 00000000 00000000 00000000 00000326 000000d8 0008ae93
0004820b 00000000 00000000 00000000 0000034a 000000d8 00090755
0004a613 00000000 00000000 00000000 00000358 000000d8 00093f0f
0004ca12 00000000 00000000 00000000 00000370 000000da 000976c9
000500f2 00000000 00000000 00000000 0000045e 000000da 0009cf4b
0005417b 00000000 00000000 00000000 000005f8 000000da 000a36b5
00056a66 00000000 00000000 00000000 0000064c 000000da 000a7619
0005a94b 00000000 00000000 00000000 000007bf 000000da 000ad9da
0005d9b7 00000000 00000000 00000000 00000816 000000db 000b1fff
00060286 00000000 00000000 00000000 00000834 000000db 000b5dee
00064ffb 00000000 00000000 00000000 00000a0c 000000db 000bd33c
00069498 00000000 00000000 00000000 00000b97 000000db 000c3d62
0006cdf1 00000000 00000000 00000000 00000cc5 000000db 000c972b
0006f9cc 00000000 00000000 00000000 00000d43 000000db 000cde12
0007280d 00000000 00000000 00000000 00000dea 000000db 000d268d
00074f33 00000000 00000000 00000000 00000e3e 000000db 000d655b
00078271 00000000 00000000 00000000 00000f45 000000db 000db849
0007beee 00000000 00000000 00000000 0000106e 000000db 000e18ae
0007e402 00000000 00000000 00000000 00001086 000000db 000e513b
000815c4 00000000 00000000 00000000 0000114d 000000db 000e9d33
00082abc 00000000 0000076c 00000000 000011f0 000001ad 000ec552
00082abc 00000000 00001180 00000000 000014e8 00000207 000ecc14
00082abc 00000000 00001b44 00000000 000014e8 00000257 000ed588
00082abc 00000000 0000251c 00000000 000018bc 000002bb 000edb28
00082abc 00000000 00002ee0 00000000 00001970 0000033d 000ee3b6
00082abc 00000000 000038e0 00000000 00001eac 0000038d 000ee82a
00082abc 00000000 0000443e 00000000 00002244 00000405 000eef78
00082abc 00000000 00004e02 00000000 000024c4 00000469 000ef658
00082abc 00000000 000057c6 00000000 000026cc 000004c3 000efdba
00082abc 00000000 000061da 00000000 00002910 00000513 000f053a
00082abc 00000000 00006bbc 00000000 00002b2c 0000056d 000f0ca6
00082abe 00000000 000075c6 00000000 00002e10 000005d1 000f1368
00082abe 00000000 00007f9e 00000000 000030a4 00000635 000f1a48
00082abe 00000000 000089da 00000000 00003338 000006a3 000f2182
00082abe 00000000 00009420 00000000 00003554 000006fd 000f2952
00082abe 00000000 00009e70 00000000 00003c34 00000829 000f2b96
00082ac0 00000000 0000a8c0 00000000 000044cc 00000991 000f2be6
00082ac0 00000000 0000b2e8 00000000 00004814 00000fd1 000f2c86
00082ac0 00000000 0000bcfc 00000000 00004814 0000199f 000f2ccc
00082ac0 00000000 0000c72e 00000000 00004814 00002377 000f2d26
00082ac2 00000000 0000d142 00000000 00004864 00002c19 000f2e48
00082ac2 00000000 0000db56 00000000 00004864 0000358d 000f2ee8
00082ac2 00000000 0000e574 00000000 00004864 00003f33 000f2f60
00082ac2 00000000 0000ef9c 00000000 0000497c 000047e9 000f2fba
00082ac2 00000000 0000f9b0 00000000 0000497c 000051ad 000f300a
00082ac2 00000000 000103ce 00000000 0000497c 00005b7b 000f305a
00082ac4 00000000 00010dec 00000000 0000497c 00006549 000f30aa
00082ac4 00000000 0001180a 00000000 0000497c 00006f17 000f30fa
00082ac4 00000000 0001225a 00000000 00004a94 00007809 000f3140
--

and the second one is when that if-condition is true (just wanted to try 
if that would make any difference):

#if 1
        /* Avoid softirq's from DoS'ing user apps incl. RCU's etc */

--
00000082 00000000 00000000 00000000 00000010 00000116 0001fe40
00000082 00000000 00000000 00000000 00000010 00000119 000211f9
0000094d 00000000 00000000 00000000 00000014 0000011b 00022e69
00004ab8 00000000 00000000 00000000 00000032 0000011d 0002877e
00006b19 00000000 00000000 00000000 0000003f 0000011f 0002cb8e
0000c7a0 00000000 00000000 00000000 00000073 00000122 0003409e
0001334a 00000000 00000000 00000000 000000d6 00000124 0003dd4e
00017537 00000000 00000000 00000000 00000113 00000127 00044598
0001b528 00000000 00000000 00000000 0000015a 00000129 0004acb5
0001ec8f 00000000 00000000 00000000 000001ae 0000012b 0005024a
00021186 00000000 00000000 00000000 000001c1 0000012e 00053ace
000236f1 00000000 00000000 00000000 00000205 0000012e 000575c7
00026980 00000000 00000000 00000000 0000032b 0000012e 0005c601
0002a70a 00000000 00000000 00000000 000004aa 0000012e 0006258f
0002e715 00000000 00000000 00000000 00000664 0000012e 00068ddc
00030c8b 00000000 00000000 00000000 00000690 0000012e 0006c872
0003303e 00000000 00000000 00000000 000006a3 0000012e 0006ffc6
00036172 00000000 00000000 00000000 00000786 0000012e 00074e6d
0003a3c8 00000000 00000000 00000000 0000096a 0000012e 0007b998
0003d62b 00000000 00000000 00000000 00000a85 0000012e 000808b0
000401ab 00000000 00000000 00000000 00000aa4 0000012e 000847bf
000426ba 00000000 00000000 00000000 00000ab5 0000012e 0008807b
00046099 00000000 00000000 00000000 00000c57 0000012e 0008dd31
0004a27a 00000000 00000000 00000000 00000e0b 0000012e 00094686
0004c2dc 00000000 00000122 00000000 00000e2c 000001b0 000979c8
0004c2dc 00000000 00000bae 00000000 00000e2c 00000228 000983dc
0004c2dc 00000000 00001568 00000000 00001084 00000282 00098ae4
0004c2dc 00000000 00001f0e 00000000 00001084 000002f0 0009941c
0004c2dc 00000000 00002968 00000000 00001084 00000368 00099dfe
0004c2dc 00000000 00003354 00000000 00001426 000003ae 0009a402
0004c2dc 00000000 00003d04 00000000 0000150c 0000041c 0009ac5e
0004c2dc 00000000 00004790 00000000 00001548 00000494 0009b636
0004c2dc 00000000 00005140 00000000 00001548 00000502 0009bf78
0004c2de 00000000 00005b68 00000000 00001548 0000057a 0009c928
0004c2e0 00000000 000065ae 00000000 00001598 000005e8 0009d2b0
0004c2e2 00000000 00006f4a 00000000 00001598 00000660 0009dbd4
0004c2e2 00000000 000079a4 00000000 00001660 000006ce 0009e4f8
0004c2e2 00000000 000083d6 00000000 00001660 00000746 0009eeb2
0004c2e2 00000000 00008e08 00000000 00001764 000007b4 0009f772
0004c2ee 00000000 00009858 00000000 00001764 0000082c 000a014a
0004c2f6 00000000 0000a1fe 00000000 00001764 0000089a 000a0a82
0004c2fc 00000000 0000abcc 00000000 0000182c 000008f4 000a132e
0004c2fc 00000000 0000b626 00000000 0000182c 000011aa 000a14d2
0004c302 00000000 0000c03a 00000000 00001872 00001aec 000a155e
--

and it did not make any difference. I have cut out the output of "cat 
softnet_stat to show columns from 1 to 7. 

- When the ksoftirqd starts to eat cpu-time time_squeeze-value (3rd 
column) starts growing (in both cases it's same thing). 

- We are also getting more hits from SIRQ_FROM_KSOFTIRQD 
immediately after that. (6th column)

- Total-column's value stops growing although network file transfers 
are still on. (1st column)
 
> And maybe we should take the experiment disussions off the list.

I think that we should leave netdev as Francois requested it in first 
place but we can drop the lkml if you want to.

--
Pasi Sjöholm

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-29  0:46                     ` Pasi Sjoholm
@ 2004-07-30  9:18                       ` Robert Olsson
  2004-07-30 12:56                         ` Pasi Sjoholm
  0 siblings, 1 reply; 32+ messages in thread
From: Robert Olsson @ 2004-07-30  9:18 UTC (permalink / raw)
  To: Pasi Sjoholm
  Cc: Robert Olsson, Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm,
	netdev, brad, shemminger


Pasi Sjoholm writes:

You should monitor the the user app (gettimeofday()monitoring for starvation
this is the most important measure and what we are trying to improve.

We can hardly expect softirq's alone to give us the balance of load we wish. 
At overload something has to get less resources. Even we defer all softirq's 
to scheduler context there is no way making any distinguish between them 
unless we run them in separate processes i.e one RX_SOFIRQ, TX_SOFIRQ etc. 
This could solve some problem I just discussed with Jamal where the RX 
softirq overruns the TX softirq and causes drop at egress (qdisc) when bus 
BW is saturated. Running softirq's under schedules context's can cause other 
delays and other problems.

About tour test;
I dunno if the absolute values are comparable but we see a some change 
in behavior. I use your two last lines from your test assuming these
are the highest loaded 

                                    BH       KSOFT    IRQ-exit
--------------------------------------------------------------
00082ac4 00000000 0001180a 00000000 0000497c 00006f17 000f30fa
00082ac4 00000000 0001225a 00000000 00004a94 00007809 000f3140
280+2290+70 = 2640                        10       87        3 %


(Deffering to ksoftirqd after 2 sec patch)
0004c2fc 00000000 0000b626 00000000 0000182c 000011aa 000a14d2
0004c302 00000000 0000c03a 00000000 00001872 00001aec 000a155e
70+2370+140=2580                           3       92        5 %

So most ksoftirq's runs most softirq's which is good. Without this you would 
not be able to type any commands at all. Also we see some effects from the 
path. Can you monitor userland starvation here too?

> - When the ksoftirqd starts to eat cpu-time time_squeeze-value (3rd 
> column) starts growing (in both cases it's same thing). 

This OK as we have to throttle.

> - We are also getting more hits from SIRQ_FROM_KSOFTIRQD 
> immediately after that. (6th column)

Good. 

> - Total-column's value stops growing although network file transfers 
> are still on. (1st column)

Well ksoftirqd now runs RX softirq and competes heavily with other processes 
for your CPU you may have to adjust priorities to get your desired balance.
Can you experiment a bit?
 
> > And maybe we should take the experiment disussions off the list.

> I think that we should leave netdev as Francois requested it in first 
> place but we can drop the lkml if you want to.

Well it's not only a network issue.

Cheers.
						--ro

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-30  9:18                       ` Robert Olsson
@ 2004-07-30 12:56                         ` Pasi Sjoholm
  2004-07-30 14:05                           ` Robert Olsson
  2004-07-30 14:12                           ` Robert Olsson
  0 siblings, 2 replies; 32+ messages in thread
From: Pasi Sjoholm @ 2004-07-30 12:56 UTC (permalink / raw)
  To: Robert Olsson
  Cc: Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

On Fri, 30 Jul 2004, Robert Olsson wrote:

> You should monitor the the user app (gettimeofday()monitoring for starvation
> this is the most important measure and what we are trying to improve.

I can do that after couple of days.. I have to get married tomorrow and 
spend some time with my wife. =)
 
> We can hardly expect softirq's alone to give us the balance of load we wish. 
> At overload something has to get less resources. Even we defer all softirq's 
> to scheduler context there is no way making any distinguish between them 
> unless we run them in separate processes i.e one RX_SOFIRQ, TX_SOFIRQ etc. 
> This could solve some problem I just discussed with Jamal where the RX 
> softirq overruns the TX softirq and causes drop at egress (qdisc) when bus 
> BW is saturated. Running softirq's under schedules context's can cause other 
> delays and other problems.

Ok, I understand that you can't do 110% if you have only 100% so someone 
has to wait. It would not matter if networks speed would slow down to 
1/100 from the maximum speed if it would still somehow work and not 
crashing the whole userspace.

I don't remember if I have said this but when the ksoftirqd has started to 
take all the cpu-time there is no way to stop it excluding booting 
computer. You can kill or stop all the processes which are taking your 
cpu-time (ie. source compiling) but network wont start to work or neither 
there is no free cpu-time for use because ksoftirqd won't stop eating it.

Actually, for now I would not care how much the kernel would slow down but 
we have to get some stability. Restarting your computer everytime this 
happens is not a solution.

> So most ksoftirq's runs most softirq's which is good. Without this you would 
> not be able to type any commands at all. Also we see some effects from the 
> path. Can you monitor userland starvation here too?

Sure.. 

> > - When the ksoftirqd starts to eat cpu-time time_squeeze-value (3rd 
> > column) starts growing (in both cases it's same thing). 
> This OK as we have to throttle.

Sure, but we should not crash the whole userspace. Why does kernel 
suddenly think that it won't give any time for softirq's. Or it 
does because I can write commands and etc. but the network won't work at all.
 
> > - Total-column's value stops growing although network file transfers 
> > are still on. (1st column)
> Well ksoftirqd now runs RX softirq and competes heavily with other processes 
> for your CPU you may have to adjust priorities to get your desired balance.
> Can you experiment a bit?

There is nothing to priorise after I have killed/quitted the jobs which we 
taking all the cpu-time. Nothing more left than ksoftirqd and it will eat 
all the cpu-time.

Of course I could try something like "nice make -j3" and see if it will 
still do the same shit.
  
--
Pasi Sjöholm

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-30 12:56                         ` Pasi Sjoholm
@ 2004-07-30 14:05                           ` Robert Olsson
  2004-07-30 18:40                             ` Pasi Sjoholm
  2004-07-30 14:12                           ` Robert Olsson
  1 sibling, 1 reply; 32+ messages in thread
From: Robert Olsson @ 2004-07-30 14:05 UTC (permalink / raw)
  To: Pasi Sjoholm
  Cc: Robert Olsson, Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm,
	netdev, brad, shemminger


Pasi Sjoholm writes:

 > I don't remember if I have said this but when the ksoftirqd has started to 
 > take all the cpu-time there is no way to stop it excluding booting 
 > computer. You can kill or stop all the processes which are taking your 
 > cpu-time (ie. source compiling) but network wont start to work or neither 
 > there is no free cpu-time for use because ksoftirqd won't stop eating it.

 No you didn't then it seems you are hit by some bug. Have the bug happen.
 Kill userland processes like as you did. Try find out what's running. I would 
 look in /proc/interrupt /proc/net/softnet_stat /proc/net/dev or maybe best to 
 take a profile if possible or Magic SysRq to find out what's looping. Also try
 ifconfig down.

 Cheers.
						--ro

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-30 12:56                         ` Pasi Sjoholm
  2004-07-30 14:05                           ` Robert Olsson
@ 2004-07-30 14:12                           ` Robert Olsson
  1 sibling, 0 replies; 32+ messages in thread
From: Robert Olsson @ 2004-07-30 14:12 UTC (permalink / raw)
  To: Pasi Sjoholm
  Cc: Robert Olsson, Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm,
	netdev, brad, shemminger


Pasi Sjoholm writes:

 > I can do that after couple of days.. I have to get married tomorrow and 
 > spend some time with my wife. =)

 Gratulerar! 
 Jag höll på missa det. No hurry for me I have a couple of days off.

						--ro

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-30 14:05                           ` Robert Olsson
@ 2004-07-30 18:40                             ` Pasi Sjoholm
  2004-07-31 12:33                               ` Francois Romieu
  0 siblings, 1 reply; 32+ messages in thread
From: Pasi Sjoholm @ 2004-07-30 18:40 UTC (permalink / raw)
  To: Robert Olsson
  Cc: Francois Romieu, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

[-- Attachment #1: Type: TEXT/PLAIN, Size: 4306 bytes --]

>> I don't remember if I have said this but when the ksoftirqd has started to 
>> take all the cpu-time there is no way to stop it excluding booting 
>> computer. You can kill or stop all the processes which are taking your 
>> cpu-time (ie. source compiling) but network wont start to work or neither 
>> there is no free cpu-time for use because ksoftirqd won't stop eating it.
>  No you didn't then it seems you are hit by some bug. Have the bug happen.
>  Kill userland processes like as you did. Try find out what's running. I would 
>  look in /proc/interrupt /proc/net/softnet_stat /proc/net/dev or maybe best to 
>  take a profile if possible or Magic SysRq to find out what's looping. Also try
>  ifconfig down.

Ok, now I know how to go back in the normal state of kernel operation. I 
just have to do "ifconfig eth0 down" and ksoftirqd stop taking all the 
cpu-time, after that I can do "ifconfig eth0 up" and everything is working 
fine.

So I guess it is 8139too-driver which has a bug.

First "SysRq : Show Regs" when the kernel is ok:

--
Pid: 0, comm:              swapper
EIP: 0060:[<c0101ed3>] CPU: 0
EIP is at default_idle+0x23/0x30
 EFLAGS: 00000246    Not tainted  (2.6.7-mm7)
EAX: 00000000 EBX: c03cc000 ECX: 00000000 EDX: 0006a546
ESI: 00099100 EDI: c0427120 EBP: 0046e007 DS: 007b ES: 007b
CR0: 8005003b CR2: 08101000 CR3: 1e9b7000 CR4: 000006d0
 [<c0101f3c>] cpu_idle+0x2c/0x40
 [<c03ce832>] start_kernel+0x162/0x180
 [<c03ce460>] unknown_bootoption+0x0/0x160
--

Second "SysRq : Show Regs" when the ksoftirqd has started to take all the 
cpu-time:

--
Pid: 2, comm:          ksoftirqd/0
EIP: 0060:[<e0871224>] CPU: 0
EIP is at rtl8139_poll+0xb4/0x100 [8139too]
 EFLAGS: 00000247    Not tainted  (2.6.7-mm7)
EAX: ffffe000 EBX: 00000040 ECX: dfa654f8 EDX: c0441978
ESI: dfa65400 EDI: e0868000 EBP: dff85f84 DS: 007b ES: 007b
CR0: 8005003b CR2: 080e9024 CR3: 1e9b7000 CR4: 000006d0
 [<c01194b0>] ksoftirqd+0x0/0xc0
 [<c02c5e3a>] net_rx_action+0x6a/0x110
 [<c011917d>] __do_softirq+0x7d/0x80
 [<c01191a6>] do_softirq+0x26/0x30
 [<c0119518>] ksoftirqd+0x68/0xc0
 [<c01276e5>] kthread+0xa5/0xb0
 [<c0127640>] kthread+0x0/0xb0
 [<c0102111>] kernel_thread_helper+0x5/0x14
--

Third one after I have done ifconfig eth0 down/up.

--
Pid: 0, comm:              swapper
EIP: 0060:[<c0101ed3>] CPU: 0
EIP is at default_idle+0x23/0x30
 EFLAGS: 00000246    Not tainted  (2.6.7-mm7)
EAX: 00000000 EBX: c03cc000 ECX: 00000000 EDX: 000cf65e
ESI: 00099100 EDI: c0427120 EBP: 0046e007 DS: 007b ES: 007b
CR0: 8005003b CR2: b7c5a000 CR3: 1e9b7000 CR4: 000006d0
 [<c0101f3c>] cpu_idle+0x2c/0x40
 [<c03ce832>] start_kernel+0x162/0x180
 [<c03ce460>] unknown_bootoption+0x0/0x160
--

I also compiled 8139too-driver with debub-options on and the logfile is 
included as attachment. 

Everything is going ok until end of the 20:41:29, then it happens:

--
20:41:29  kernel: rtl8139_rx: eth0: In rtl8139_rx(), current 004c BufAddr 
0c80, free to 003c, Cmd 0c.
20:41:29  kernel: rtl8139_interrupt: eth0: exiting interrupt, intr_status=0x0001.
20:41:29  kernel: rtl8139_rx: eth0: In rtl8139_rx(), current 11bc BufAddr 
22e0, free to 11ac, Cmd 0c.
20:41:29  kernel: rtl8139_interrupt: eth0: exiting interrupt, intr_status=0x0000.
20:41:29  last message repeated 4 times
20:41:29  kernel: rtl8139_rx: eth0: In rtl8139_rx(), current 2438 BufAddr 
242c, free to 2428, Cmd 0d.
20:41:29  kernel: rtl8139_interrupt: eth0: exiting interrupt, intr_status=0x0010.
20:41:29  kernel: rtl8139_rx: eth0: In rtl8139_rx(), current 2438 BufAddr 
242c, free to 2428, Cmd 0d.
20:41:29  kernel: rtl8139_interrupt: eth0: exiting interrupt, intr_status=0x0010.
20:41:29  kernel: rtl8139_rx: eth0: In rtl8139_rx(), current 2438 BufAddr 
242c, free to 2428, Cmd 0d.
20:41:29  kernel: rtl8139_interrupt: eth0: exiting interrupt, intr_status=0x0010.
20:41:29  kernel: rtl8139_rx: eth0: In rtl8139_rx(), current 2438 BufAddr 
242c, free to 2428, Cmd 0d.
--

The hardest part is to tell where the problem is but I think that 
rtl8139_poll-function would be good place to start looking for the bug?

readprofile didn't tell much.. Where to go next? I'm not so good 
programmer that I could find the right place to fix..

--
Pasi Sjöholm

[-- Attachment #2: Type: APPLICATION/x-gzip, Size: 8579 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-30 18:40                             ` Pasi Sjoholm
@ 2004-07-31 12:33                               ` Francois Romieu
  2004-08-02  9:57                                 ` Pasi Sjoholm
  0 siblings, 1 reply; 32+ messages in thread
From: Francois Romieu @ 2004-07-31 12:33 UTC (permalink / raw)
  To: Pasi Sjoholm
  Cc: Robert Olsson, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

[-- Attachment #1: Type: text/plain, Size: 676 bytes --]

Pasi Sjoholm <ptsjohol@cc.jyu.fi> :
[interesting report]
> The hardest part is to tell where the problem is but I think that 
> rtl8139_poll-function would be good place to start looking for the bug?
> 
> readprofile didn't tell much.. Where to go next? I'm not so good 
> programmer that I could find the right place to fix..

In case it could make a difference: did you check if CONFIG_8139_OLD_RX_RESET
changes the behavior or not ?

If it does not, I'd welcome a test report + log with the two attached patch
applied. The first one is just a placebo but the second one could help.

Btw, you are probably right that the issue is not related to ksoftirqd at all.

--
Ueimor

[-- Attachment #2: r8139-10.patch --]
[-- Type: text/plain, Size: 1605 bytes --]

 drivers/net/8139too.c |   20 ++++++++++----------
 1 files changed, 10 insertions(+), 10 deletions(-)

diff -puN drivers/net/8139too.c~r8139-10 drivers/net/8139too.c
--- linux-2.6.8-rc2/drivers/net/8139too.c~r8139-10	2004-07-31 12:43:44.000000000 +0200
+++ linux-2.6.8-rc2-romieu/drivers/net/8139too.c	2004-07-31 12:43:44.000000000 +0200
@@ -1934,6 +1934,7 @@ static int rtl8139_rx(struct net_device 
 	int received = 0;
 	unsigned char *rx_ring = tp->rx_ring;
 	unsigned int cur_rx = tp->cur_rx;
+	u16 status;
 
 	DPRINTK ("%s: In rtl8139_rx(), current %4.4x BufAddr %4.4x,"
 		 " free to %4.4x, Cmd %2.2x.\n", dev->name, cur_rx,
@@ -1947,7 +1948,6 @@ static int rtl8139_rx(struct net_device 
 		unsigned int rx_size;
 		unsigned int pkt_size;
 		struct sk_buff *skb;
-		u16 status;
 
 		rmb();
 
@@ -2024,17 +2024,17 @@ static int rtl8139_rx(struct net_device 
 
 		cur_rx = (cur_rx + rx_size + 4 + 3) & ~3;
 		RTL_W16 (RxBufPtr, (u16) (cur_rx - 16));
+	}
 
-		/* Clear out errors and receive interrupts */
-		status = RTL_R16 (IntrStatus) & RxAckBits;
-		if (likely(status != 0)) {
-			if (unlikely(status & (RxFIFOOver | RxOverflow))) {
-				tp->stats.rx_errors++;
-				if (status & RxFIFOOver)
-					tp->stats.rx_fifo_errors++;
-			}
-			RTL_W16_F (IntrStatus, RxAckBits);
+	/* Clear out errors and receive interrupts */
+	status = RTL_R16 (IntrStatus) & RxAckBits;
+	if (likely(status != 0)) {
+		if (unlikely(status & (RxFIFOOver | RxOverflow))) {
+			tp->stats.rx_errors++;
+			if (status & RxFIFOOver)
+				tp->stats.rx_fifo_errors++;
 		}
+		RTL_W16_F (IntrStatus, RxAckBits);
 	}
 
  done:

_

[-- Attachment #3: r8139-20.patch --]
[-- Type: text/plain, Size: 1312 bytes --]

 drivers/net/8139too.c |   11 ++++++++++-
 1 files changed, 10 insertions(+), 1 deletion(-)

diff -puN drivers/net/8139too.c~r8139-20 drivers/net/8139too.c
--- linux-2.6.8-rc2/drivers/net/8139too.c~r8139-20	2004-07-31 12:44:12.000000000 +0200
+++ linux-2.6.8-rc2-romieu/drivers/net/8139too.c	2004-07-31 14:20:36.000000000 +0200
@@ -593,6 +593,7 @@ struct rtl8139_private {
 	int time_to_die;
 	struct mii_if_info mii;
 	unsigned int regs_len;
+	unsigned long fifo_copy_timeout;
 };
 
 MODULE_AUTHOR ("Jeff Garzik <jgarzik@pobox.com>");
@@ -1937,7 +1938,7 @@ static int rtl8139_rx(struct net_device 
 	u16 status;
 
 	DPRINTK ("%s: In rtl8139_rx(), current %4.4x BufAddr %4.4x,"
-		 " free to %4.4x, Cmd %2.2x.\n", dev->name, cur_rx,
+		 " free to %4.4x, Cmd %2.2x.\n", dev->name, (u16)cur_rx,
 		 RTL_R16 (RxBufAddr),
 		 RTL_R16 (RxBufPtr), RTL_R8 (ChipCmd));
 
@@ -1977,6 +1978,14 @@ static int rtl8139_rx(struct net_device 
 		 */
 		if (unlikely(rx_size == 0xfff0)) {
 			tp->xstats.early_rx++;
+			if (!tp->fifo_copy_timeout)
+				tp->fifo_copy_timeout = jiffies + 2;
+			else if (time_after(jiffies, tp->fifo_copy_timeout)) {
+				DPRINTK ("%s: hung FIFO. Reset.", dev->name);
+				tp->fifo_copy_timeout = 0;
+				rtl8139_rx_err (rx_status, dev, tp, ioaddr);
+				return -1;
+			}
 			goto done;
 		}
 

_

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-07-31 12:33                               ` Francois Romieu
@ 2004-08-02  9:57                                 ` Pasi Sjoholm
  2004-08-02 10:03                                   ` Pasi Sjoholm
  0 siblings, 1 reply; 32+ messages in thread
From: Pasi Sjoholm @ 2004-08-02  9:57 UTC (permalink / raw)
  To: Francois Romieu
  Cc: Robert Olsson, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

On Sat, 31 Jul 2004, Francois Romieu wrote:

> Pasi Sjoholm <ptsjohol@cc.jyu.fi> :
> [interesting report]
>> The hardest part is to tell where the problem is but I think that 
>> rtl8139_poll-function would be good place to start looking for the bug?
>> readprofile didn't tell much.. Where to go next? I'm not so good 
>> programmer that I could find the right place to fix..

> In case it could make a difference: did you check if CONFIG_8139_OLD_RX_RESET
> changes the behavior or not ?

Yep, I tried that one also, didn't help a thing.
 
> If it does not, I'd welcome a test report + log with the two attached patch
> applied. The first one is just a placebo but the second one could help.

We are making some sort of a progress but not enough. 

Patch r8139-20.patch didn't help. I made some printk(...)-debug messages
and that part of a code was never ran.

With both patch applied and RTL8139DEBUG = 1 I couldn't make the driver 
crash but without DEBUG it did crash. I assume that it has something to 
with fact that syslog did take so much io-bandwidth. (a couple of minutes log 
was ~1GB =))

But without: 

--
@@ -2024,17 +2024,17 @@ static int rtl8139_rx(struct net_device

                cur_rx = (cur_rx + rx_size + 4 + 3) & ~3;
                RTL_W16 (RxBufPtr, (u16) (cur_rx - 16));
+       }

-               /* Clear out errors and receive interrupts */
-               status = RTL_R16 (IntrStatus) & RxAckBits;
-               if (likely(status != 0)) {
-                       if (unlikely(status & (RxFIFOOver | RxOverflow))) 
{
-                               tp->stats.rx_errors++;
 -                               if (status & RxFIFOOver)
-                                       tp->stats.rx_fifo_errors++;
-                       }
-                       RTL_W16_F (IntrStatus, RxAckBits);
+       /* Clear out errors and receive interrupts */
+       status = RTL_R16 (IntrStatus) & RxAckBits;
+       if (likely(status != 0)) {
+               if (unlikely(status & (RxFIFOOver | RxOverflow))) {
+                       tp->stats.rx_errors++;
+                       if (status & RxFIFOOver)
+                               tp->stats.rx_fifo_errors++;
                }
+               RTL_W16_F (IntrStatus, RxAckBits);
        }

  done:
--

the driver crashed... even with debug-option was turned on.
Everytime the ksoftirqd started to take cpu-time there were this line in 
the logs:

--
Aug  2 12:10:37 139_interrupt: eth0:.....
--

Notice the 139... it should read rtl8139_interrupt:



Here is a snapshot from the log file when driver is crashing: 
Full logfile is available from: 

http://www.cc.jyu.fi/~ptsjohol/syslog-debug.gz

--
Aug  2 12:10:37 rtl8139_rx: eth0: In rtl8139_rx(), current 03ac BufAddr 036c, free to 039c, Cmd 0c.
Aug  2 12:10:37 rtl8139_interrupt: eth0: exiting interrupt, intr_status=0x0000.
Aug  2 12:10:37 rtl8139_interrupt: eth0: exiting interrupt, intr_status=0x0001.
Aug  2 12:10:37 rtl8139_rx: eth0: In rtl8139_rx(), current 0484 BufAddr 0444, free to 0474, Cmd 0c.
Aug  2 12:10:37 rtl8139_interrupt: eth0: exiting interrupt, intr_status=0x0000.
Aug  2 12:10:37 139_interrupt: eth0: exiting interrupt, intr_status=0x0010.
Aug  2 12:10:37 rtl8139_rx: eth0: In rtl8139_rx(), current 04d0 BufAddr 04cc, free to 04c0, Cmd 0d.
Aug  2 12:10:37 rtl8139_interrupt: eth0: exiting interrupt, intr_status=0x0010.
Aug  2 12:10:37 rtl8139_rx: eth0: In rtl8139_rx(), current 04d0 BufAddr 04cc, free to 04c0, Cmd 0d.
Aug  2 12:10:37 rtl8139_interrupt: eth0: exiting interrupt, intr_status=0x0010.
Aug  2 12:10:37 rtl8139_rx: eth0: In rtl8139_rx(), current 04d0 BufAddr 04cc, free to 04c0, Cmd 0d.
......
......
Aug  2 12:12:11 rtl8139_rx: eth0: In rtl8139_rx(), current 04d0 BufAddr 04cc, free to 04c0, Cmd 0d.
Aug  2 12:12:11 rtl8139_interrupt: eth0: exiting interrupt, intr_status=0x0050.
Aug  2 12:12:11 rtl8139_rx: eth0: In rtl8139_rx(), current 04d0 BufAddr 04cc, free to 04c0, Cmd 0d.
Aug  2 12:12:11 rtl8139_interrupt: eth0: exiting interrupt, intr_status=0x0050.
Aug  2 12:12:11 rtl8139_rx: eth0: In rtl8139_rx(), current 04d0 BufAddr 04cc, free to 04c0, Cmd 0d.
Aug  2 12:12:11 rtl8139_interrupt: eth0: exiting interrupt, intr_status=0x0050.
--


--
Pasi Sjöholm

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-08-02  9:57                                 ` Pasi Sjoholm
@ 2004-08-02 10:03                                   ` Pasi Sjoholm
  2004-08-02 22:35                                     ` Francois Romieu
  0 siblings, 1 reply; 32+ messages in thread
From: Pasi Sjoholm @ 2004-08-02 10:03 UTC (permalink / raw)
  To: Francois Romieu
  Cc: Robert Olsson, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

I forgot to mention that it was quite hard to crash the driver with that 
/* Clear out errors and receive interrupts */-patch. Took about 15minutes 
everytime, when normally it takes about 2mins.

On Mon, 2 Aug 2004, Pasi Sjoholm wrote:

> With both patch applied and RTL8139DEBUG = 1 I couldn't make the driver 
> crash but without DEBUG it did crash. I assume that it has something to 
> with fact that syslog did take so much io-bandwidth. (a couple of minutes log 
> was ~1GB =))
> 
> But without: 
> 
> --
> @@ -2024,17 +2024,17 @@ static int rtl8139_rx(struct net_device
> 
>                 cur_rx = (cur_rx + rx_size + 4 + 3) & ~3;
>                 RTL_W16 (RxBufPtr, (u16) (cur_rx - 16));
> +       }
> 
> -               /* Clear out errors and receive interrupts */
> -               status = RTL_R16 (IntrStatus) & RxAckBits;
> -               if (likely(status != 0)) {
> -                       if (unlikely(status & (RxFIFOOver | RxOverflow))) 
> {
> -                               tp->stats.rx_errors++;
>  -                               if (status & RxFIFOOver)
> -                                       tp->stats.rx_fifo_errors++;
> -                       }
> -                       RTL_W16_F (IntrStatus, RxAckBits);
> +       /* Clear out errors and receive interrupts */
> +       status = RTL_R16 (IntrStatus) & RxAckBits;
> +       if (likely(status != 0)) {
> +               if (unlikely(status & (RxFIFOOver | RxOverflow))) {
> +                       tp->stats.rx_errors++;
> +                       if (status & RxFIFOOver)
> +                               tp->stats.rx_fifo_errors++;
>                 }
> +               RTL_W16_F (IntrStatus, RxAckBits);
>         }
> 
>   done:
> --
> 
> the driver crashed... even with debug-option was turned on.
> Everytime the ksoftirqd started to take cpu-time there were this line in 
> the logs:

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-08-02 10:03                                   ` Pasi Sjoholm
@ 2004-08-02 22:35                                     ` Francois Romieu
  2004-08-03 12:32                                       ` Pasi Sjoholm
  0 siblings, 1 reply; 32+ messages in thread
From: Francois Romieu @ 2004-08-02 22:35 UTC (permalink / raw)
  To: Pasi Sjoholm
  Cc: Robert Olsson, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

[-- Attachment #1: Type: text/plain, Size: 599 bytes --]

Pasi Sjoholm <ptsjohol@cc.jyu.fi> :
> I forgot to mention that it was quite hard to crash the driver with that 
> /* Clear out errors and receive interrupts */-patch. Took about 15minutes 
> everytime, when normally it takes about 2mins.

I have made a few changes. Please enable the DEBUG option and set msglvl
to its maximal value via ethtool. You may test the patches separately if
you find some time but the log once both r8139-10.patch and r8139-20.patch
are applied would be enough.

If the log fills too fast, you may comment out any message which does
not belong to rtl8139_rx().

--
Ueimor

[-- Attachment #2: r8139-10.patch --]
[-- Type: text/plain, Size: 2588 bytes --]


- read the interruption status word that the driver will ack before the
  actual processing is done;
- avoid a few heavy pci transactions when several packets are received at
  the same time.


 drivers/net/8139too.c |   17 ++++++++++-------
 1 files changed, 10 insertions(+), 7 deletions(-)

diff -puN drivers/net/8139too.c~r8139-10 drivers/net/8139too.c
--- linux-2.6.8-rc2/drivers/net/8139too.c~r8139-10	2004-08-02 22:39:24.000000000 +0200
+++ linux-2.6.8-rc2-romieu/drivers/net/8139too.c	2004-08-02 23:08:00.000000000 +0200
@@ -1934,12 +1934,15 @@ static int rtl8139_rx(struct net_device 
 	int received = 0;
 	unsigned char *rx_ring = tp->rx_ring;
 	unsigned int cur_rx = tp->cur_rx;
+	u16 status;
 
 	DPRINTK ("%s: In rtl8139_rx(), current %4.4x BufAddr %4.4x,"
 		 " free to %4.4x, Cmd %2.2x.\n", dev->name, cur_rx,
 		 RTL_R16 (RxBufAddr),
 		 RTL_R16 (RxBufPtr), RTL_R8 (ChipCmd));
 
+	status = RTL_R16 (IntrStatus) & RxAckBits;
+
 	while (netif_running(dev) && received < budget 
 	       && (RTL_R8 (ChipCmd) & RxBufEmpty) == 0) {
 		u32 ring_offset = cur_rx % RX_BUF_LEN;
@@ -1947,7 +1950,6 @@ static int rtl8139_rx(struct net_device 
 		unsigned int rx_size;
 		unsigned int pkt_size;
 		struct sk_buff *skb;
-		u16 status;
 
 		rmb();
 
@@ -1977,7 +1979,7 @@ static int rtl8139_rx(struct net_device 
 		 */
 		if (unlikely(rx_size == 0xfff0)) {
 			tp->xstats.early_rx++;
-			goto done;
+			break;
 		}
 
 		/* If Rx err or invalid rx_size/rx_status received
@@ -1989,7 +1991,8 @@ static int rtl8139_rx(struct net_device 
 			     (rx_size < 8) ||
 			     (!(rx_status & RxStatusOK)))) {
 			rtl8139_rx_err (rx_status, dev, tp, ioaddr);
-			return -1;
+			received = -1;
+			goto out;
 		}
 
 		/* Malloc up new buffer, compatible with net-2e. */
@@ -2024,21 +2027,20 @@ static int rtl8139_rx(struct net_device 
 
 		cur_rx = (cur_rx + rx_size + 4 + 3) & ~3;
 		RTL_W16 (RxBufPtr, (u16) (cur_rx - 16));
+	}
 
+	if (received > 0) {
 		/* Clear out errors and receive interrupts */
-		status = RTL_R16 (IntrStatus) & RxAckBits;
 		if (likely(status != 0)) {
 			if (unlikely(status & (RxFIFOOver | RxOverflow))) {
 				tp->stats.rx_errors++;
-				if (status & RxFIFOOver)
+			if (status & RxFIFOOver)
 					tp->stats.rx_fifo_errors++;
 			}
 			RTL_W16_F (IntrStatus, RxAckBits);
 		}
 	}
 
- done:
-
 #if RTL8139_DEBUG > 1
 	DPRINTK ("%s: Done rtl8139_rx(), current %4.4x BufAddr %4.4x,"
 		 " free to %4.4x, Cmd %2.2x.\n", dev->name, cur_rx,
@@ -2047,6 +2049,7 @@ static int rtl8139_rx(struct net_device 
 #endif
 
 	tp->cur_rx = cur_rx;
+out:
 	return received;
 }
 

_

[-- Attachment #3: r8139-20.patch --]
[-- Type: text/plain, Size: 1999 bytes --]


Let's be sure that the driver will make some progress/reset when
crap hits the packet size descriptor.


 drivers/net/8139too.c |   25 ++++++++++++++++++++++++-
 1 files changed, 24 insertions(+), 1 deletion(-)

diff -puN drivers/net/8139too.c~r8139-20 drivers/net/8139too.c
--- linux-2.6.8-rc2/drivers/net/8139too.c~r8139-20	2004-08-02 23:21:36.000000000 +0200
+++ linux-2.6.8-rc2-romieu/drivers/net/8139too.c	2004-08-03 00:13:56.000000000 +0200
@@ -593,6 +593,7 @@ struct rtl8139_private {
 	int time_to_die;
 	struct mii_if_info mii;
 	unsigned int regs_len;
+	unsigned long fifo_copy_timeout;
 };
 
 MODULE_AUTHOR ("Jeff Garzik <jgarzik@pobox.com>");
@@ -1937,7 +1938,7 @@ static int rtl8139_rx(struct net_device 
 	u16 status;
 
 	DPRINTK ("%s: In rtl8139_rx(), current %4.4x BufAddr %4.4x,"
-		 " free to %4.4x, Cmd %2.2x.\n", dev->name, cur_rx,
+		 " free to %4.4x, Cmd %2.2x.\n", dev->name, (u16)cur_rx,
 		 RTL_R16 (RxBufAddr),
 		 RTL_R16 (RxBufPtr), RTL_R8 (ChipCmd));
 
@@ -1978,10 +1979,24 @@ static int rtl8139_rx(struct net_device 
 		 * since EarlyRx is disabled.
 		 */
 		if (unlikely(rx_size == 0xfff0)) {
+			if (!tp->fifo_copy_timeout)
+				tp->fifo_copy_timeout = jiffies + 2;
+			else if (time_after(jiffies, tp->fifo_copy_timeout)) {
+				DPRINTK ("%s: hung FIFO. Reset.", dev->name);
+				rx_size = 0;
+				goto no_early_rx;
+			}
+			if (netif_msg_intr(tp)) {
+				printk(KERN_DEBUG "%s: fifo copy in progress.",
+				       dev->name);
+			}
 			tp->xstats.early_rx++;
 			break;
 		}
 
+no_early_rx:
+		tp->fifo_copy_timeout = 0;
+
 		/* If Rx err or invalid rx_size/rx_status received
 		 * (which happens if we get lost in the ring),
 		 * Rx process gets reset, so we abort any further
@@ -2049,6 +2064,14 @@ static int rtl8139_rx(struct net_device 
 #endif
 
 	tp->cur_rx = cur_rx;
+
+	/*
+	 * The receive buffer should be mostly empty.
+	 * Tell NAPI to reenable the Rx irq.
+	 */
+	if (tp->fifo_copy_timeout)
+		received = budget;
+
 out:
 	return received;
 }

_

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-08-02 22:35                                     ` Francois Romieu
@ 2004-08-03 12:32                                       ` Pasi Sjoholm
  2004-08-03 16:50                                         ` Francois Romieu
  0 siblings, 1 reply; 32+ messages in thread
From: Pasi Sjoholm @ 2004-08-03 12:32 UTC (permalink / raw)
  To: Francois Romieu
  Cc: Robert Olsson, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

On Tue, 3 Aug 2004, Francois Romieu wrote:

> I have made a few changes. Please enable the DEBUG option and set msglvl
> to its maximal value via ethtool. You may test the patches separately if
> you find some time but the log once both r8139-10.patch and r8139-20.patch
> are applied would be enough.

The full logfiles can be downloaded from:

http://www.cc.jyu.fi/~ptsjohol/syslog1.gz 
http://www.cc.jyu.fi/~ptsjohol/syslog2.gz 

The first log file is with both patchs applied and the second one with one 
little change to rx8139_rx() to show if it even goes to through 

"        while (netif_running(dev) && received < budget
               && (RTL_R8 (ChipCmd) & RxBufEmpty) == 0) {"-section.

This was the change which I made.. so you can see in the second log file 
that there won't be any of these messages after the driver has crashed. 

/*              if (netif_msg_rx_status(tp))*/
        printk(KERN_DEBUG "%s:  rtl8139_rx() status %4.4x, size %4.4x,"
        " cur %4.4x.\n", dev->name, rx_status, rx_size, cur_rx);

For the first logfile the exact crash time is "13:02:22" and the for the 
second one it is "14:54:49".

> If the log fills too fast, you may comment out any message which does
> not belong to rtl8139_rx().

I took out those "exiting with interrupt"-messages.

--
Pasi Sjöholm

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-08-03 12:32                                       ` Pasi Sjoholm
@ 2004-08-03 16:50                                         ` Francois Romieu
  2004-08-03 20:19                                           ` Pasi Sjoholm
  0 siblings, 1 reply; 32+ messages in thread
From: Francois Romieu @ 2004-08-03 16:50 UTC (permalink / raw)
  To: Pasi Sjoholm
  Cc: Robert Olsson, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

Pasi Sjoholm <ptsjohol@cc.jyu.fi> :
[...]
> The first log file is with both patchs applied and the second one with one 
> little change to rx8139_rx() to show if it even goes to through 
> 
> "        while (netif_running(dev) && received < budget
>                && (RTL_R8 (ChipCmd) & RxBufEmpty) == 0) {"-section.
> 
> This was the change which I made.. so you can see in the second log file 
> that there won't be any of these messages after the driver has crashed. 

If you remove the "if (received > 0) {" test in r8139-10.patch and keep
both patches applied, I assume you are back to a crash within 15min (instead
of within 2min as suggested by the log), right ?

--
Ueimor

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-08-03 16:50                                         ` Francois Romieu
@ 2004-08-03 20:19                                           ` Pasi Sjoholm
  2004-08-04 16:26                                             ` Pasi Sjoholm
  0 siblings, 1 reply; 32+ messages in thread
From: Pasi Sjoholm @ 2004-08-03 20:19 UTC (permalink / raw)
  To: Francois Romieu
  Cc: Robert Olsson, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

On Tue, 3 Aug 2004, Francois Romieu wrote:

> > The first log file is with both patchs applied and the second one with one 
> > little change to rx8139_rx() to show if it even goes to through 
> > 
> > "        while (netif_running(dev) && received < budget
> >                && (RTL_R8 (ChipCmd) & RxBufEmpty) == 0) {"-section.
> > 
> > This was the change which I made.. so you can see in the second log file 
> > that there won't be any of these messages after the driver has crashed. 
> If you remove the "if (received > 0) {" test in r8139-10.patch and keep
> both patches applied, I assume you are back to a crash within 15min (instead
> of within 2min as suggested by the log), right ?

Hmm,

I removed "if (received > 0) {" and tested it something like 3 hours and 
wasn't able to crash the driver. I will test it for couple more hours 
tomorrow and if I'm not still able the crash it, we may have find some 
sort of a solution.

I'm not sure yet if it's a good one because of that earlier crash I had.
I guess I will also test if

- read the interruption status word that the driver will ack before the
  actual processing is done;

has something to do it.

We'll see.. I'll get back to you tomorrow with more information.

--
Pasi Sjöholm

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-08-03 20:19                                           ` Pasi Sjoholm
@ 2004-08-04 16:26                                             ` Pasi Sjoholm
  2004-08-05 10:14                                               ` Hector Martin
  0 siblings, 1 reply; 32+ messages in thread
From: Pasi Sjoholm @ 2004-08-04 16:26 UTC (permalink / raw)
  To: Francois Romieu
  Cc: Robert Olsson, H?ctor Mart?n, Linux-Kernel, akpm, netdev, brad,
	shemminger

On Tue, 3 Aug 2004, Pasi Sjoholm wrote:

> > If you remove the "if (received > 0) {" test in r8139-10.patch and keep
> > both patches applied, I assume you are back to a crash within 15min (instead
> > of within 2min as suggested by the log), right ?
> I removed "if (received > 0) {" and tested it something like 3 hours and 
> wasn't able to crash the driver. I will test it for couple more hours 
> tomorrow and if I'm not still able the crash it, we may have find some 
> sort of a solution.
> I'm not sure yet if it's a good one because of that earlier crash I had.
> I guess I will also test if
> "- read the interruption status word that the driver will ack before the
>   actual processing is done;" has something to do with it.

Ok, now I have tested it for 6 hours without crashing the driver. The 
system's load has been something like 5-6 the whole time. I also made some 
network load with ~90Mbps-incoming and ~90Mbps-outgoing traffic. 

I haven't had time to test anything else but I'm quite sure that there is 
no need for that anymore because the stability we have reached. 

I'll let you know if there's any problems within next few days but I would 
recommend that those patches would be included in 2.6.8. (without that "if 
(received > 0) {").

Many thanks for your help to resolve this problem. 

Hector, have you tested these patches?

--
Pasi Sjöholm

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-08-04 16:26                                             ` Pasi Sjoholm
@ 2004-08-05 10:14                                               ` Hector Martin
  2004-08-05 10:55                                                 ` Hector Martin
  0 siblings, 1 reply; 32+ messages in thread
From: Hector Martin @ 2004-08-05 10:14 UTC (permalink / raw)
  To: Pasi Sjoholm
  Cc: Francois Romieu, Robert Olsson, Linux-Kernel, akpm, netdev, brad,
	shemminger

Pasi Sjoholm wrote:

>Ok, now I have tested it for 6 hours without crashing the driver. The 
>system's load has been something like 5-6 the whole time. I also made some 
>network load with ~90Mbps-incoming and ~90Mbps-outgoing traffic. 
>
>I haven't had time to test anything else but I'm quite sure that there is 
>no need for that anymore because the stability we have reached. 
>
>I'll let you know if there's any problems within next few days but I would 
>recommend that those patches would be included in 2.6.8. (without that "if 
>(received > 0) {").
>
>Many thanks for your help to resolve this problem. 
>
>Hector, have you tested these patches?
>
>  
>
Wow.. I gotta learn some more about kernel hacking someday.. lol

I applied both (new) -10 and -20 patches and removed the test.

I doubled the debug-messages that get sent to the PC (the UDP traffic) 
but it froze (the sender) and stopped sending them after 5 minutes. I 
guess that's because of the crappy TCP/IP stack on the other side (this 
is a PlayStation2 application i'm developing, and the homebrew PS2 
TCP/IP stack doesn't have a good reputation.) I'm back to normal 
debugging, I'll test it for a couple of hours. So far, no problem.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-08-05 10:14                                               ` Hector Martin
@ 2004-08-05 10:55                                                 ` Hector Martin
  2004-08-05 11:22                                                   ` Francois Romieu
  0 siblings, 1 reply; 32+ messages in thread
From: Hector Martin @ 2004-08-05 10:55 UTC (permalink / raw)
  To: Hector Martin
  Cc: Pasi Sjoholm, Francois Romieu, Robert Olsson, Linux-Kernel, akpm,
	netdev, brad, shemminger

Hector Martin escribió:

> Pasi Sjoholm wrote:
>
>> Ok, now I have tested it for 6 hours without crashing the driver. The 
>> system's load has been something like 5-6 the whole time. I also made 
>> some network load with ~90Mbps-incoming and ~90Mbps-outgoing traffic.
>> I haven't had time to test anything else but I'm quite sure that 
>> there is no need for that anymore because the stability we have reached.
>> I'll let you know if there's any problems within next few days but I 
>> would recommend that those patches would be included in 2.6.8. 
>> (without that "if (received > 0) {").
>>
>> Many thanks for your help to resolve this problem.
>> Hector, have you tested these patches?
>>
>>  
>>
> Wow.. I gotta learn some more about kernel hacking someday.. lol
>
> I applied both (new) -10 and -20 patches and removed the test.
>
> I doubled the debug-messages that get sent to the PC (the UDP traffic) 
> but it froze (the sender) and stopped sending them after 5 minutes. I 
> guess that's because of the crappy TCP/IP stack on the other side 
> (this is a PlayStation2 application i'm developing, and the homebrew 
> PS2 TCP/IP stack doesn't have a good reputation.) I'm back to normal 
> debugging, I'll test it for a couple of hours. So far, no problem.
> -
> To unsubscribe from this list: send the line "unsubscribe 
> linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
>
>
Ugh.. it stopped sending them again. Now I doubled them again and I'm 
using a stabler TCP/IP stack. This one works OK. No problem so far... 
anyway, even though it stopped and I had to restart the PS2 some times, 
the PC has been receiving packets with no reboot whatsoever. I think 
it's fixed :)

Now I have to tackle a completely different problem with TCP connections 
on linux, but that's another story :)

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-08-05 10:55                                                 ` Hector Martin
@ 2004-08-05 11:22                                                   ` Francois Romieu
  2004-08-05 15:28                                                     ` OGAWA Hirofumi
  0 siblings, 1 reply; 32+ messages in thread
From: Francois Romieu @ 2004-08-05 11:22 UTC (permalink / raw)
  To: Hector Martin
  Cc: Pasi Sjoholm, Robert Olsson, Linux-Kernel, akpm, netdev, brad,
	shemminger

Hector Martin <hector@marcansoft.com> :
[...]
> using a stabler TCP/IP stack. This one works OK. No problem so far... 
> anyway, even though it stopped and I had to restart the PS2 some times, 
> the PC has been receiving packets with no reboot whatsoever. I think 
> it's fixed :)

Ok, I'll send the final version of the patches for inclusion in -netdev
and/or -mm this evening. It will provide a broader testing.

--
Ueimor

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-08-05 11:22                                                   ` Francois Romieu
@ 2004-08-05 15:28                                                     ` OGAWA Hirofumi
  2004-08-05 17:21                                                       ` Francois Romieu
  0 siblings, 1 reply; 32+ messages in thread
From: OGAWA Hirofumi @ 2004-08-05 15:28 UTC (permalink / raw)
  To: Francois Romieu
  Cc: Hector Martin, Pasi Sjoholm, Robert Olsson, Linux-Kernel, akpm,
	netdev, brad, shemminger

Francois Romieu <romieu@fr.zoreil.com> writes:

> Hector Martin <hector@marcansoft.com> :
> [...]
> > using a stabler TCP/IP stack. This one works OK. No problem so far... 
> > anyway, even though it stopped and I had to restart the PS2 some times, 
> > the PC has been receiving packets with no reboot whatsoever. I think 
> > it's fixed :)
> 
> Ok, I'll send the final version of the patches for inclusion in -netdev
> and/or -mm this evening. It will provide a broader testing.

Bit interesting.

On the final analysis, what was the cause of this problem?
I found the following in progguide-8100(100).pdf. Does this help something?

2.5 Software Issues

This section covers the handling of various data reception topics.

    1. Handling a Receive Buffer Overflow:
       The Rx DMA (FIFO to buffer) is stopped. The CAPR must be
       updated first to dismiss the ISR (RxBufferOverflow) event. The
       correct actions to process RxBufOvw are:
           a. Update CAPR.
           b. Write a 1 to ISR (ROK).
       The Rx DMA resumes after step b.

    2. Handling RxFIFOOvw:
       When RxFIFOOvw occurs, all incoming packets are discarded.
       Clearing ISR (RxFIFOOvw) doesn t dismiss the RxFIFOOvw event.
       To dismiss the RxFIFOOvw event, the ISR (RxBufOvw) must be
       written with a 1.

-- 
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related)
  2004-08-05 15:28                                                     ` OGAWA Hirofumi
@ 2004-08-05 17:21                                                       ` Francois Romieu
  0 siblings, 0 replies; 32+ messages in thread
From: Francois Romieu @ 2004-08-05 17:21 UTC (permalink / raw)
  To: OGAWA Hirofumi
  Cc: Hector Martin, Pasi Sjoholm, Robert Olsson, Linux-Kernel, akpm,
	netdev, brad, shemminger

OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> :
[...]
> On the final analysis, what was the cause of this problem?

See answer 42

> I found the following in progguide-8100(100).pdf. Does this help something?

It makes sense. The previous code did not allow the ISR to be written under
any circumstance.

--
Ueimor

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2004-08-05 17:21 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20040725235927.B30025@electric-eye.fr.zoreil.com>
2004-07-26 15:15 ` ksoftirqd uses 99% CPU triggered by network traffic (maybe RLT-8139 related) Pasi Sjoholm
2004-07-26 16:37   ` Robert Olsson
2004-07-26 21:46     ` Pasi Sjoholm
2004-07-26 22:38       ` Robert Olsson
2004-07-27  0:16         ` Pasi Sjoholm
2004-07-27 11:10           ` Robert Olsson
2004-07-27 14:20             ` Pasi Sjoholm
2004-07-27 20:24               ` Robert Olsson
2004-07-27 21:27                 ` Pasi Sjoholm
2004-07-28 18:35                   ` Robert Olsson
2004-07-28 19:09                     ` James Morris
2004-07-29  0:46                     ` Pasi Sjoholm
2004-07-30  9:18                       ` Robert Olsson
2004-07-30 12:56                         ` Pasi Sjoholm
2004-07-30 14:05                           ` Robert Olsson
2004-07-30 18:40                             ` Pasi Sjoholm
2004-07-31 12:33                               ` Francois Romieu
2004-08-02  9:57                                 ` Pasi Sjoholm
2004-08-02 10:03                                   ` Pasi Sjoholm
2004-08-02 22:35                                     ` Francois Romieu
2004-08-03 12:32                                       ` Pasi Sjoholm
2004-08-03 16:50                                         ` Francois Romieu
2004-08-03 20:19                                           ` Pasi Sjoholm
2004-08-04 16:26                                             ` Pasi Sjoholm
2004-08-05 10:14                                               ` Hector Martin
2004-08-05 10:55                                                 ` Hector Martin
2004-08-05 11:22                                                   ` Francois Romieu
2004-08-05 15:28                                                     ` OGAWA Hirofumi
2004-08-05 17:21                                                       ` Francois Romieu
2004-07-30 14:12                           ` Robert Olsson
2004-07-26 16:55   ` Stephen Hemminger
2004-07-26 21:28     ` Pasi Sjoholm

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).