Re: [PATCH] qdisc oops fix

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* Re: [PATCH] qdisc oops fix
@ 2003-04-15 22:42 Julian Anastasov
  2003-04-16  0:53 ` jamal
  2003-04-16  6:30 ` devik
  0 siblings, 2 replies; 23+ messages in thread
From: Julian Anastasov @ 2003-04-15 22:42 UTC (permalink / raw)
  To: Tomas Szepe; +Cc: netdev, jamal, Martin Devera


	Hello,

	Maybe this is not the right fix:

> -       sch = kmalloc(size, GFP_KERNEL);
> +       sch = kmalloc(size, GFP_ATOMIC);

	It would be better for Devik to provide a fix for htb_change_class
that does not call qdisc_create_dflt under the bh lock (sch_tree_lock).

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-15 22:42 [PATCH] qdisc oops fix Julian Anastasov
@ 2003-04-16  0:53 ` jamal
  2003-04-16  6:30 ` devik
  1 sibling, 0 replies; 23+ messages in thread
From: jamal @ 2003-04-16  0:53 UTC (permalink / raw)
  To: Julian Anastasov; +Cc: Tomas Szepe, netdev, Martin Devera




Yep. That looks like the problem. Thanks for narrowing it down Julian.

cheers,
jamal


On Wed, 16 Apr 2003, Julian Anastasov wrote:

>
> 	Hello,
>
> 	May be this is not the right fix:
>
> > -       sch = kmalloc(size, GFP_KERNEL);
> > +       sch = kmalloc(size, GFP_ATOMIC);
>
> 	better Devik to provide fix for htb_change_class that will not
> call qdisc_create_dflt under bh lock (sch_tree_lock).
>
> Regards
>
> --
> Julian Anastasov <ja@ssi.bg>
>
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-15 22:42 [PATCH] qdisc oops fix Julian Anastasov
  2003-04-16  0:53 ` jamal
@ 2003-04-16  6:30 ` devik
  2003-04-16  7:24   ` Julian Anastasov
  1 sibling, 1 reply; 23+ messages in thread
From: devik @ 2003-04-16  6:30 UTC (permalink / raw)
  To: Julian Anastasov; +Cc: Tomas Szepe, netdev, jamal

[-- Attachment #1: Type: TEXT/PLAIN, Size: 664 bytes --]

Hello,

The bug you are speaking about was fixed at 27 Jan 2003
and is included in 2.4.21pre (I hope ;-).
I'm attaching diff from 2.4.20 version to my current one.

regards,
-------------------------------
    Martin Devera aka devik
Linux kernel QoS/HTB maintainer
  http://luxik.cdi.cz/~devik/

On Wed, 16 Apr 2003, Julian Anastasov wrote:

>
> 	Hello,
>
> 	May be this is not the right fix:
>
> > -       sch = kmalloc(size, GFP_KERNEL);
> > +       sch = kmalloc(size, GFP_ATOMIC);
>
> 	better Devik to provide fix for htb_change_class that will not
> call qdisc_create_dflt under bh lock (sch_tree_lock).
>
> Regards
>
> --
> Julian Anastasov <ja@ssi.bg>
>
>

[-- Attachment #2: Type: TEXT/PLAIN, Size: 8861 bytes --]

--- sch_htb.c	2003/01/11 21:31:31	1.16
+++ sch_htb.c	2003/03/20 14:08:36
@@ -19,7 +19,7 @@
  *			created test case so that I was able to fix nasty bug
  *		and many others. thanks.
  *
- * $Id: sch_htb.c,v 1.16 2003/01/11 21:31:31 devik Exp $
+ * $Id: sch_htb.c,v 1.19 2003/03/20 12:13:14 devik Exp devik $
  */
 #include <linux/config.h>
 #include <linux/module.h>
@@ -71,16 +71,12 @@
 #define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
 #define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
 #define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
-#define HTB_VER 0x30008	/* major must be matched with number suplied by TC as version */
+#define HTB_VER 0x3000c	/* major must be matched with number suplied by TC as version */
 
 #if HTB_VER >> 16 != TC_HTB_PROTOVER
 #error "Mismatched sch_htb.c and pkt_sch.h"
 #endif
 
-/* temporary debug defines to be removed after beta stage */
-#define DEVIK_MEND(N)
-#define DEVIK_MSTART(N)
-
 /* debugging support; S is subsystem, these are defined:
   0 - netlink messages
   1 - enqueue
@@ -219,6 +215,9 @@
     /* time of nearest event per level (row) */
     unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
 
+    /* cached value of jiffies in dequeue */
+    unsigned long jiffies;
+
     /* whether we hit non-work conserving class during this dequeue; we use */
     int nwc_hit;	/* this to disable mindelay complaint in dequeue */
 
@@ -338,7 +337,7 @@
 static void htb_debug_dump (struct htb_sched *q)
 {
 	int i,p;
-	printk(KERN_DEBUG "htb*g j=%lu\n",jiffies);
+	printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
 	/* rows */
 	for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
 		printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
@@ -421,9 +420,8 @@
 	if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
 		printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
 #endif
-	DEVIK_MSTART(9);
-	cl->pq_key = jiffies + PSCHED_US2JIFFIE(delay);
-	if (cl->pq_key == jiffies)
+	cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
+	if (cl->pq_key == q->jiffies)
 		cl->pq_key++;
 
 	/* update the nearest event cache */
@@ -440,7 +438,6 @@
 	}
 	rb_link_node(&cl->pq_node, parent, p);
 	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
-	DEVIK_MEND(9);
 }
 
 /**
@@ -689,7 +686,6 @@
     struct htb_sched *q = (struct htb_sched *)sch->data;
     struct htb_class *cl = htb_classify(skb,sch);
 
-    DEVIK_MSTART(0);
     if (cl == HTB_DIRECT || !cl) {
 	/* enqueue to helper queue */
 	if (q->direct_queue.qlen < q->direct_qlen && cl) {
@@ -698,25 +694,20 @@
 	} else {
 	    kfree_skb (skb);
 	    sch->stats.drops++;
-	    DEVIK_MEND(0);
 	    return NET_XMIT_DROP;
 	}
     } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
 	sch->stats.drops++;
 	cl->stats.drops++;
-	DEVIK_MEND(0);
 	return NET_XMIT_DROP;
     } else {
 	cl->stats.packets++; cl->stats.bytes += skb->len;
-	DEVIK_MSTART(1);
 	htb_activate (q,cl);
-	DEVIK_MEND(1);
     }
 
     sch->q.qlen++;
     sch->stats.packets++; sch->stats.bytes += skb->len;
     HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",cl?cl->classid:0,skb);
-    DEVIK_MEND(0);
     return NET_XMIT_SUCCESS;
 }
 
@@ -819,7 +810,7 @@
 				       cl->classid, diff,
 				       (unsigned long long) q->now,
 				       (unsigned long long) cl->t_c,
-				       jiffies);
+				       q->jiffies);
 			diff = 1000;
 		}
 #endif
@@ -862,6 +853,7 @@
  *
  * Scans event queue for pending events and applies them. Returns jiffies to
  * next pending event (0 for no event in pq).
+ * Note: events applied are those with cl->pq_key <= jiffies.
  */
 static long htb_do_events(struct htb_sched *q,int level)
 {
@@ -876,9 +868,9 @@
 		while (p->rb_left) p = p->rb_left;
 
 		cl = rb_entry(p, struct htb_class, pq_node);
-		if (cl->pq_key - (jiffies+1) < 0x80000000) {
-			HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - jiffies);
-			return cl->pq_key - jiffies;
+		if (cl->pq_key - (q->jiffies+1) < 0x80000000) {
+			HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
+			return cl->pq_key - q->jiffies;
 		}
 		htb_safe_rb_erase(p,q->wait_pq+level);
 		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
@@ -889,7 +881,7 @@
 				       cl->classid, diff,
 				       (unsigned long long) q->now,
 				       (unsigned long long) cl->t_c,
-				       jiffies);
+				       q->jiffies);
 			diff = 1000;
 		}
 #endif
@@ -952,7 +944,6 @@
 	//struct htb_sched *q = (struct htb_sched *)sch->data;
 	struct htb_class *cl,*start;
 	/* look initial class up in the row */
-	DEVIK_MSTART(6);
 	start = cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);
 	
 	do {
@@ -971,8 +962,6 @@
 		cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);
 	} while (cl != start);
 
-	DEVIK_MEND(6);
-	DEVIK_MSTART(7);
 	if (likely(skb != NULL)) {
 		if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
 			HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
@@ -984,11 +973,8 @@
 		   gives us slightly better performance */
 		if (!cl->un.leaf.q->q.qlen)
 			htb_deactivate (q,cl);
-	DEVIK_MSTART(8);
 		htb_charge_class (q,cl,level,skb->len);
-	DEVIK_MEND(8);
 	}
-	DEVIK_MEND(7);
 	return skb;
 }
 
@@ -1003,7 +989,8 @@
 		delay = 5*HZ;
 	}
 	del_timer(&q->timer);
-	q->timer.expires = jiffies + delay;
+	/* why don't use jiffies here ? because expires can be in past */
+	q->timer.expires = q->jiffies + delay;
 	add_timer(&q->timer);
 	sch->flags |= TCQ_F_THROTTLED;
 	sch->stats.overlimits++;
@@ -1016,7 +1003,11 @@
 	struct htb_sched *q = (struct htb_sched *)sch->data;
 	int level;
 	long min_delay;
+#ifdef HTB_DEBUG
+	int evs_used = 0;
+#endif
 
+	q->jiffies = jiffies;
 	HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
 			sch->q.qlen);
 
@@ -1027,7 +1018,6 @@
 		return skb;
 	}
 
-	DEVIK_MSTART(2);
 	if (!sch->q.qlen) goto fin;
 	PSCHED_GET_TIME(q->now);
 
@@ -1037,17 +1027,17 @@
 		/* common case optimization - skip event handler quickly */
 		int m;
 		long delay;
-	DEVIK_MSTART(3);
-		if (jiffies - q->near_ev_cache[level] < 0x80000000 || 0) {
+		if (q->jiffies - q->near_ev_cache[level] < 0x80000000 || 0) {
 			delay = htb_do_events(q,level);
-			q->near_ev_cache[level] += delay ? delay : HZ;
+			q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
+#ifdef HTB_DEBUG
+			evs_used++;
+#endif
 		} else
-			delay = q->near_ev_cache[level] - jiffies;	
+			delay = q->near_ev_cache[level] - q->jiffies;	
 		
 		if (delay && min_delay > delay) 
 			min_delay = delay;
-	DEVIK_MEND(3);
-	DEVIK_MSTART(5);
 		m = ~q->row_mask[level];
 		while (m != (int)(-1)) {
 			int prio = ffz (m);
@@ -1056,29 +1046,24 @@
 			if (likely(skb != NULL)) {
 				sch->q.qlen--;
 				sch->flags &= ~TCQ_F_THROTTLED;
-	DEVIK_MEND(5);
 				goto fin;
 			}
 		}
-	DEVIK_MEND(5);
 	}
-	DEVIK_MSTART(4);
 #ifdef HTB_DEBUG
 	if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
-		if (min_delay == LONG_MAX) 
-			printk(KERN_ERR "HTB: dequeue bug, report it please !\n");
-		else {
+		if (min_delay == LONG_MAX) {
+			printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
+					evs_used,q->jiffies,jiffies);
+			htb_debug_dump(q);
+		} else 
 			printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
 					"too small rate\n",min_delay);
-			htb_debug_dump(q);
-		}
 	}
 #endif
 	htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
-	DEVIK_MEND(4);
 fin:
-	HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,jiffies,skb);
-	DEVIK_MEND(2);
+	HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
 	return skb;
 }
 
@@ -1449,6 +1434,7 @@
 	if (!rtab || !ctab) goto failure;
 
 	if (!cl) { /* new class */
+		struct Qdisc *new_q;
 		/* check for valid classid */
 		if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
 			goto failure;
@@ -1472,6 +1458,10 @@
 		cl->magic = HTB_CMAGIC;
 #endif
 
+		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL),
+		   which can't be used inside of sch_tree_lock
+		   -- thanks to Karlis Peisenieks */
+		new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
 		sch_tree_lock(sch);
 		if (parent && !parent->level) {
 			/* turn parent into inner node */
@@ -1490,8 +1480,7 @@
 			memset (&parent->un.inner,0,sizeof(parent->un.inner));
 		}
 		/* leaf (we) needs elementary qdisc */
-		if (!(cl->un.leaf.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
-			cl->un.leaf.q = &noop_qdisc;
+		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
 
 		cl->classid = classid; cl->parent = parent;
 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-16  6:30 ` devik
@ 2003-04-16  7:24   ` Julian Anastasov
  0 siblings, 0 replies; 23+ messages in thread
From: Julian Anastasov @ 2003-04-16  7:24 UTC (permalink / raw)
  To: devik; +Cc: Tomas Szepe, netdev, jamal


	Hello,

On Wed, 16 Apr 2003, devik wrote:

> The bug you are speaking about was fixed at 27 Jan 2003
> and is included in 2.4.21pre (I hope ;-).
> I'm attaching diff from 2.4.20 version to my current one.

	Oh, yes, I'm sleeping :) I'm even using this update, sorry!

> regards,
> -------------------------------
>     Martin Devera aka devik
> Linux kernel QoS/HTB maintainer
>   http://luxik.cdi.cz/~devik/

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
@ 2003-04-15 13:09 jamal
  2003-04-15 13:43 ` Tomas Szepe
  2003-04-16  5:41 ` Catalin BOIE
  0 siblings, 2 replies; 23+ messages in thread
From: jamal @ 2003-04-15 13:09 UTC (permalink / raw)
  To: Tomas Szepe; +Cc: linux-kernel, netdev

Hi,

Pass those net patches to the maintainers (not Alan, not Linus, not
Marcello) and CC netdev (optionally cc lk)?

I dont understand why

-       sch = kmalloc(size, GFP_KERNEL);
+       sch = kmalloc(size, GFP_ATOMIC);

mysteriously fixes the problem? Could the problem be elsewhere?
Can you repost what the issue was? I am not on lk and i just saw the
posting on a web page.

cheers,
jamal

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-15 13:09 jamal
@ 2003-04-15 13:43 ` Tomas Szepe
  2003-04-15 14:31   ` jamal
  2003-04-16  5:41 ` Catalin BOIE
  1 sibling, 1 reply; 23+ messages in thread
From: Tomas Szepe @ 2003-04-15 13:43 UTC (permalink / raw)
  To: jamal; +Cc: linux-kernel, netdev

> [hadi@cyberus.ca]
> 
> I dont understand why
> 
> -       sch = kmalloc(size, GFP_KERNEL);
> +       sch = kmalloc(size, GFP_ATOMIC);
> 
> mysteriously fixes the problem? Could the problem be elsewhere?
> Can you repost what the issue was? I am not on lk and i just saw the
> posting on a web page.

Here.

Date: Sat, 12 Apr 2003 10:21:37 +0200
From: Martin Volf <mv@inv.cz>
To: linux-kernel@vger.kernel.org
Subject: qdisc misbehavior detected at slab.c:1128 + fix

Hello,

when loading hundreds of QoS rules by tc on SMP machine (2 Xeons with HT) right after booting the system, I always get kernel BUG at slab.c:1128:

ksymoops 2.4.8 on i686 2.4.20.  Options used
     -V (default)
     -k /proc/ksyms (default)
     -l /proc/modules (default)
     -o /lib/modules/2.4.20/ (default)
     -m /boot/System.map (specified)

kernel BUG at slab.c:1128!
invalid operand: 0000
CPU:    0
EIP:    0010:[<c01367b8>]    Not tainted
Using defaults from ksymoops -t elf32-i386 -a i386
EFLAGS: 00010202
eax: 000001f0   ebx: 00000000   ecx: 000001f0   edx: 00000000
esi: dfff9450   edi: 000001f0   ebp: dc7bda00   esp: dc5e3bfc
ds: 0018   es: 0018   ss: 0018
Process tc (pid: 303, stackpage=dc5e3000)
Stack: 00000246 000001f0 000001f0 dfff9458 dfff9460 dfff9450 00000246 000001f0 
       dc7bda00 c01376fd dfff9450 000001f0 000001f0 dc652e00 c02d3a60 dc711460 
       dc7bda00 c021b809 dfff9450 000001f0 c15fea00 00000064 dc652e00 dc5b8034 
Call Trace:    [<c01376fd>] [<c021b809>] [<e0a4b71d>] [<e0a48108>] [<c021d5aa>]
  [<e0a4d1e0>] [<c0219b18>] [<c0219740>] [<c0219450>] [<c022121a>] [<c02209f1>]
  [<c0220f71>] [<c020a5c5>] [<c020bce7>] [<c0130010>] [<c012d5b5>] [<c012d821>]
  [<c0117f48>] [<c020b11d>] [<c020c1d6>] [<c0117dc0>] [<c0107800>] [<c010770f>]
Code: 0f 0b 68 04 f9 63 27 c0 c7 44 24 0c 01 00 00 00 89 c8 25 f0 


>>EIP; c01367b8 <kmem_cache_grow+58/270>   <=====

>>esi; dfff9450 <_end+1fc91518/20686128>
>>ebp; dc7bda00 <_end+1c455ac8/20686128>
>>esp; dc5e3bfc <_end+1c27bcc4/20686128>

Trace; c01376fd <__kmem_cache_alloc+6d/140>
Trace; c021b809 <qdisc_create_dflt+29/c0>
Trace; e0a4b71d <[sch_htb]htb_change_class+40d/600>
Trace; e0a48108 <[sch_htb]htb_find+58/70>
Trace; c021d5aa <tc_ctl_tclass+14a/2b0>
Trace; e0a4d1e0 <[sch_htb]htb_class_ops+0/0>
Trace; c0219b18 <rtnetlink_rcv_msg+1a8/26d>
Trace; c0219740 <rtnetlink_rcv+c0/1e0>
Trace; c0219450 <rtnetlink_dump_ifinfo+0/90>
Trace; c022121a <netlink_data_ready+7a/80>
Trace; c02209f1 <netlink_unicast+281/330>
Trace; c0220f71 <netlink_sendmsg+1f1/290>
Trace; c020a5c5 <sock_sendmsg+75/c0>
Trace; c020bce7 <sys_sendmsg+1b7/210>
Trace; c0130010 <do_buffer_fdatasync+30/b0>
Trace; c012d5b5 <do_anonymous_page+115/130>
Trace; c012d821 <handle_mm_fault+81/120>
Trace; c0117f48 <do_page_fault+188/523>
Trace; c020b11d <sys_socket+3d/60>
Trace; c020c1d6 <sys_socketcall+246/270>
Trace; c0117dc0 <do_page_fault+0/523>
Trace; c0107800 <error_code+34/3c>
Trace; c010770f <system_call+33/38>

Code;  c01367b8 <kmem_cache_grow+58/270>
00000000 <_EIP>:
Code;  c01367b8 <kmem_cache_grow+58/270>   <=====
   0:   0f 0b                     ud2a      <=====
Code;  c01367ba <kmem_cache_grow+5a/270>
   2:   68 04 f9 63 27            push   $0x2763f904
Code;  c01367bf <kmem_cache_grow+5f/270>
   7:   c0 c7 44                  rol    $0x44,%bh
Code;  c01367c2 <kmem_cache_grow+62/270>
   a:   24 0c                     and    $0xc,%al
Code;  c01367c4 <kmem_cache_grow+64/270>
   c:   01 00                     add    %eax,(%eax)
Code;  c01367c6 <kmem_cache_grow+66/270>
   e:   00 00                     add    %al,(%eax)
Code;  c01367c8 <kmem_cache_grow+68/270>
  10:   89 c8                     mov    %ecx,%eax
Code;  c01367ca <kmem_cache_grow+6a/270>
  12:   25 f0 00 00 00            and    $0xf0,%eax

 <0>Kernel panic: Aiee, killing interrupt handler!


On a UP machine, even with an SMP kernel (the same configuration), it never happened. Guided by the comment at slab.c:1122, I tried (without knowing what I was doing ;-) the following little patch to net/sched/sch_generic.c, and it seems to fix it.

--- sch_generic.c.orig  2003-01-04 14:42:02.000000000 +0100
+++ sch_generic.c       2003-04-12 08:58:34.000000000 +0200
@@ -372,7 +372,7 @@
        struct Qdisc *sch;
        int size = sizeof(*sch) + ops->priv_size;
 
-       sch = kmalloc(size, GFP_KERNEL);
+       sch = kmalloc(size, GFP_ATOMIC);
        if (!sch)
                return NULL;
        memset(sch, 0, size);


Is it the correct fix?

Thanks,
Martin Volf

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-15 13:43 ` Tomas Szepe
@ 2003-04-15 14:31   ` jamal
  2003-04-15 21:10     ` Tomas Szepe
  0 siblings, 1 reply; 23+ messages in thread
From: jamal @ 2003-04-15 14:31 UTC (permalink / raw)
  To: Tomas Szepe; +Cc: linux-kernel, netdev




Can you try a different qdisc - not htb to reproduce the problem?

cheers,
jamal

On Tue, 15 Apr 2003, Tomas Szepe wrote:

> Trace; c01376fd <__kmem_cache_alloc+6d/140>
> Trace; c021b809 <qdisc_create_dflt+29/c0>
> Trace; e0a4b71d <[sch_htb]htb_change_class+40d/600>
> Trace; e0a48108 <[sch_htb]htb_find+58/70>
> Trace; c021d5aa <tc_ctl_tclass+14a/2b0>
> Trace; e0a4d1e0 <[sch_htb]htb_class_ops+0/0>
> Trace; c0219b18 <rtnetlink_rcv_msg+1a8/26d>
> Trace; c0219740 <rtnetlink_rcv+c0/1e0>
> Trace; c0219450 <rtnetlink_dump_ifinfo+0/90>
> Trace; c022121a <netlink_data_ready+7a/80>
> Trace; c02209f1 <netlink_unicast+281/330>
> Trace; c0220f71 <netlink_sendmsg+1f1/290>
> Trace; c020a5c5 <sock_sendmsg+75/c0>
> Trace; c020bce7 <sys_sendmsg+1b7/210>
> Trace; c0130010 <do_buffer_fdatasync+30/b0>
> Trace; c012d5b5 <do_anonymous_page+115/130>
> Trace; c012d821 <handle_mm_fault+81/120>
> Trace; c0117f48 <do_page_fault+188/523>
> Trace; c020b11d <sys_socket+3d/60>
> Trace; c020c1d6 <sys_socketcall+246/270>
> Trace; c0117dc0 <do_page_fault+0/523>
> Trace; c0107800 <error_code+34/3c>
> Trace; c010770f <system_call+33/38>
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-15 14:31   ` jamal
@ 2003-04-15 21:10     ` Tomas Szepe
  0 siblings, 0 replies; 23+ messages in thread
From: Tomas Szepe @ 2003-04-15 21:10 UTC (permalink / raw)
  To: jamal; +Cc: linux-kernel, netdev

> [hadi@cyberus.ca]
> 
> Can you try a different qdisc - not htb to reproduce the problem?

Not quite possible I'm afraid.

-- 
Tomas Szepe <szepe@pinerecords.com>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-15 13:09 jamal
  2003-04-15 13:43 ` Tomas Szepe
@ 2003-04-16  5:41 ` Catalin BOIE
  2003-04-16 11:49   ` jamal
  1 sibling, 1 reply; 23+ messages in thread
From: Catalin BOIE @ 2003-04-16  5:41 UTC (permalink / raw)
  To: jamal; +Cc: Tomas Szepe, linux-kernel, netdev

> -       sch = kmalloc(size, GFP_KERNEL);
> +       sch = kmalloc(size, GFP_ATOMIC);
>
> mysteriously fixes the problem? Could the problem be elsewhere?
> Can you repost what the issue was? I am not on lk and i just saw the
> posting on a web page.

With many rules (~5000 classes and ~3500 qdiscs and ~50000 filters)
the kernel oopses in slab.c:1128.
It happens at high rates (~15mbit).
At low rates, it doesn't.

It seems that an interrupt came in and broke the memory allocation.


>>EIP; c0127ab4 <kmem_cache_grow+44/1d8>   <=====

>>EAX; ffffffff <END_OF_CODE+3fd31247/????>
>>EBX; c12c52c0 <END_OF_CODE+ff6508/????>
>>EDI; c12c52c0 <END_OF_CODE+ff6508/????>
>>ESP; ceab1c60 <END_OF_CODE+e7e2ea8/????>

Trace; c0127e0f <kmalloc+eb/110>
Trace; c01d3cac <qdisc_create_dflt+20/bc>
Trace; d081ecc7 <END_OF_CODE+1054ff0f/????>
Trace; c01d5265 <tc_ctl_tclass+1cd/214>
Trace; d0820600 <END_OF_CODE+10551848/????>
Trace; c01d27e4 <rtnetlink_rcv+298/3bc>
Trace; c01d0605 <__neigh_event_send+89/1b4>
Trace; c01d7cd4 <netlink_data_ready+1c/60>
Trace; c01d7730 <netlink_unicast+230/278>
Trace; c01d7b73 <netlink_sendmsg+1fb/20c>
Trace; c01c79d5 <sock_sendmsg+69/88>
Trace; c01c8b48 <sys_sendmsg+18c/1e8>
Trace; c0120010 <map_user_kiobuf+8/f8>


>
> cheers,
> jamal
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

---
Catalin(ux) BOIE
catab@deuroconsult.ro

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-16  5:41 ` Catalin BOIE
@ 2003-04-16 11:49   ` jamal
  2003-04-16 15:23     ` Manfred Spraul
  2003-04-17  5:13     ` Catalin BOIE
  0 siblings, 2 replies; 23+ messages in thread
From: jamal @ 2003-04-16 11:49 UTC (permalink / raw)
  To: Catalin BOIE; +Cc: Tomas Szepe, linux-kernel, netdev, manfred, kuznet



This is a different problem from previous one posted.

There's a small window (exposed because you are provisioning a lot
of qdiscs and running traffic at the same time) in which an incoming packet
interrupt will cause the BUG().

GFP_ATOMIC will fix it, but I wonder if it is appropriate.

Alexey or Manfred?

cheers,
jamal

PS:- 15mbits is not a lot of traffic ;->

On Wed, 16 Apr 2003, Catalin BOIE wrote:

> > -       sch = kmalloc(size, GFP_KERNEL);
> > +       sch = kmalloc(size, GFP_ATOMIC);
> >
> > mysteriously fixes the problem? Could the problem be elsewhere?
> > Can you repost what the issue was? I am not on lk and i just saw the
> > posting on a web page.
>
> With many rules (~5000 classes and ~3500 qdiscs and ~50000 filters)
> the kernel oopses in slab.c:1128.
> It happens on high rates (~15mbit).
> On low rates, doesn't.
>
> Seems that an interrupt come and broke the memory allocation.
>
>
> >>EIP; c0127ab4 <kmem_cache_grow+44/1d8>   <=====
>
> >>EAX; ffffffff <END_OF_CODE+3fd31247/????>
> >>EBX; c12c52c0 <END_OF_CODE+ff6508/????>
> >>EDI; c12c52c0 <END_OF_CODE+ff6508/????>
> >>ESP; ceab1c60 <END_OF_CODE+e7e2ea8/????>
>
> Trace; c0127e0f <kmalloc+eb/110>
> Trace; c01d3cac <qdisc_create_dflt+20/bc>
> Trace; d081ecc7 <END_OF_CODE+1054ff0f/????>
> Trace; c01d5265 <tc_ctl_tclass+1cd/214>
> Trace; d0820600 <END_OF_CODE+10551848/????>
> Trace; c01d27e4 <rtnetlink_rcv+298/3bc>
> Trace; c01d0605 <__neigh_event_send+89/1b4>
> Trace; c01d7cd4 <netlink_data_ready+1c/60>
> Trace; c01d7730 <netlink_unicast+230/278>
> Trace; c01d7b73 <netlink_sendmsg+1fb/20c>
> Trace; c01c79d5 <sock_sendmsg+69/88>
> Trace; c01c8b48 <sys_sendmsg+18c/1e8>
> Trace; c0120010 <map_user_kiobuf+8/f8>
>
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-16 11:49   ` jamal
@ 2003-04-16 15:23     ` Manfred Spraul
  2003-04-16 16:06       ` Tomas Szepe
  2003-04-16 18:39       ` jamal
  2003-04-17  5:13     ` Catalin BOIE
  1 sibling, 2 replies; 23+ messages in thread
From: Manfred Spraul @ 2003-04-16 15:23 UTC (permalink / raw)
  To: jamal; +Cc: Catalin BOIE, Tomas Szepe, linux-kernel, netdev, kuznet

jamal wrote:

>This is a different problem from previous one posted.
>
>Theres a small window (exposed given that you are provisioning a lot
>of qdiscs  and running traffic at the same time) that an incoming packet
>interupt will cause the BUG().
>
>GFP_ATOMIC will fix it, but i wonder if it appropriate.
>  
>
This is a 2.4 kernel, correct?

>>With many rules (~5000 classes and ~3500 qdiscs and ~50000 filters)
>>the kernel oopses in slab.c:1128.
>>
This check?
       if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
                BUG();
It's triggered, because someone does something like
    spin_lock_bh(&my_lock);
    p = kmalloc(,GFP_KERNEL);

I don't like the proposed fix: usually code that calls 
kmalloc(,GFP_KERNEL) assumes that it runs at process space, e.g. uses 
semaphores, or non-bh spinlocks, etc.
slab just happens to contain a test that complains about illegal calls.

>>Trace; c0127e0f <kmalloc+eb/110>
>>Trace; c01d3cac <qdisc_create_dflt+20/bc>
>>Trace; d081ecc7 <END_OF_CODE+1054ff0f/????>
>>Trace; c01d5265 <tc_ctl_tclass+1cd/214>
>>Trace; d0820600 <END_OF_CODE+10551848/????>
>>Trace; c01d27e4 <rtnetlink_rcv+298/3bc>
>>Trace; c01d0605 <__neigh_event_send+89/1b4>
>>Trace; c01d7cd4 <netlink_data_ready+1c/60>
>>Trace; c01d7730 <netlink_unicast+230/278>
>>Trace; c01d7b73 <netlink_sendmsg+1fb/20c>
>>Trace; c01c79d5 <sock_sendmsg+69/88>
>>Trace; c01c8b48 <sys_sendmsg+18c/1e8>
>>Trace; c0120010 <map_user_kiobuf+8/f8>
>>
>>
>>    
>>
I don't understand the backtrace. Were any modules loaded? Perhaps 
0xd081ecc7 is a module.

I'd add a
    if(in_interrupt()) show_stack(NULL);
into qdisc_create_dflt(), and try to reproduce the bug without modules.

--
    Manfred

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-16 15:23     ` Manfred Spraul
@ 2003-04-16 16:06       ` Tomas Szepe
  2003-04-16 16:52         ` Manfred Spraul
  2003-04-17  5:25         ` Catalin BOIE
  2003-04-16 18:39       ` jamal
  1 sibling, 2 replies; 23+ messages in thread
From: Tomas Szepe @ 2003-04-16 16:06 UTC (permalink / raw)
  To: Manfred Spraul; +Cc: jamal, Catalin BOIE, linux-kernel, netdev, kuznet

> [manfred@colorfullife.com]
> 
> >>Trace; c0127e0f <kmalloc+eb/110>
> >>Trace; c01d3cac <qdisc_create_dflt+20/bc>
> >>Trace; d081ecc7 <END_OF_CODE+1054ff0f/????>
> >>Trace; c01d5265 <tc_ctl_tclass+1cd/214>
> >>Trace; d0820600 <END_OF_CODE+10551848/????>
> >>Trace; c01d27e4 <rtnetlink_rcv+298/3bc>
> >>Trace; c01d0605 <__neigh_event_send+89/1b4>
> >>Trace; c01d7cd4 <netlink_data_ready+1c/60>
> >>Trace; c01d7730 <netlink_unicast+230/278>
> >>Trace; c01d7b73 <netlink_sendmsg+1fb/20c>
> >>Trace; c01c79d5 <sock_sendmsg+69/88>
> >>Trace; c01c8b48 <sys_sendmsg+18c/1e8>
> >>Trace; c0120010 <map_user_kiobuf+8/f8>
> >>
> I don't understand the backtrace. Were any modules loaded? Perhaps 
> 0xd081ecc7 is a module.

The original backtrace as provided by Martin Volf does not contain
any weird addresses such as 0xd081ecc7 above:

http://marc.theaimsgroup.com/?l=linux-kernel&m=105013596721774&w=2

-- 
Tomas Szepe <szepe@pinerecords.com>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-16 16:06       ` Tomas Szepe
@ 2003-04-16 16:52         ` Manfred Spraul
  2003-04-16 18:03           ` Marc-Christian Petersen
  2003-04-17  5:25         ` Catalin BOIE
  1 sibling, 1 reply; 23+ messages in thread
From: Manfred Spraul @ 2003-04-16 16:52 UTC (permalink / raw)
  To: Tomas Szepe; +Cc: jamal, Catalin BOIE, linux-kernel, netdev, kuznet

Tomas Szepe wrote:

>The original backtrace as provided by Martin Volf does not contain
>any weird addresses such as 0xd081ecc7 above:
>
>http://marc.theaimsgroup.com/?l=linux-kernel&m=105013596721774&w=2
>  
>
Thanks.
The bug was caused by sch_tree_lock() in htb_change_class().
2.4.21-pre7 contains a fix.

--
    Manfred

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-16 16:52         ` Manfred Spraul
@ 2003-04-16 18:03           ` Marc-Christian Petersen
  2003-04-16 18:18             ` jamal
  0 siblings, 1 reply; 23+ messages in thread
From: Marc-Christian Petersen @ 2003-04-16 18:03 UTC (permalink / raw)
  To: Manfred Spraul, Tomas Szepe
  Cc: jamal, Catalin BOIE, linux-kernel, netdev, kuznet

On Wednesday 16 April 2003 18:52, Manfred Spraul wrote:

Hi Manfred,

> >The original backtrace as provided by Martin Volf does not contain
> >any weird addresses such as 0xd081ecc7 above:
> >http://marc.theaimsgroup.com/?l=linux-kernel&m=105013596721774&w=2
> Thanks.
> The bug was caused by sch_tree_lock() in htb_change_class().
> 2.4.21-pre7 contains a fix.
am I just blind or isn't there a fix in -pre7|current-BK?

ciao, Marc

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-16 18:03           ` Marc-Christian Petersen
@ 2003-04-16 18:18             ` jamal
  2003-04-16 21:44               ` Tomas Szepe
  0 siblings, 1 reply; 23+ messages in thread
From: jamal @ 2003-04-16 18:18 UTC (permalink / raw)
  To: Marc-Christian Petersen
  Cc: Manfred Spraul, Tomas Szepe, Catalin BOIE, linux-kernel, netdev,
	kuznet



On Wed, 16 Apr 2003, Marc-Christian Petersen wrote:

> On Wednesday 16 April 2003 18:52, Manfred Spraul wrote:
>
> Hi Manfred,
>
> > >The original backtrace as provided by Martin Volf does not contain
> > >any weird addresses such as 0xd081ecc7 above:
> > >http://marc.theaimsgroup.com/?l=linux-kernel&m=105013596721774&w=2
> > Thanks.
> > The bug was caused by sch_tree_lock() in htb_change_class().
> > 2.4.21-pre7 contains a fix.
> am I just blind or isn't there a fix in -pre7|current-BK?
>

No you are not ;-> Yes, the fix for that specific problem is in
2.4.21-pre7. I think Tomas might have missed that we moved on to the
next problem.

cheers,
jamal

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-16 18:18             ` jamal
@ 2003-04-16 21:44               ` Tomas Szepe
  0 siblings, 0 replies; 23+ messages in thread
From: Tomas Szepe @ 2003-04-16 21:44 UTC (permalink / raw)
  To: jamal
  Cc: Marc-Christian Petersen, Manfred Spraul, Catalin BOIE,
	linux-kernel, netdev, kuznet

> [hadi@cyberus.ca]
> >
> > > >The original backtrace as provided by Martin Volf does not contain
> > > >any weird addresses such as 0xd081ecc7 above:
> > > >http://marc.theaimsgroup.com/?l=linux-kernel&m=105013596721774&w=2
> > > Thanks.
> > > The bug was caused by sch_tree_lock() in htb_change_class().
> > > 2.4.21-pre7 contains a fix.
> > am I just blind or isn't there a fix in -pre7|current-BK?
> >
> 
> No you are not ;-> Yes, the fix for that specific problem is in
> 2.4.21-pre7. I think Tomas might have missed that we moved on to the
> next problem.

Trouble is, the fix already went in for -pre5 (cset 1.930.3.5), so if you
only look at the pre6->pre7 changelog (like I did), you aren't likely to
find it.  8)

T.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-16 16:06       ` Tomas Szepe
  2003-04-16 16:52         ` Manfred Spraul
@ 2003-04-17  5:25         ` Catalin BOIE
  1 sibling, 0 replies; 23+ messages in thread
From: Catalin BOIE @ 2003-04-17  5:25 UTC (permalink / raw)
  To: Tomas Szepe
  Cc: Manfred Spraul, jamal, Catalin BOIE, linux-kernel, netdev, kuznet

> > >>Trace; c0127e0f <kmalloc+eb/110>
> > >>Trace; c01d3cac <qdisc_create_dflt+20/bc>
> > >>Trace; d081ecc7 <END_OF_CODE+1054ff0f/????>
> > >>Trace; c01d5265 <tc_ctl_tclass+1cd/214>
> > >>Trace; d0820600 <END_OF_CODE+10551848/????>
> > >>Trace; c01d27e4 <rtnetlink_rcv+298/3bc>
> > >>Trace; c01d0605 <__neigh_event_send+89/1b4>
> > >>Trace; c01d7cd4 <netlink_data_ready+1c/60>
> > >>Trace; c01d7730 <netlink_unicast+230/278>
> > >>Trace; c01d7b73 <netlink_sendmsg+1fb/20c>
> > >>Trace; c01c79d5 <sock_sendmsg+69/88>
> > >>Trace; c01c8b48 <sys_sendmsg+18c/1e8>
> > >>Trace; c0120010 <map_user_kiobuf+8/f8>
> > >>
> > I don't understand the backtrace. Were any modules loaded? Perhaps
> > 0xd081ecc7 is a module.

Yes, it is the htb module. I don't know why it didn't resolve.

> The original backtrace as provided by Martin Volf does not contain
> any weird addresses such as 0xd081ecc7 above:
>
> http://marc.theaimsgroup.com/?l=linux-kernel&m=105013596721774&w=2
>
> --
> Tomas Szepe <szepe@pinerecords.com>
>

---
Catalin(ux) BOIE
catab@deuroconsult.ro

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-16 15:23     ` Manfred Spraul
  2003-04-16 16:06       ` Tomas Szepe
@ 2003-04-16 18:39       ` jamal
  2003-04-16 19:43         ` Julian Anastasov
  2003-04-17  6:06         ` Catalin BOIE
  1 sibling, 2 replies; 23+ messages in thread
From: jamal @ 2003-04-16 18:39 UTC (permalink / raw)
  To: Manfred Spraul; +Cc: Catalin BOIE, Tomas Szepe, linux-kernel, netdev, kuznet


On Wed, 16 Apr 2003, Manfred Spraul wrote:

> jamal wrote:
>
> >This is a different problem from previous one posted.
> >
> >Theres a small window (exposed given that you are provisioning a lot
> >of qdiscs  and running traffic at the same time) that an incoming packet
> >interupt will cause the BUG().
> >
> >GFP_ATOMIC will fix it, but i wonder if it appropriate.
> >
> >
> This is a 2.4 kernel, correct?
>

Catalin, can you tell us what kernel that is?

> >>With many rules (~5000 classes and ~3500 qdiscs and ~50000 filters)
> >>the kernel oopses in slab.c:1128.
> >>
> This check?
>        if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
>                 BUG();

That's the one I meant.

> It's triggered, because someone does something like
>     spin_lock_bh(&my_lock);
>     p = kmalloc(,GFP_KERNEL);
>
> I don't like the proposed fix: usually code that calls
> kmalloc(,GFP_KERNEL) assumes that it runs at process space, e.g. uses
> semaphores, or non-bh spinlocks, etc.
> slab just happens to contain a test that complains about illegal calls.

ok. Nice.

>
> >>Trace; c0127e0f <kmalloc+eb/110>
> >>Trace; c01d3cac <qdisc_create_dflt+20/bc>
> >>Trace; d081ecc7 <END_OF_CODE+1054ff0f/????>
> >>Trace; c01d5265 <tc_ctl_tclass+1cd/214>
> >>Trace; d0820600 <END_OF_CODE+10551848/????>
> >>Trace; c01d27e4 <rtnetlink_rcv+298/3bc>
> >>Trace; c01d0605 <__neigh_event_send+89/1b4>
> >>Trace; c01d7cd4 <netlink_data_ready+1c/60>
> >>Trace; c01d7730 <netlink_unicast+230/278>
> >>Trace; c01d7b73 <netlink_sendmsg+1fb/20c>
> >>Trace; c01c79d5 <sock_sendmsg+69/88>
> >>Trace; c01c8b48 <sys_sendmsg+18c/1e8>
> >>Trace; c0120010 <map_user_kiobuf+8/f8>
> >>
> >>
> >>
> >>
> I don't understand the backtrace. Were any modules loaded? Perhaps
> 0xd081ecc7 is a module.
>

Probably a module. Again Catalin, run no modules.

> I'd add a
>     if(in_interrupt()) show_stack(NULL);
> into qdisc_create_dflt(), and try to reproduce the bug without modules.
>

Catalin - again instead of your fix can you please add this call?

cheers,
jamal

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-16 18:39       ` jamal
@ 2003-04-16 19:43         ` Julian Anastasov
  2003-04-17  6:06         ` Catalin BOIE
  1 sibling, 0 replies; 23+ messages in thread
From: Julian Anastasov @ 2003-04-16 19:43 UTC (permalink / raw)
  To: jamal
  Cc: Manfred Spraul, Catalin BOIE, Tomas Szepe, linux-kernel, netdev,
	kuznet


	Hello,

On Wed, 16 Apr 2003, jamal wrote:

> > >This is a different problem from previous one posted.

	The problem should be the same. This 'lock bh + GFP_KERNEL'
BUG happens only when slab allocates pages, not on each kmalloc.

> > >Theres a small window (exposed given that you are provisioning a lot
> > >of qdiscs  and running traffic at the same time) that an incoming packet
> > >interupt will cause the BUG().

	This should not happen; maybe you see another place that violates
the above rule? IMO, the only problem is that it is not good to
hold locks (including the bh one) while using GFP_KERNEL.

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-16 18:39       ` jamal
  2003-04-16 19:43         ` Julian Anastasov
@ 2003-04-17  6:06         ` Catalin BOIE
  2003-04-17 10:55           ` jamal
  1 sibling, 1 reply; 23+ messages in thread
From: Catalin BOIE @ 2003-04-17  6:06 UTC (permalink / raw)
  To: jamal
  Cc: Manfred Spraul, Catalin BOIE, Tomas Szepe, linux-kernel, netdev,
	kuznet

> Catalin, Can you what kernel that is?

2.4.20pre10 works OK, but 2.4.20 crashes.
With traffic -> no crash with 2.4.20. Without traffic, on other machine,
no crash.


> > It's triggered, because someone does something like
> >     spin_lock_bh(&my_lock);
> >     p = kmalloc(,GFP_KERNEL);
> >
> > I don't like the proposed fix: usually code that calls
> > kmalloc(,GFP_KERNEL) assumes that it runs at process space, e.g. uses
> > semaphores, or non-bh spinlocks, etc.
> > slab just happens to contain a test that complains about illegal calls.
>
> ok. Nice.
>
> >
> > >>Trace; c0127e0f <kmalloc+eb/110>
> > >>Trace; c01d3cac <qdisc_create_dflt+20/bc>
> > >>Trace; d081ecc7 <END_OF_CODE+1054ff0f/????>
> > >>Trace; c01d5265 <tc_ctl_tclass+1cd/214>
> > >>Trace; d0820600 <END_OF_CODE+10551848/????>
> > >>Trace; c01d27e4 <rtnetlink_rcv+298/3bc>
> > >>Trace; c01d0605 <__neigh_event_send+89/1b4>
> > >>Trace; c01d7cd4 <netlink_data_ready+1c/60>
> > >>Trace; c01d7730 <netlink_unicast+230/278>
> > >>Trace; c01d7b73 <netlink_sendmsg+1fb/20c>
> > >>Trace; c01c79d5 <sock_sendmsg+69/88>
> > >>Trace; c01c8b48 <sys_sendmsg+18c/1e8>
> > >>Trace; c0120010 <map_user_kiobuf+8/f8>
> > >>
> > >>
> > >>
> > >>
> > I don't understand the backtrace. Were any modules loaded? Perhaps
> > 0xd081ecc7 is a module.
> >
>
> Probably a module. Again Catalin, run no modules.
It's a production machine. I cannot test this. We plan to replace the
machine, so I can test then.

> > I'd add a
> >     if(in_interrupt()) show_stack(NULL);
> > into qdisc_create_dflt(), and try to reproduce the bug without modules.
> >
>
> Catalin - again instead of your fix can you please add this call?
See above. I cannot test now. I'm very sorry!

> cheers,
> jamal
>

---
Catalin(ux) BOIE
catab@deuroconsult.ro

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-17  6:06         ` Catalin BOIE
@ 2003-04-17 10:55           ` jamal
  2003-04-18  6:47             ` Catalin BOIE
  0 siblings, 1 reply; 23+ messages in thread
From: jamal @ 2003-04-17 10:55 UTC (permalink / raw)
  To: Catalin BOIE; +Cc: Manfred Spraul, Tomas Szepe, linux-kernel, netdev, kuznet



Ok, I stand corrected. Tomas is right- same problem. You had htb loaded
as a module, the other person had it compiled in ;->
Get yourself upgraded ;->

cheers,
jamal

On Thu, 17 Apr 2003, Catalin BOIE wrote:

> > Catalin, Can you what kernel that is?
>
> 2.4.20pre10 works ok but 2.4.20 crash.
> With traffic -> no crash with 2.4.20. Without traffic, on other machine,
> no crash.
>
>
> > > It's triggered, because someone does something like
> > >     spin_lock_bh(&my_lock);
> > >     p = kmalloc(,GFP_KERNEL);
> > >
> > > I don't like the proposed fix: usually code that calls
> > > kmalloc(,GFP_KERNEL) assumes that it runs at process space, e.g. uses
> > > semaphores, or non-bh spinlocks, etc.
> > > slab just happens to contain a test that complains about illegal calls.
> >
> > ok. Nice.
> >
> > >
> > > >>Trace; c0127e0f <kmalloc+eb/110>
> > > >>Trace; c01d3cac <qdisc_create_dflt+20/bc>
> > > >>Trace; d081ecc7 <END_OF_CODE+1054ff0f/????>
> > > >>Trace; c01d5265 <tc_ctl_tclass+1cd/214>
> > > >>Trace; d0820600 <END_OF_CODE+10551848/????>
> > > >>Trace; c01d27e4 <rtnetlink_rcv+298/3bc>
> > > >>Trace; c01d0605 <__neigh_event_send+89/1b4>
> > > >>Trace; c01d7cd4 <netlink_data_ready+1c/60>
> > > >>Trace; c01d7730 <netlink_unicast+230/278>
> > > >>Trace; c01d7b73 <netlink_sendmsg+1fb/20c>
> > > >>Trace; c01c79d5 <sock_sendmsg+69/88>
> > > >>Trace; c01c8b48 <sys_sendmsg+18c/1e8>
> > > >>Trace; c0120010 <map_user_kiobuf+8/f8>
> > > >>
> > > >>
> > > >>
> > > >>
> > > I don't understand the backtrace. Were any modules loaded? Perhaps
> > > 0xd081ecc7 is a module.
> > >
> >
> > Probably a module. Again Catalin, run no modules.
> It's a production machine. I cannot test this. We plan to replace the
> machine, so I can test then.
>
> > > I'd add a
> > >     if(in_interrupt()) show_stack(NULL);
> > > into qdisc_create_dflt(), and try to reproduce the bug without modules.
> > >
> >
> > Catalin - again instead of your fix can you please add this call?
> See above. I cannot test now. I'm very sorry!
>
> > cheers,
> > jamal
> >
>
> ---
> Catalin(ux) BOIE
> catab@deuroconsult.ro
>
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-17 10:55           ` jamal
@ 2003-04-18  6:47             ` Catalin BOIE
  0 siblings, 0 replies; 23+ messages in thread
From: Catalin BOIE @ 2003-04-18  6:47 UTC (permalink / raw)
  To: jamal
  Cc: Catalin BOIE, Manfred Spraul, Tomas Szepe, linux-kernel, netdev,
	kuznet

> Ok, I stand corrected. Tomas is right- same problem. You had htb loaded
> as a module, the other person had it compiled in ;->
> Get yourself upgraded ;->
I surely will!

>
> cheers,
> jamal
>
> On Thu, 17 Apr 2003, Catalin BOIE wrote:
>
> > > Catalin, Can you what kernel that is?
> >
> > 2.4.20pre10 works ok but 2.4.20 crash.
> > With traffic -> no crash with 2.4.20. Without traffic, on other machine,
> > no crash.
> >
> >
> > > > It's triggered, because someone does something like
> > > >     spin_lock_bh(&my_lock);
> > > >     p = kmalloc(,GFP_KERNEL);
> > > >
> > > > I don't like the proposed fix: usually code that calls
> > > > kmalloc(,GFP_KERNEL) assumes that it runs at process space, e.g. uses
> > > > semaphores, or non-bh spinlocks, etc.
> > > > slab just happens to contain a test that complains about illegal calls.
> > >
> > > ok. Nice.
> > >
> > > >
> > > > >>Trace; c0127e0f <kmalloc+eb/110>
> > > > >>Trace; c01d3cac <qdisc_create_dflt+20/bc>
> > > > >>Trace; d081ecc7 <END_OF_CODE+1054ff0f/????>
> > > > >>Trace; c01d5265 <tc_ctl_tclass+1cd/214>
> > > > >>Trace; d0820600 <END_OF_CODE+10551848/????>
> > > > >>Trace; c01d27e4 <rtnetlink_rcv+298/3bc>
> > > > >>Trace; c01d0605 <__neigh_event_send+89/1b4>
> > > > >>Trace; c01d7cd4 <netlink_data_ready+1c/60>
> > > > >>Trace; c01d7730 <netlink_unicast+230/278>
> > > > >>Trace; c01d7b73 <netlink_sendmsg+1fb/20c>
> > > > >>Trace; c01c79d5 <sock_sendmsg+69/88>
> > > > >>Trace; c01c8b48 <sys_sendmsg+18c/1e8>
> > > > >>Trace; c0120010 <map_user_kiobuf+8/f8>
> > > > >>
> > > > >>
> > > > >>
> > > > >>
> > > > I don't understand the backtrace. Were any modules loaded? Perhaps
> > > > 0xd081ecc7 is a module.
> > > >
> > >
> > > Probably a module. Again Catalin, run no modules.
> > It's a production machine. I cannot test this. We plan to replace the
> > machine, so I can test then.
> >
> > > > I'd add a
> > > >     if(in_interrupt()) show_stack(NULL);
> > > > into qdisc_create_dflt(), and try to reproduce the bug without modules.
> > > >
> > >
> > > Catalin - again instead of your fix can you please add this call?
> > See above. I cannot test now. I'm very sorry!
> >
> > > cheers,
> > > jamal
> > >
> >
> > ---
> > Catalin(ux) BOIE
> > catab@deuroconsult.ro
> >
> >
>

---
Catalin(ux) BOIE
catab@deuroconsult.ro

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] qdisc oops fix
  2003-04-16 11:49   ` jamal
  2003-04-16 15:23     ` Manfred Spraul
@ 2003-04-17  5:13     ` Catalin BOIE
  1 sibling, 0 replies; 23+ messages in thread
From: Catalin BOIE @ 2003-04-17  5:13 UTC (permalink / raw)
  To: jamal; +Cc: Catalin BOIE, Tomas Szepe, linux-kernel, netdev, manfred, kuznet

Hi!

> This is a different problem from previous one posted.
Same oops in slab.c:1128. Why do you think it is different?

> Theres a small window (exposed given that you are provisioning a lot
> of qdiscs  and running traffic at the same time) that an incoming packet
> interupt will cause the BUG().
>
> GFP_ATOMIC will fix it, but i wonder if it appropriate.
>
> Alexey or Manfred?
>
> cheers,
> jamal
>
> PS:- 15mbits is not a lot of traffic ;->
Yes, I know. I was comparing 15mbit with almost no traffic on the machine
running same qdiscs/filters/classes... :)

>
> On Wed, 16 Apr 2003, Catalin BOIE wrote:
>
> > > -       sch = kmalloc(size, GFP_KERNEL);
> > > +       sch = kmalloc(size, GFP_ATOMIC);
> > >
> > > mysteriously fixes the problem? Could the problem be elsewhere?
> > > Can you repost what the issue was? I am not on lk and i just saw the
> > > posting on a web page.
> >
> > With many rules (~5000 classes and ~3500 qdiscs and ~50000 filters)
> > the kernel oopses in slab.c:1128.
> > It happens on high rates (~15mbit).
> > On low rates, doesn't.
> >
> > Seems that an interrupt come and broke the memory allocation.
> >
> >
> > >>EIP; c0127ab4 <kmem_cache_grow+44/1d8>   <=====
> >
> > >>EAX; ffffffff <END_OF_CODE+3fd31247/????>
> > >>EBX; c12c52c0 <END_OF_CODE+ff6508/????>
> > >>EDI; c12c52c0 <END_OF_CODE+ff6508/????>
> > >>ESP; ceab1c60 <END_OF_CODE+e7e2ea8/????>
> >
> > Trace; c0127e0f <kmalloc+eb/110>
> > Trace; c01d3cac <qdisc_create_dflt+20/bc>
> > Trace; d081ecc7 <END_OF_CODE+1054ff0f/????>
> > Trace; c01d5265 <tc_ctl_tclass+1cd/214>
> > Trace; d0820600 <END_OF_CODE+10551848/????>
> > Trace; c01d27e4 <rtnetlink_rcv+298/3bc>
> > Trace; c01d0605 <__neigh_event_send+89/1b4>
> > Trace; c01d7cd4 <netlink_data_ready+1c/60>
> > Trace; c01d7730 <netlink_unicast+230/278>
> > Trace; c01d7b73 <netlink_sendmsg+1fb/20c>
> > Trace; c01c79d5 <sock_sendmsg+69/88>
> > Trace; c01c8b48 <sys_sendmsg+18c/1e8>
> > Trace; c0120010 <map_user_kiobuf+8/f8>
> >
> >
>

---
Catalin(ux) BOIE
catab@deuroconsult.ro

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2003-04-18  6:47 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2003-04-15 22:42 [PATCH] qdisc oops fix Julian Anastasov
2003-04-16  0:53 ` jamal
2003-04-16  6:30 ` devik
2003-04-16  7:24   ` Julian Anastasov
  -- strict thread matches above, loose matches on Subject: below --
2003-04-15 13:09 jamal
2003-04-15 13:43 ` Tomas Szepe
2003-04-15 14:31   ` jamal
2003-04-15 21:10     ` Tomas Szepe
2003-04-16  5:41 ` Catalin BOIE
2003-04-16 11:49   ` jamal
2003-04-16 15:23     ` Manfred Spraul
2003-04-16 16:06       ` Tomas Szepe
2003-04-16 16:52         ` Manfred Spraul
2003-04-16 18:03           ` Marc-Christian Petersen
2003-04-16 18:18             ` jamal
2003-04-16 21:44               ` Tomas Szepe
2003-04-17  5:25         ` Catalin BOIE
2003-04-16 18:39       ` jamal
2003-04-16 19:43         ` Julian Anastasov
2003-04-17  6:06         ` Catalin BOIE
2003-04-17 10:55           ` jamal
2003-04-18  6:47             ` Catalin BOIE
2003-04-17  5:13     ` Catalin BOIE

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).