netdev.vger.kernel.org archive mirror
* [PATCH] Fix rose.ko oops on unload
@ 2007-10-03 18:54 Alexey Dobriyan
  2007-10-03 19:04 ` Jeff Garzik
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Alexey Dobriyan @ 2007-10-03 18:54 UTC (permalink / raw)
  To: ralf, davem; +Cc: netdev, pidoux

Quick'n'dirty fix for the 100% reproducible oops on "rmmod rose". Do you want me to
properly unwind everything before .24?
-----------
Commit a3d384029aa304f8f3f5355d35f0ae274454f7cd, aka
"[AX.25]: Fix unchecked rose_add_loopback_neigh uses",
turned the rose_loopback_neigh variable into a statically allocated one.
However, on unload it is still kfree()'d, which can't work.
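
In other words, a simplified sketch of the conflict (not the exact code):

	/* after a3d3840: a plain static object ... */
	struct rose_neigh rose_loopback_neigh;

	/* ... which rose_add_loopback_neigh() still links onto rose_neigh_list,
	   so the unload path ends up doing the equivalent of */
	kfree(&rose_loopback_neigh);	/* not a kmalloc()'d pointer -> oops */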

Steps to reproduce:

	modprobe rose
	rmmod rose

BUG: unable to handle kernel NULL pointer dereference at virtual address 00000008
 printing eip:
c014c664
*pde = 00000000
Oops: 0000 [#1]
PREEMPT DEBUG_PAGEALLOC
Modules linked in: rose ax25 fan ufs loop usbhid rtc snd_intel8x0 snd_ac97_codec ehci_hcd ac97_bus uhci_hcd thermal usbcore button processor evdev sr_mod cdrom
CPU:    0
EIP:    0060:[<c014c664>]    Not tainted VLI
EFLAGS: 00210086   (2.6.23-rc9 #3)
EIP is at kfree+0x48/0xa1
eax: 00000556   ebx: c1734aa0   ecx: f6a5e000   edx: f7082000
esi: 00000000   edi: f9a55d20   ebp: 00200287   esp: f6a5ef28
ds: 007b   es: 007b   fs: 0000  gs: 0033  ss: 0068
Process rmmod (pid: 1823, ti=f6a5e000 task=f7082000 task.ti=f6a5e000)
Stack: f9a55d20 f9a5200c 00000000 00000000 00000000 f6a5e000 f9a5200c f9a55a00 
       00000000 bf818cf0 f9a51f3f f9a55a00 00000000 c0132c60 65736f72 00000000 
       f69f9630 f69f9528 c014244a f6a4e900 00200246 f7082000 c01025e6 00000000 
Call Trace:
 [<f9a5200c>] rose_rt_free+0x1d/0x49 [rose]
 [<f9a5200c>] rose_rt_free+0x1d/0x49 [rose]
 [<f9a51f3f>] rose_exit+0x4c/0xd5 [rose]
 [<c0132c60>] sys_delete_module+0x15e/0x186
 [<c014244a>] remove_vma+0x40/0x45
 [<c01025e6>] sysenter_past_esp+0x8f/0x99
 [<c012bacf>] trace_hardirqs_on+0x118/0x13b
 [<c01025b6>] sysenter_past_esp+0x5f/0x99
 =======================
Code: 05 03 1d 80 db 5b c0 8b 03 25 00 40 02 00 3d 00 40 02 00 75 03 8b 5b 0c 8b 73 10 8b 44 24 18 89 44 24 04 9c 5d fa e8 77 df fd ff <8b> 56 08 89 f8 e8 84 f4 fd ff e8 bd 32 06 00 3b 5c 86 60 75 0f 
EIP: [<c014c664>] kfree+0x48/0xa1 SS:ESP 0068:f6a5ef28

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

 include/net/rose.h       |    2 +-
 net/rose/rose_loopback.c |    4 ++--
 net/rose/rose_route.c    |   15 ++++++++++-----
 3 files changed, 13 insertions(+), 8 deletions(-)

--- a/include/net/rose.h
+++ b/include/net/rose.h
@@ -188,7 +188,7 @@ extern void rose_kick(struct sock *);
 extern void rose_enquiry_response(struct sock *);
 
 /* rose_route.c */
-extern struct rose_neigh rose_loopback_neigh;
+extern struct rose_neigh *rose_loopback_neigh;
 extern const struct file_operations rose_neigh_fops;
 extern const struct file_operations rose_nodes_fops;
 extern const struct file_operations rose_routes_fops;
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -79,7 +79,7 @@ static void rose_loopback_timer(unsigned long param)
 
 		skb_reset_transport_header(skb);
 
-		sk = rose_find_socket(lci_o, &rose_loopback_neigh);
+		sk = rose_find_socket(lci_o, rose_loopback_neigh);
 		if (sk) {
 			if (rose_process_rx_frame(sk, skb) == 0)
 				kfree_skb(skb);
@@ -88,7 +88,7 @@ static void rose_loopback_timer(unsigned long param)
 
 		if (frametype == ROSE_CALL_REQUEST) {
 			if ((dev = rose_dev_get(dest)) != NULL) {
-				if (rose_rx_call_request(skb, dev, &rose_loopback_neigh, lci_o) == 0)
+				if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0)
 					kfree_skb(skb);
 			} else {
 				kfree_skb(skb);
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -45,7 +45,7 @@ static DEFINE_SPINLOCK(rose_neigh_list_lock);
 static struct rose_route *rose_route_list;
 static DEFINE_SPINLOCK(rose_route_list_lock);
 
-struct rose_neigh rose_loopback_neigh;
+struct rose_neigh *rose_loopback_neigh;
 
 /*
  *	Add a new route to a node, and in the process add the node and the
@@ -362,7 +362,12 @@ out:
  */
 void rose_add_loopback_neigh(void)
 {
-	struct rose_neigh *sn = &rose_loopback_neigh;
+	struct rose_neigh *sn;
+
+	rose_loopback_neigh = kmalloc(sizeof(struct rose_neigh), GFP_KERNEL);
+	if (!rose_loopback_neigh)
+		return;
+	sn = rose_loopback_neigh;
 
 	sn->callsign  = null_ax25_address;
 	sn->digipeat  = NULL;
@@ -417,13 +422,13 @@ int rose_add_loopback_node(rose_address *address)
 	rose_node->mask         = 10;
 	rose_node->count        = 1;
 	rose_node->loopback     = 1;
-	rose_node->neighbour[0] = &rose_loopback_neigh;
+	rose_node->neighbour[0] = rose_loopback_neigh;
 
 	/* Insert at the head of list. Address is always mask=10 */
 	rose_node->next = rose_node_list;
 	rose_node_list  = rose_node;
 
-	rose_loopback_neigh.count++;
+	rose_loopback_neigh->count++;
 
 out:
 	spin_unlock_bh(&rose_node_list_lock);
@@ -454,7 +459,7 @@ void rose_del_loopback_node(rose_address *address)
 
 	rose_remove_node(rose_node);
 
-	rose_loopback_neigh.count--;
+	rose_loopback_neigh->count--;
 
 out:
 	spin_unlock_bh(&rose_node_list_lock);


* Re: [PATCH] Fix rose.ko oops on unload
  2007-10-03 18:54 [PATCH] Fix rose.ko oops on unload Alexey Dobriyan
@ 2007-10-03 19:04 ` Jeff Garzik
  2007-10-03 19:21   ` Alexey Dobriyan
  2007-10-08  6:44 ` David Miller
  2007-11-21 22:13 ` Inconsistent lock state and possible irq lock inversion dependency detected in ax25.ko Bernard Pidoux
  2 siblings, 1 reply; 12+ messages in thread
From: Jeff Garzik @ 2007-10-03 19:04 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: ralf, davem, netdev, pidoux

Alexey Dobriyan wrote:
> Quick'n'dirty fix for the 100% reproducible oops on "rmmod rose". Do you want me to
> properly unwind everything before .24?
> -----------
> Commit a3d384029aa304f8f3f5355d35f0ae274454f7cd, aka
> "[AX.25]: Fix unchecked rose_add_loopback_neigh uses",
> turned the rose_loopback_neigh variable into a statically allocated one.
> However, on unload it is still kfree()'d, which can't work.

I'm definitely missing something...  assuming your patch is applied, 
where is the kfree() for rose_loopback_neigh ?

	Jeff





* Re: [PATCH] Fix rose.ko oops on unload
  2007-10-03 19:04 ` Jeff Garzik
@ 2007-10-03 19:21   ` Alexey Dobriyan
  0 siblings, 0 replies; 12+ messages in thread
From: Alexey Dobriyan @ 2007-10-03 19:21 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: ralf, davem, netdev, pidoux

On Wed, Oct 03, 2007 at 03:04:20PM -0400, Jeff Garzik wrote:
> Alexey Dobriyan wrote:
> >Quick'n'dirty fix for the 100% reproducible oops on "rmmod rose". Do you want me to
> >properly unwind everything before .24?
> >-----------
> >Commit a3d384029aa304f8f3f5355d35f0ae274454f7cd, aka
> >"[AX.25]: Fix unchecked rose_add_loopback_neigh uses",
> >turned the rose_loopback_neigh variable into a statically allocated one.
> >However, on unload it is still kfree()'d, which can't work.
> 
> I'm definitely missing something...  assuming your patch is applied, 
> where is the kfree() for rose_loopback_neigh ?

AFAICS, it gets glued onto rose_neigh_list in rose_add_loopback_neigh().
On unload it will be found there and rose_remove_neigh() will free it.
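
For reference, the unload path is rose_exit() -> rose_rt_free() (see the
backtrace in the original report), and in the 2.6.23-era rose_route.c that
function does roughly the following (reconstructed from memory, so treat the
details as approximate):

	while (rose_neigh != NULL) {
		s          = rose_neigh;
		rose_neigh = rose_neigh->next;

		rose_remove_neigh(s);	/* unlinks s from rose_neigh_list
					   and kfree()s it */
	}

With rose_loopback_neigh now coming from kmalloc(), that kfree() is valid.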


* Re: [PATCH] Fix rose.ko oops on unload
  2007-10-03 18:54 [PATCH] Fix rose.ko oops on unload Alexey Dobriyan
  2007-10-03 19:04 ` Jeff Garzik
@ 2007-10-08  6:44 ` David Miller
  2007-12-14 21:58   ` [PATCH] [ROSE] reverts commits d85838c55d836c33077344fab424f200f2827d84 Bernard Pidoux
  2007-11-21 22:13 ` Inconsistent lock state and possible irq lock inversion dependency detected in ax25.ko Bernard Pidoux
  2 siblings, 1 reply; 12+ messages in thread
From: David Miller @ 2007-10-08  6:44 UTC (permalink / raw)
  To: adobriyan; +Cc: ralf, netdev, pidoux

From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 3 Oct 2007 22:54:13 +0400

> Quick'n'dirty fix to 100% oops on "rmmod rose". Do you want me to
> properly unwind everything before .24?

Patch applied, thanks Alexey.


* Inconsistent lock  state and possible irq lock inversion dependency detected in ax25.ko
  2007-10-03 18:54 [PATCH] Fix rose.ko oops on unload Alexey Dobriyan
  2007-10-03 19:04 ` Jeff Garzik
  2007-10-08  6:44 ` David Miller
@ 2007-11-21 22:13 ` Bernard Pidoux
  2007-11-28 13:48   ` Jarek Poplawski
  2007-12-02 17:37   ` Bernard Pidoux
  2 siblings, 2 replies; 12+ messages in thread
From: Bernard Pidoux @ 2007-11-21 22:13 UTC (permalink / raw)
  To: Alexey Dobriyan, ralf; +Cc: davem, netdev

[-- Attachment #1: Type: text/plain, Size: 1796 bytes --]

Hi,

I am intensively practicing AX25 packet radio, which uses ax25.ko together
with the mkiss, crc16, netrom, and rose modules, on two PIII CPU Linux
machines running a 2.6.23.8 kernel.

On the first Linux machine I did not enable the kernel hacking options, and
the AX25 applications run 100% of the time without serious problems.

On the second machine I enabled the kernel hacking options, and sooner or
later I get exactly the same message after a connect timeout expires:

[ INFO: inconsistent lock state ]

The error seems to reside around ax25_disconnect+0x46/0xaf [ax25] that 
is called when an AX25 connect timeout or a connection failure occurs.
Connect timeout is probably activating
ax25_std_heartbeat_expiry+0x19/0xd3 [ax25]

The message is only displayed once per boot session.
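
Condensed from the attached reports, the two paths lockdep is comparing are
roughly:

	softirq (timer) context:
	  run_timer_softirq -> ax25_heartbeat_expiry
	    -> ax25_std_heartbeat_expiry -> bh_lock_sock(sk)   /* slock-AF_AX25 */

	process context, softirqs still enabled:
	  sys_close -> sock_release -> ax25_release -> ax25_disconnect
	    -> bh_lock_sock(sk)                                /* same lock */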

Ralf Baechle explained to me that the ax25 code is very buggy and its
spinlocks are difficult to trace.
However, as the cause of the error is clearly identified and the reported
address is constant, I suspect that an experienced programmer (which I am
not) could trace the problem.

Moreover, I had the opportunity to catch a different message, which was
longer than usual and seemed more explicit:

[ INFO: possible irq lock inversion dependency detected ]

Although the symptom is different, it is related to the same origin:

fpac/4933 just changed the state of lock:
  (slock-AF_AX25){--..}, at: [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]

Whatever the running application, the inconsistent lock state could be
observed with the ax25_call, flexd, and fpac AX25 application programs.

Please find attached a few reports captured from dmesg after each event.

Could someone look at the listings and identify the origin of the problem,
if it is unique?

Thanks.

Bernard Pidoux



[-- Attachment #2: inconsistent_locks_report --]
[-- Type: text/plain, Size: 21620 bytes --]


=========================================================
[ INFO: possible irq lock inversion dependency detected ]
2.6.23.1 #1
---------------------------------------------------------
fpac/4933 just changed the state of lock:
 (slock-AF_AX25){--..}, at: [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
but this lock was taken by another, soft-irq-safe lock in the past:
 (ax25_list_lock){-+..}

and interrupts could create inverse lock ordering between them.


other info that might help us debug this:
no locks held by fpac/4933.

the first lock's dependencies:
-> (slock-AF_AX25){--..} ops: 410 {
   initial-use  at:
                        [<c012f448>] mark_lock+0x5b/0x44b
                        [<c0130358>] __lock_acquire+0x4c2/0xc02
                        [<c0130b06>] lock_acquire+0x6e/0x87
                        [<c024886b>] lock_sock_nested+0x26/0xcc
                        [<c02a37fb>] _spin_lock_bh+0x2e/0x39
                        [<c024886b>] lock_sock_nested+0x26/0xcc
                        [<c024886b>] lock_sock_nested+0x26/0xcc
                        [<c02462e0>] sock_fasync+0x61/0x116
                        [<c024727f>] sock_close+0x22/0x2f
                        [<c015c78b>] __fput+0xbc/0x172
                        [<c015a256>] filp_close+0x51/0x58
                        [<c0119daf>] put_files_struct+0x5e/0xa6
                        [<c011ae59>] do_exit+0x22e/0x6d9
                        [<c0103cc6>] sysenter_past_esp+0x8f/0x99
                        [<c012fa5e>] trace_hardirqs_on+0x11f/0x148
                        [<c011b36f>] sys_exit_group+0x0/0xd
                        [<c0103c96>] sysenter_past_esp+0x5f/0x99
                        [<ffffffff>] 0xffffffff
   softirq-on-W at:
                        [<c0130a50>] __lock_acquire+0xbba/0xc02
                        [<c0130343>] __lock_acquire+0x4ad/0xc02
                        [<c011c8e7>] local_bh_enable_ip+0xbd/0xc5
                        [<c0130b06>] lock_acquire+0x6e/0x87
                        [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
                        [<c02a37c2>] _spin_lock+0x29/0x34
                        [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
                        [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
                        [<d8be50c0>] ax25_release+0x9d/0x182 [ax25]
                        [<c0246e79>] sock_release+0x14/0x56
                        [<c0247287>] sock_close+0x2a/0x2f
                        [<c015c78b>] __fput+0xbc/0x172
                        [<c015a256>] filp_close+0x51/0x58
                        [<c015b284>] sys_close+0x66/0x9d
                        [<c0103c96>] sysenter_past_esp+0x5f/0x99
                        [<ffffffff>] 0xffffffff
   hardirq-on-W at:
                        [<c012f448>] mark_lock+0x5b/0x44b
                        [<c013031e>] __lock_acquire+0x488/0xc02
                        [<c0130b06>] lock_acquire+0x6e/0x87
                        [<c024886b>] lock_sock_nested+0x26/0xcc
                        [<c02a37fb>] _spin_lock_bh+0x2e/0x39
                        [<c024886b>] lock_sock_nested+0x26/0xcc
                        [<c024886b>] lock_sock_nested+0x26/0xcc
                        [<c02462e0>] sock_fasync+0x61/0x116
                        [<c024727f>] sock_close+0x22/0x2f
                        [<c015c78b>] __fput+0xbc/0x172
                        [<c015a256>] filp_close+0x51/0x58
                        [<c0119daf>] put_files_struct+0x5e/0xa6
                        [<c011ae59>] do_exit+0x22e/0x6d9
                        [<c0103cc6>] sysenter_past_esp+0x8f/0x99
                        [<c012fa5e>] trace_hardirqs_on+0x11f/0x148
                        [<c011b36f>] sys_exit_group+0x0/0xd
                        [<c0103c96>] sysenter_past_esp+0x5f/0x99
                        [<ffffffff>] 0xffffffff
 }
 ... key      at: [<c05db598>] af_family_slock_keys+0x18/0x120

the second lock's dependencies:
-> (ax25_list_lock){-+..} ops: 24485 {
   initial-use  at:
                        [<c0130358>] __lock_acquire+0x4c2/0xc02
                        [<c011c823>] local_bh_enable+0xfe/0x105
                        [<c012f871>] mark_held_locks+0x39/0x53
                        [<c011c8e7>] local_bh_enable_ip+0xbd/0xc5
                        [<c0130b06>] lock_acquire+0x6e/0x87
                        [<d8be3c0c>] ax25_cb_add+0xd/0x39 [ax25]
                        [<c02a37fb>] _spin_lock_bh+0x2e/0x39
                        [<d8be3c0c>] ax25_cb_add+0xd/0x39 [ax25]
                        [<d8be3c0c>] ax25_cb_add+0xd/0x39 [ax25]
                        [<d8be569d>] ax25_bind+0x129/0x14b [ax25]
                        [<c0247668>] sys_bind+0x75/0xa0
                        [<c02a37c2>] _spin_lock+0x29/0x34
                        [<c02a371a>] _spin_unlock+0x14/0x1c
                        [<c024694e>] sock_map_fd+0x41/0x4a
                        [<c0247df4>] sys_socketcall+0x93/0x261
                        [<c012fa5e>] trace_hardirqs_on+0x11f/0x148
                        [<c0103c96>] sysenter_past_esp+0x5f/0x99
                        [<ffffffff>] 0xffffffff
   in-softirq-W at:
                        [<c012f5cb>] mark_lock+0x1de/0x44b
                        [<c01302cc>] __lock_acquire+0x436/0xc02
                        [<c0130a50>] __lock_acquire+0xbba/0xc02
                        [<c0130b06>] lock_acquire+0x6e/0x87
                        [<d8be4450>] ax25_find_cb+0x18/0xc6 [ax25]
                        [<c02a37fb>] _spin_lock_bh+0x2e/0x39
                        [<d8be4450>] ax25_find_cb+0x18/0xc6 [ax25]
                        [<d8be4450>] ax25_find_cb+0x18/0xc6 [ax25]
                        [<d8be0bf9>] ax25_kiss_rcv+0x37d/0x712 [ax25]
                        [<c012f871>] mark_held_locks+0x39/0x53
                        [<c024a6af>] sock_queue_rcv_skb+0xd6/0xf3
                        [<c02a36d9>] _read_unlock+0x14/0x1c
                        [<c024a6af>] sock_queue_rcv_skb+0xd6/0xf3
                        [<c0250250>] netif_receive_skb+0x22d/0x289
                        [<c012fa48>] trace_hardirqs_on+0x109/0x148
                        [<c0252113>] process_backlog+0x7b/0xeb
                        [<c02521da>] net_rx_action+0x57/0xfd
                        [<c011c515>] __do_softirq+0x40/0x90
                        [<c0171866>] seq_read+0x1d1/0x277
                        [<c011c58c>] do_softirq+0x27/0x3d
                        [<c011c8c7>] local_bh_enable_ip+0x9d/0xc5
                        [<c0171866>] seq_read+0x1d1/0x277
                        [<c0171695>] seq_read+0x0/0x277
                        [<c0185922>] proc_reg_read+0x64/0x77
                        [<c01858be>] proc_reg_read+0x0/0x77
                        [<c015c0d0>] vfs_read+0xa6/0x12e
                        [<c015c3fe>] sys_read+0x41/0x67
                        [<c0103c96>] sysenter_past_esp+0x5f/0x99
                        [<ffffffff>] 0xffffffff
   hardirq-on-W at:
                        [<c013031e>] __lock_acquire+0x488/0xc02
                        [<c011c823>] local_bh_enable+0xfe/0x105
                        [<c012f871>] mark_held_locks+0x39/0x53
                        [<c011c8e7>] local_bh_enable_ip+0xbd/0xc5
                        [<c0130b06>] lock_acquire+0x6e/0x87
                        [<d8be3c0c>] ax25_cb_add+0xd/0x39 [ax25]
                        [<c02a37fb>] _spin_lock_bh+0x2e/0x39
                        [<d8be3c0c>] ax25_cb_add+0xd/0x39 [ax25]
                        [<d8be3c0c>] ax25_cb_add+0xd/0x39 [ax25]
                        [<d8be569d>] ax25_bind+0x129/0x14b [ax25]
                        [<c0247668>] sys_bind+0x75/0xa0
                        [<c02a37c2>] _spin_lock+0x29/0x34
                        [<c02a371a>] _spin_unlock+0x14/0x1c
                        [<c024694e>] sock_map_fd+0x41/0x4a
                        [<c0247df4>] sys_socketcall+0x93/0x261
                        [<c012fa5e>] trace_hardirqs_on+0x11f/0x148
                        [<c0103c96>] sysenter_past_esp+0x5f/0x99
                        [<ffffffff>] 0xffffffff
 }
 ... key      at: [<d8beaf70>] ax25_list_lock+0x10/0xffffbf0f [ax25]
 -> (slock-AF_AX25){--..} ops: 410 {
    initial-use  at:
                          [<c012f448>] mark_lock+0x5b/0x44b
                          [<c0130358>] __lock_acquire+0x4c2/0xc02
                          [<c0130b06>] lock_acquire+0x6e/0x87
                          [<c024886b>] lock_sock_nested+0x26/0xcc
                          [<c02a37fb>] _spin_lock_bh+0x2e/0x39
                          [<c024886b>] lock_sock_nested+0x26/0xcc
                          [<c024886b>] lock_sock_nested+0x26/0xcc
                          [<c02462e0>] sock_fasync+0x61/0x116
                          [<c024727f>] sock_close+0x22/0x2f
                          [<c015c78b>] __fput+0xbc/0x172
                          [<c015a256>] filp_close+0x51/0x58
                          [<c0119daf>] put_files_struct+0x5e/0xa6
                          [<c011ae59>] do_exit+0x22e/0x6d9
                          [<c0103cc6>] sysenter_past_esp+0x8f/0x99
                          [<c012fa5e>] trace_hardirqs_on+0x11f/0x148
                          [<c011b36f>] sys_exit_group+0x0/0xd
                          [<c0103c96>] sysenter_past_esp+0x5f/0x99
                          [<ffffffff>] 0xffffffff
    softirq-on-W at:
                          [<c0130a50>] __lock_acquire+0xbba/0xc02
                          [<c0130343>] __lock_acquire+0x4ad/0xc02
                          [<c011c8e7>] local_bh_enable_ip+0xbd/0xc5
                          [<c0130b06>] lock_acquire+0x6e/0x87
                          [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
                          [<c02a37c2>] _spin_lock+0x29/0x34
                          [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
                          [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
                          [<d8be50c0>] ax25_release+0x9d/0x182 [ax25]
                          [<c0246e79>] sock_release+0x14/0x56
                          [<c0247287>] sock_close+0x2a/0x2f
                          [<c015c78b>] __fput+0xbc/0x172
                          [<c015a256>] filp_close+0x51/0x58
                          [<c015b284>] sys_close+0x66/0x9d
                          [<c0103c96>] sysenter_past_esp+0x5f/0x99
                          [<ffffffff>] 0xffffffff
    hardirq-on-W at:
                          [<c012f448>] mark_lock+0x5b/0x44b
                          [<c013031e>] __lock_acquire+0x488/0xc02
                          [<c0130b06>] lock_acquire+0x6e/0x87
                          [<c024886b>] lock_sock_nested+0x26/0xcc
                          [<c02a37fb>] _spin_lock_bh+0x2e/0x39
                          [<c024886b>] lock_sock_nested+0x26/0xcc
                          [<c024886b>] lock_sock_nested+0x26/0xcc
                          [<c02462e0>] sock_fasync+0x61/0x116
                          [<c024727f>] sock_close+0x22/0x2f
                          [<c015c78b>] __fput+0xbc/0x172
                          [<c015a256>] filp_close+0x51/0x58
                          [<c0119daf>] put_files_struct+0x5e/0xa6
                          [<c011ae59>] do_exit+0x22e/0x6d9
                          [<c0103cc6>] sysenter_past_esp+0x8f/0x99
                          [<c012fa5e>] trace_hardirqs_on+0x11f/0x148
                          [<c011b36f>] sys_exit_group+0x0/0xd
                          [<c0103c96>] sysenter_past_esp+0x5f/0x99
                          [<ffffffff>] 0xffffffff
  }
  ... key      at: [<c05db598>] af_family_slock_keys+0x18/0x120
 ... acquired at:
   [<c012df48>] add_lock_to_list+0x68/0x8b
   [<c01308ab>] __lock_acquire+0xa15/0xc02
   [<d8be3f06>] ax25_info_show+0x23e/0x2aa [ax25]
   [<c0130b06>] lock_acquire+0x6e/0x87
   [<d8be3f06>] ax25_info_show+0x23e/0x2aa [ax25]
   [<c02a37c2>] _spin_lock+0x29/0x34
   [<d8be3f06>] ax25_info_show+0x23e/0x2aa [ax25]
   [<d8be3f06>] ax25_info_show+0x23e/0x2aa [ax25]
   [<d8be0030>] ax2asc+0x30/0x5a [ax25]
   [<c017182a>] seq_read+0x195/0x277
   [<c0171695>] seq_read+0x0/0x277
   [<c0185922>] proc_reg_read+0x64/0x77
   [<c01858be>] proc_reg_read+0x0/0x77
   [<c015c0d0>] vfs_read+0xa6/0x12e
   [<c015c3fe>] sys_read+0x41/0x67
   [<c0103c96>] sysenter_past_esp+0x5f/0x99
   [<ffffffff>] 0xffffffff


stack backtrace:
 [<c012ec45>] print_irq_inversion_bug+0x10b/0x115
 [<c012f323>] check_usage_backwards+0x3c/0x41
 [<c012f6a5>] mark_lock+0x2b8/0x44b
 [<c0130a50>] __lock_acquire+0xbba/0xc02
 [<c0130343>] __lock_acquire+0x4ad/0xc02
 [<c011c8e7>] local_bh_enable_ip+0xbd/0xc5
 [<c0130b06>] lock_acquire+0x6e/0x87
 [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
 [<c02a37c2>] _spin_lock+0x29/0x34
 [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
 [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
 [<d8be50c0>] ax25_release+0x9d/0x182 [ax25]
 [<c0246e79>] sock_release+0x14/0x56
 [<c0247287>] sock_close+0x2a/0x2f
 [<c015c78b>] __fput+0xbc/0x172
 [<c015a256>] filp_close+0x51/0x58
 [<c015b284>] sys_close+0x66/0x9d
 [<c0103c96>] sysenter_past_esp+0x5f/0x99
 =======================

=================================
[ INFO: inconsistent lock state ]
2.6.23.1 #1
---------------------------------
inconsistent {in-softirq-W} -> {softirq-on-W} usage.
ax25_call/4005 [HC0[0]:SC0[0]:HE1:SE1] takes:
 (slock-AF_AX25){-+..}, at: [<d8b79312>] ax25_disconnect+0x46/0xaf [ax25]
{in-softirq-W} state was registered at:
  [<c0130a50>] __lock_acquire+0xbba/0xc02
  [<c01302cc>] __lock_acquire+0x436/0xc02
  [<c0130a50>] __lock_acquire+0xbba/0xc02
  [<c0130b06>] lock_acquire+0x6e/0x87
  [<d8b78f62>] ax25_std_heartbeat_expiry+0x19/0xd3 [ax25]
  [<d8b79797>] ax25_heartbeat_expiry+0x0/0x29 [ax25]
  [<c02a37c2>] _spin_lock+0x29/0x34
  [<d8b78f62>] ax25_std_heartbeat_expiry+0x19/0xd3 [ax25]
  [<d8b78f62>] ax25_std_heartbeat_expiry+0x19/0xd3 [ax25]
  [<c011f4db>] run_timer_softirq+0xee/0x14a
  [<c011c506>] __do_softirq+0x31/0x90
  [<c012fa48>] trace_hardirqs_on+0x109/0x148
  [<c011c515>] __do_softirq+0x40/0x90
  [<c011c58c>] do_softirq+0x27/0x3d
  [<c0106768>] do_IRQ+0x58/0x6c
  [<c0104cee>] common_interrupt+0x2e/0x40
  [<d8a9a63f>] acpi_processor_idle+0x262/0x3cf [processor]
  [<c0102342>] cpu_idle+0x3c/0x51
  [<c0382a0c>] start_kernel+0x272/0x277
  [<c0382323>] unknown_bootoption+0x0/0x195
  [<ffffffff>] 0xffffffff
irq event stamp: 1663
hardirqs last  enabled at (1663): [<c011c8e7>] local_bh_enable_ip+0xbd/0xc5
hardirqs last disabled at (1661): [<c011c885>] local_bh_enable_ip+0x5b/0xc5
softirqs last  enabled at (1662): [<d8b79300>] ax25_disconnect+0x34/0xaf [ax25]
softirqs last disabled at (1656): [<c02a37d8>] _spin_lock_bh+0xb/0x39

other info that might help us debug this:
no locks held by ax25_call/4005.

stack backtrace:
 [<c012ed8a>] print_usage_bug+0x13b/0x145
 [<c012f662>] mark_lock+0x275/0x44b
 [<c0130a50>] __lock_acquire+0xbba/0xc02
 [<c0130343>] __lock_acquire+0x4ad/0xc02
 [<c011c8e7>] local_bh_enable_ip+0xbd/0xc5
 [<c0130b06>] lock_acquire+0x6e/0x87
 [<d8b79312>] ax25_disconnect+0x46/0xaf [ax25]
 [<c02a37c2>] _spin_lock+0x29/0x34
 [<d8b79312>] ax25_disconnect+0x46/0xaf [ax25]
 [<d8b79312>] ax25_disconnect+0x46/0xaf [ax25]
 [<d8b7b0c0>] ax25_release+0x9d/0x182 [ax25]
 [<c0246e79>] sock_release+0x14/0x56
 [<c0247287>] sock_close+0x2a/0x2f
 [<c015c78b>] __fput+0xbc/0x172
 [<c015a256>] filp_close+0x51/0x58
 [<c0119daf>] put_files_struct+0x5e/0xa6
 [<c011ae59>] do_exit+0x22e/0x6d9
 [<c02a3a6d>] _spin_unlock_irq+0x20/0x23
 [<c012fa5e>] trace_hardirqs_on+0x11f/0x148
 [<c011b36f>] sys_exit_group+0x0/0xd
 [<c0121c1e>] get_signal_to_deliver+0x3c6/0x3ea
 [<c02475c8>] sys_connect+0x82/0xad
 [<c0103475>] do_notify_resume+0x81/0x5fe
 [<c02a3a6d>] _spin_unlock_irq+0x20/0x23
 [<c012fa5e>] trace_hardirqs_on+0x11f/0x148
 [<c02a3a6d>] _spin_unlock_irq+0x20/0x23
 [<c0247e0d>] sys_socketcall+0xac/0x261
 [<c012fa5e>] trace_hardirqs_on+0x11f/0x148
 [<c0103ded>] work_notifysig+0x13/0x26
 =======================

Linux version 2.6.23.1 (root@bernard) (gcc version 4.1.2 20070302 (prerelease) (4.1.2-1mdv2007.1)) #1 Thu Oct 18 21:45:24 CEST 2007

Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar
... MAX_LOCKDEP_SUBCLASSES:    8
... MAX_LOCK_DEPTH:          30
... MAX_LOCKDEP_KEYS:        2048
... CLASSHASH_SIZE:           1024
... MAX_LOCKDEP_ENTRIES:     8192
... MAX_LOCKDEP_CHAINS:      16384
... CHAINHASH_SIZE:          8192
 memory used by lock dependency info: 992 kB
 per task-struct memory footprint: 1200 bytes

=================================
[ INFO: inconsistent lock state ]
2.6.23.1 #1
---------------------------------
inconsistent {in-softirq-W} -> {softirq-on-W} usage.
flexd/3005 [HC0[0]:SC0[0]:HE1:SE1] takes:
 (slock-AF_AX25){-+..}, at: [<d8be6312>] ax25_disconnect+0x46/0xaf [ax25]
{in-softirq-W} state was registered at:
  [<c0130a50>] __lock_acquire+0xbba/0xc02
  [<c01302cc>] __lock_acquire+0x436/0xc02
  [<c0130a50>] __lock_acquire+0xbba/0xc02
  [<c0130b06>] lock_acquire+0x6e/0x87
  [<d8be5f62>] ax25_std_heartbeat_expiry+0x19/0xd3 [ax25]
  [<d8be6797>] ax25_heartbeat_expiry+0x0/0x29 [ax25]
  [<c02a37c2>] _spin_lock+0x29/0x34
  [<d8be5f62>] ax25_std_heartbeat_expiry+0x19/0xd3 [ax25]
  [<d8be5f62>] ax25_std_heartbeat_expiry+0x19/0xd3 [ax25]
  [<c011f4db>] run_timer_softirq+0xee/0x14a
  [<c011c506>] __do_softirq+0x31/0x90
  [<c012fa48>] trace_hardirqs_on+0x109/0x148
  [<c011c515>] __do_softirq+0x40/0x90
  [<c011c58c>] do_softirq+0x27/0x3d
  [<c0106768>] do_IRQ+0x58/0x6c
  [<c0104cee>] common_interrupt+0x2e/0x40
  [<d8a91364>] acpi_safe_halt+0x19/0x25 [processor]
  [<d8a91523>] acpi_processor_idle+0x146/0x3cf [processor]
  [<c0102342>] cpu_idle+0x3c/0x51
  [<c0382a0c>] start_kernel+0x272/0x277
  [<c0382323>] unknown_bootoption+0x0/0x195
  [<ffffffff>] 0xffffffff
irq event stamp: 519
hardirqs last  enabled at (519): [<c011c8e7>] local_bh_enable_ip+0xbd/0xc5
hardirqs last disabled at (517): [<c011c885>] local_bh_enable_ip+0x5b/0xc5
softirqs last  enabled at (518): [<d8be6300>] ax25_disconnect+0x34/0xaf [ax25]
softirqs last disabled at (512): [<c02a37d8>] _spin_lock_bh+0xb/0x39

other info that might help us debug this:
no locks held by flexd/3005.

stack backtrace:
 [<c012ed8a>] print_usage_bug+0x13b/0x145
 [<c012f662>] mark_lock+0x275/0x44b
 [<c0130a50>] __lock_acquire+0xbba/0xc02
 [<c0130343>] __lock_acquire+0x4ad/0xc02
 [<c011c8e7>] local_bh_enable_ip+0xbd/0xc5
 [<c0130b06>] lock_acquire+0x6e/0x87
 [<d8be6312>] ax25_disconnect+0x46/0xaf [ax25]
 [<c02a37c2>] _spin_lock+0x29/0x34
 [<d8be6312>] ax25_disconnect+0x46/0xaf [ax25]
 [<d8be6312>] ax25_disconnect+0x46/0xaf [ax25]
 [<d8be80c0>] ax25_release+0x9d/0x182 [ax25]
 [<c0246e79>] sock_release+0x14/0x56
 [<c0247287>] sock_close+0x2a/0x2f
 [<c015c78b>] __fput+0xbc/0x172
 [<c015a256>] filp_close+0x51/0x58
 [<c015b284>] sys_close+0x66/0x9d
 [<c0103c96>] sysenter_past_esp+0x5f/0x99
 =======================

Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar
... MAX_LOCKDEP_SUBCLASSES:    8
... MAX_LOCK_DEPTH:          30
... MAX_LOCKDEP_KEYS:        2048
... CLASSHASH_SIZE:           1024
... MAX_LOCKDEP_ENTRIES:     8192
... MAX_LOCKDEP_CHAINS:      16384
... CHAINHASH_SIZE:          8192
 memory used by lock dependency info: 992 kB
 per task-struct memory footprint: 1200 bytes

=================================
[ INFO: inconsistent lock state ]
2.6.23.8 #2
---------------------------------
inconsistent {in-softirq-W} -> {softirq-on-W} usage.
flexd/4679 [HC0[0]:SC0[0]:HE1:SE1] takes:
 (slock-AF_AX25){-+..}, at: [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
{in-softirq-W} state was registered at:
  [<c0130a4c>] __lock_acquire+0xb9e/0xbe6
  [<c01302c6>] __lock_acquire+0x418/0xbe6
  [<c0130a4c>] __lock_acquire+0xb9e/0xbe6
  [<c0130b02>] lock_acquire+0x6e/0x87
  [<d8be2f62>] ax25_std_heartbeat_expiry+0x19/0xd3 [ax25]
  [<d8be3797>] ax25_heartbeat_expiry+0x0/0x29 [ax25]
  [<c02a3922>] _spin_lock+0x29/0x34
  [<d8be2f62>] ax25_std_heartbeat_expiry+0x19/0xd3 [ax25]
  [<d8be2f62>] ax25_std_heartbeat_expiry+0x19/0xd3 [ax25]
  [<c011f4f3>] run_timer_softirq+0xee/0x14a
  [<c011c51e>] __do_softirq+0x31/0x90
  [<c012fa60>] trace_hardirqs_on+0x109/0x148
  [<c011c52d>] __do_softirq+0x40/0x90
  [<c011c5a4>] do_softirq+0x27/0x3d
  [<c0106768>] do_IRQ+0x58/0x6c
  [<c0104cee>] common_interrupt+0x2e/0x40
  [<d8a9963f>] acpi_processor_idle+0x262/0x3cf [processor]
  [<c0102342>] cpu_idle+0x3c/0x51
  [<c0382a0c>] start_kernel+0x272/0x277
  [<c0382323>] unknown_bootoption+0x0/0x195
  [<ffffffff>] 0xffffffff
irq event stamp: 601
hardirqs last  enabled at (601): [<c011c8ff>] local_bh_enable_ip+0xbd/0xc5
hardirqs last disabled at (599): [<c011c89d>] local_bh_enable_ip+0x5b/0xc5
softirqs last  enabled at (600): [<d8be3300>] ax25_disconnect+0x34/0xaf [ax25]
softirqs last disabled at (594): [<c02a3938>] _spin_lock_bh+0xb/0x39

other info that might help us debug this:
no locks held by flexd/4679.

stack backtrace:
 [<c012eda2>] print_usage_bug+0x13b/0x145
 [<c012f67a>] mark_lock+0x275/0x44b
 [<c0130a4c>] __lock_acquire+0xb9e/0xbe6
 [<c013032d>] __lock_acquire+0x47f/0xbe6
 [<c012f889>] mark_held_locks+0x39/0x53
 [<c011c8ff>] local_bh_enable_ip+0xbd/0xc5
 [<c0130b02>] lock_acquire+0x6e/0x87
 [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
 [<c02a3922>] _spin_lock+0x29/0x34
 [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
 [<d8be3312>] ax25_disconnect+0x46/0xaf [ax25]
 [<d8be50c0>] ax25_release+0x9d/0x182 [ax25]
 [<c0246f15>] sock_release+0x14/0x56
 [<c0247327>] sock_close+0x2a/0x2f
 [<c015c7ab>] __fput+0xbc/0x172
 [<c015a27a>] filp_close+0x51/0x58
 [<c015b2a8>] sys_close+0x66/0x9d
 [<c0103c96>] sysenter_past_esp+0x5f/0x99
 =======================



* Re: Inconsistent lock  state and possible irq lock inversion dependency detected in ax25.ko
  2007-11-21 22:13 ` Inconsistent lock state and possible irq lock inversion dependency detected in ax25.ko Bernard Pidoux
@ 2007-11-28 13:48   ` Jarek Poplawski
  2007-12-02 17:37   ` Bernard Pidoux
  1 sibling, 0 replies; 12+ messages in thread
From: Jarek Poplawski @ 2007-11-28 13:48 UTC (permalink / raw)
  To: Bernard Pidoux; +Cc: Alexey Dobriyan, ralf, davem, netdev

On 21-11-2007 23:13, Bernard Pidoux wrote:
...
> [ INFO: inconsistent lock state ]
> 
> The error seems to reside around ax25_disconnect+0x46/0xaf [ax25] that 
> is called when an AX25 connect timeout or a connection failure occurs.
> Connect timeout is probably activating
> ax25_std_heartbeat_expiry+0x19/0xd3 [ax25]
... 

Hi,

Could you try this patch? (2.6.23 or later - should be no difference)

Thanks,
Jarek P.

---

diff -Nurp linux-2.6.24-rc2-/net/ax25/ax25_subr.c linux-2.6.24-rc2+/net/ax25/ax25_subr.c
--- linux-2.6.24-rc2-/net/ax25/ax25_subr.c	2007-10-09 22:31:38.000000000 +0200
+++ linux-2.6.24-rc2+/net/ax25/ax25_subr.c	2007-11-28 11:51:12.000000000 +0100
@@ -279,6 +279,7 @@ void ax25_disconnect(ax25_cb *ax25, int 
 	ax25_link_failed(ax25, reason);
 
 	if (ax25->sk != NULL) {
+		local_bh_disable();
 		bh_lock_sock(ax25->sk);
 		ax25->sk->sk_state     = TCP_CLOSE;
 		ax25->sk->sk_err       = reason;
@@ -288,5 +289,6 @@ void ax25_disconnect(ax25_cb *ax25, int 
 			sock_set_flag(ax25->sk, SOCK_DEAD);
 		}
 		bh_unlock_sock(ax25->sk);
+		local_bh_enable();
 	}
 }
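
A note on why this helps: bh_lock_sock() is just a plain spin_lock() on
sk->sk_lock.slock and does not disable bottom halves by itself. The same
slock is taken from the timer softirq path (ax25_std_heartbeat_expiry in the
reports above), so if that timer could fire on the same CPU while process
context holds the lock, the softirq would spin forever, which is the
inversion lockdep flagged. The patch applies the usual pattern (sketch only,
not the actual ax25 code):

	local_bh_disable();	/* keep softirqs, e.g. the heartbeat timer,
				   off this CPU ... */
	bh_lock_sock(sk);	/* ... while holding the socket spinlock */
	/* update sk state */
	bh_unlock_sock(sk);
	local_bh_enable();

The combination is equivalent to taking the socket spinlock with
spin_lock_bh()/spin_unlock_bh().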


* Re: Inconsistent lock  state and possible irq lock inversion dependency detected in ax25.ko
  2007-11-21 22:13 ` Inconsistent lock state and possible irq lock inversion dependency detected in ax25.ko Bernard Pidoux
  2007-11-28 13:48   ` Jarek Poplawski
@ 2007-12-02 17:37   ` Bernard Pidoux
  2007-12-02 22:02     ` Jarek Poplawski
  1 sibling, 1 reply; 12+ messages in thread
From: Bernard Pidoux @ 2007-12-02 17:37 UTC (permalink / raw)
  To: Jarek Poplawski; +Cc: ralf, davem, netdev, Alexey Dobriyan

[-- Attachment #1: Type: text/plain, Size: 910 bytes --]

Hi,

Many thanks for your patch for ~/net/ax25/ax25_subr.c

Introduction of local_bh_disable() ... local_bh_enable()

cured the inconsistent lock state related to AX25 connect timeout.

I have now a stable monoprocessor system running AX25 and ROSE network 
packet switching application FPAC, whether kernel is compiled with or 
without hack option.

There is no more problem during normal operations.

This was achieved, thanks to your AX25 patch and the patch from Alexey 
Dobriyan for rose module.

I also patched rose module in order to get packet routing more 
efficient, taking into account the "restarted" flag that is raised when 
a neighbour node is already connected.

To summarize the present situation on my Linux machine, I built a patch 
against kernel 2.6.23.9.

I would appreciate if you could make it included into a next kernel release.

Many thanks and best regards,

Bernard Pidoux
F6BVP



[-- Attachment #2: rose-patch-2.6.23.9.tgz --]
[-- Type: application/x-compressed-tar, Size: 1745 bytes --]


* Re: Inconsistent lock  state and possible irq lock inversion dependency detected in ax25.ko
  2007-12-02 17:37   ` Bernard Pidoux
@ 2007-12-02 22:02     ` Jarek Poplawski
  2007-12-04 22:26       ` Bernard Pidoux
  0 siblings, 1 reply; 12+ messages in thread
From: Jarek Poplawski @ 2007-12-02 22:02 UTC (permalink / raw)
  To: Bernard Pidoux; +Cc: ralf, davem, netdev, Alexey Dobriyan

Bernard Pidoux wrote, On 12/02/2007 06:37 PM:

> Hi,
> 
> Many thanks for your patch for ~/net/ax25/ax25_subr.c
> 
> Introduction of local_bh_disable() ... local_bh_enable()
> 
> cured the inconsistent lock state related to AX25 connect timeout.
> 
> I have now a stable monoprocessor system running AX25 and ROSE network 
> packet switching application FPAC, whether kernel is compiled with or 
> without hack option.
> 
> There is no more problem during normal operations.
> 
> This was achieved, thanks to your AX25 patch and the patch from Alexey 
> Dobriyan for rose module.
> 
> I also patched rose module in order to get packet routing more 
> efficient, taking into account the "restarted" flag that is raised when 
> a neighbour node is already connected.
> 
> To summarize the present situation on my Linux machine, I built a patch 
> against kernel 2.6.23.9.
> 
> I would appreciate if you could make it included into a next kernel release.
... 

Bernard, I'm very glad I could be a little helpful, but I'm not sure of
your intentions: my patch proposal is rather trivial interpretation of
lockdep's report; I haven't studied AX25 enough even to be sure there is
a real lockup possible in this place. Since this change looks not very
costly and quite safe, I can 'take a risk' to sign this off after your
testing. But anything more is beyond my 'range'.

So, since you've spent quite a lot of time on this all, maybe it would
be simpler if you've tried the same with the current kernel, and resent
"proper" (not gzipped and with changelog) patch or patches. Then, I hope,
Ralf, as the maintainer, will make the rest.

Regards,
Jarek P.


* Re: Inconsistent lock  state and possible irq lock inversion dependency detected in ax25.ko
  2007-12-02 22:02     ` Jarek Poplawski
@ 2007-12-04 22:26       ` Bernard Pidoux
  2007-12-04 23:17         ` Jarek Poplawski
  0 siblings, 1 reply; 12+ messages in thread
From: Bernard Pidoux @ 2007-12-04 22:26 UTC (permalink / raw)
  To: Jarek Poplawski; +Cc: ralf, davem, netdev, Alexey Dobriyan

[-- Attachment #1: Type: text/plain, Size: 2257 bytes --]


Jarek Poplawski wrote:
> Bernard Pidoux wrote, On 12/02/2007 06:37 PM:
> 
>> Hi,
>>
>> Many thanks for your patch for ~/net/ax25/ax25_subr.c
>>
>> Introduction of local_bh_disable() ... local_bh_enable()
>>
>> cured the inconsistent lock state related to AX25 connect timeout.
>>
>> I have now a stable monoprocessor system running AX25 and ROSE network 
>> packet switching application FPAC, whether kernel is compiled with or 
>> without hack option.
>>
>> There is no more problem during normal operations.
>>
>> This was achieved, thanks to your AX25 patch and the patch from Alexey 
>> Dobriyan for rose module.
>>
>> I also patched rose module in order to get packet routing more 
>> efficient, taking into account the "restarted" flag that is raised when 
>> a neighbour node is already connected.
>>
>> To summarize the present situation on my Linux machine, I built a patch 
>> against kernel 2.6.23.9.
>>
>> I would appreciate if you could make it included into a next kernel release.
> ... 
> 
> Bernard, I'm very glad I could be a little helpful, but I'm not sure of
> your intentions: my patch proposal is rather trivial interpretation of
> lockdep's report; I haven't studied AX25 enough even to be sure there is
> a real lockup possible in this place. Since this change looks not very
> costly and quite safe, I can 'take a risk' to sign this off after your
> testing. But anything more is beyond my 'range'.
> 
> So, since you've spent quite a lot of time on this all, maybe it would
> be simpler if you've tried the same with the current kernel, and resent
> "proper" (not gzipped and with changelog) patch or patches. Then, I hope,
> Ralf, as the maintainer, will make the rest.
> 
> Regards,
> Jarek P.
> 
> 

As requested, I am sending again, in clear text, the summary of the ax25 and
rose patches against 2.6.23.9.
As I said, after applying these patches the kernel stability problems are
behind us.
I did not observe any warning or lockup after a few days of intensive AX25
use.
The rose module is also handling frame routing much more efficiently.

This will considerably help us to focus on application programs now.
I am now concentrating my efforts on ROSE/FPAC and Linux FBB code
adjustment.

Thanks to you all for your help.

73 de Bernard, f6bvp


[-- Attachment #2: rose-patch-2.6.23.9 --]
[-- Type: text/plain, Size: 5103 bytes --]

diff -pruN a/include/net/rose.h b/include/net/rose.h
--- a/include/net/rose.h	2007-10-12 18:43:44.000000000 +0200
+++ b/include/net/rose.h	2007-12-01 23:56:57.000000000 +0100
@@ -202,6 +202,7 @@ extern struct net_device *rose_dev_first
 extern struct net_device *rose_dev_get(rose_address *);
 extern struct rose_route *rose_route_free_lci(unsigned int, struct rose_neigh *);
 extern struct rose_neigh *rose_get_neigh(rose_address *, unsigned char *, unsigned char *);
+extern struct rose_neigh *__rose_get_neigh(rose_address *, unsigned char *, unsigned char *);
 extern int  rose_rt_ioctl(unsigned int, void __user *);
 extern void rose_link_failed(ax25_cb *, int);
 extern int  rose_route_frame(struct sk_buff *, ax25_cb *);
diff -pruN a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
--- a/net/ax25/ax25_subr.c	2007-10-12 18:43:44.000000000 +0200
+++ b/net/ax25/ax25_subr.c	2007-12-01 23:32:01.000000000 +0100
@@ -279,6 +279,7 @@ void ax25_disconnect(ax25_cb *ax25, int 
 	ax25_link_failed(ax25, reason);
 
 	if (ax25->sk != NULL) {
+		local_bh_disable();
 		bh_lock_sock(ax25->sk);
 		ax25->sk->sk_state     = TCP_CLOSE;
 		ax25->sk->sk_err       = reason;
@@ -288,5 +289,6 @@ void ax25_disconnect(ax25_cb *ax25, int 
 			sock_set_flag(ax25->sk, SOCK_DEAD);
 		}
 		bh_unlock_sock(ax25->sk);
+		local_bh_enable(); 
 	}
 }
diff -pruN a/net/rose/af_rose.c b/net/rose/af_rose.c
--- a/net/rose/af_rose.c	2007-10-12 18:43:44.000000000 +0200
+++ b/net/rose/af_rose.c	2007-12-02 10:06:31.000000000 +0100
@@ -62,7 +62,7 @@ int sysctl_rose_window_size             
 static HLIST_HEAD(rose_list);
 static DEFINE_SPINLOCK(rose_list_lock);
 
-static struct proto_ops rose_proto_ops;
+static const struct proto_ops rose_proto_ops;
 
 ax25_address rose_callsign;
 
@@ -741,7 +741,7 @@ static int rose_connect(struct socket *s
 	sk->sk_state   = TCP_CLOSE;
 	sock->state = SS_UNCONNECTED;
 
-	rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause,
+	rose->neighbour = __rose_get_neigh(&addr->srose_addr, &cause,
 					 &diagnostic);
 	if (!rose->neighbour)
 		return -ENETUNREACH;
@@ -773,7 +773,6 @@ static int rose_connect(struct socket *s
 
 		rose_insert_socket(sk);		/* Finish the bind */
 	}
-rose_try_next_neigh:
 	rose->dest_addr   = addr->srose_addr;
 	rose->dest_call   = addr->srose_call;
 	rose->rand        = ((long)rose & 0xFFFF) + rose->lci;
@@ -835,12 +834,6 @@ rose_try_next_neigh:
 	}
 
 	if (sk->sk_state != TCP_ESTABLISHED) {
-	/* Try next neighbour */
-		rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic);
-		if (rose->neighbour)
-			goto rose_try_next_neigh;
-
-		/* No more neighbours */
 		sock->state = SS_UNCONNECTED;
 		err = sock_error(sk);	/* Always set at this point */
 		goto out_release;
@@ -1481,7 +1474,7 @@ static struct net_proto_family rose_fami
 	.owner		=	THIS_MODULE,
 };
 
-static struct proto_ops rose_proto_ops = {
+static const struct proto_ops rose_proto_ops = {
 	.family		=	PF_ROSE,
 	.owner		=	THIS_MODULE,
 	.release	=	rose_release,
Files linux-2.6.23.9-orig/net/rose/rose.ko and linux-2.6.23.9/net/rose/rose.ko differ.
diff -pruN a/net/rose/rose_route.c b/net/rose/rose_route.c
--- a/net/rose/rose_route.c	2007-10-12 18:43:44.000000000 +0200
+++ b/net/rose/rose_route.c	2007-12-02 00:15:24.000000000 +0100
@@ -664,25 +664,22 @@ struct rose_route *rose_route_free_lci(u
 /*
  *	Find a neighbour given a ROSE address.
  */
-struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
+struct rose_neigh *__rose_get_neigh(rose_address *addr, unsigned char *cause,
 	unsigned char *diagnostic)
 {
-	struct rose_neigh *res = NULL;
 	struct rose_node *node;
 	int failed = 0;
 	int i;
 
-	spin_lock_bh(&rose_node_list_lock);
 	for (node = rose_node_list; node != NULL; node = node->next) {
 		if (rosecmpm(addr, &node->address, node->mask) == 0) {
 			for (i = 0; i < node->count; i++) {
-				if (!rose_ftimer_running(node->neighbour[i])) {
-					res = node->neighbour[i];
-					goto out;
-				} else
-					failed = 1;
+				if (node->neighbour[i]->restarted)
+					return node->neighbour[i];
+				if (!rose_ftimer_running(node->neighbour[i]))			
+					return node->neighbour[i];
+				failed = 1;
 			}
-			break;
 		}
 	}
 
@@ -694,7 +691,16 @@ struct rose_neigh *rose_get_neigh(rose_a
 		*diagnostic = 0;
 	}
 
-out:
+	return NULL;
+}
+
+struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
+	unsigned char *diagnostic)
+{
+	struct rose_neigh *res;
+
+	spin_lock_bh(&rose_node_list_lock);
+	res = __rose_get_neigh(addr, cause, diagnostic);
 	spin_unlock_bh(&rose_node_list_lock);
 
 	return res;
@@ -1019,7 +1025,7 @@ int rose_route_frame(struct sk_buff *skb
 		rose_route = rose_route->next;
 	}
 
-	if ((new_neigh = rose_get_neigh(dest_addr, &cause, &diagnostic)) == NULL) {
+	if ((new_neigh = __rose_get_neigh(dest_addr, &cause, &diagnostic)) == NULL) {
 		rose_transmit_clear_request(rose_neigh, lci, cause, diagnostic);
 		goto out;
 	}
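
A note on the rose_route.c part: splitting the lookup into a locking
rose_get_neigh() wrapper around an unlocked __rose_get_neigh() is the usual
kernel idiom when some caller already holds the lock. Taking a non-recursive
spinlock twice on the same CPU deadlocks, i.e. (sketch of the general
problem, assuming a call chain that already holds rose_node_list_lock, which
is what Ralf refers to later in the thread as "taking rose_node_list_lock
twice"):

	spin_lock_bh(&rose_node_list_lock);
	...
	/* deeper in the same call chain, the old rose_get_neigh() did: */
	spin_lock_bh(&rose_node_list_lock);	/* same lock, same CPU -> deadlock */

Callers that hold the lock now use __rose_get_neigh(); everyone else goes
through the locking wrapper.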



* Re: Inconsistent lock  state and possible irq lock inversion dependency detected in ax25.ko
  2007-12-04 22:26       ` Bernard Pidoux
@ 2007-12-04 23:17         ` Jarek Poplawski
  2007-12-05  0:45           ` Ralf Baechle
  0 siblings, 1 reply; 12+ messages in thread
From: Jarek Poplawski @ 2007-12-04 23:17 UTC (permalink / raw)
  To: Bernard Pidoux; +Cc: Jarek Poplawski, ralf, davem, netdev, Alexey Dobriyan

Bernard Pidoux wrote, On 12/04/2007 11:26 PM:

> Jarek Poplawski wrote:
>> Bernard Pidoux wrote, On 12/02/2007 06:37 PM:
>>
>>> Hi,
>>>
>>> Many thanks for your patch for ~/net/ax25/ax25_subr.c
>>>
>>> Introduction of local_bh_disable() ... local_bh_enable()
>>>
>>> cured the inconsistent lock state related to AX25 connect timeout.
>>>
>>> I have now a stable monoprocessor system running AX25 and ROSE network 
>>> packet switching application FPAC, whether kernel is compiled with or 
>>> without hack option.
>>>
>>> There is no more problem during normal operations.
>>>
>>> This was achieved, thanks to your AX25 patch and the patch from Alexey 
>>> Dobriyan for rose module.
>>>
>>> I also patched rose module in order to get packet routing more 
>>> efficient, taking into account the "restarted" flag that is raised when 
>>> a neighbour node is already connected.
>>>
>>> To summarize the present situation on my Linux machine, I built a patch 
>>> against kernel 2.6.23.9.
>>>
>>> I would appreciate if you could make it included into a next kernel release.
>> ... 
>>
>> Bernard, I'm very glad I could be a little helpful, but I'm not sure of
>> your intentions: my patch proposal is rather trivial interpretation of
>> lockdep's report; I haven't studied AX25 enough even to be sure there is
>> a real lockup possible in this place. Since this change looks not very
>> costly and quite safe, I can 'take a risk' to sign this off after your
>> testing. But anything more is beyond my 'range'.
>>
>> So, since you've spent quite a lot of time on this all, maybe it would
>> be simpler if you've tried the same with the current kernel, and resent
>> "proper" (not gzipped and with changelog) patch or patches. Then, I hope,
>> Ralf, as the maintainer, will make the rest.
>>
>> Regards,
>> Jarek P.
>>
>>
> 
> As requested, I am sending again, in clear text, the summary of the ax25 and
> rose patches against 2.6.23.9.
> As I said, after applying these patches the kernel stability problems are
> behind us.
> I did not observe any warning or lockup after a few days of intensive AX25
> use.
> The rose module is also handling frame routing much more efficiently.
> 
> This will considerably help us to focus on application programs now.
> I am now concentrating my efforts on ROSE/FPAC and Linux FBB code
> adjustment.
> 
> Thanks to you all for your help.
> 
> 73 de Bernard, f6bvp
> 
> 
> 


Bernard, I think you've forgotten at least about some distinct
changelog and signed-off-by?

Jarek P.
---

ax25_subr.c part:
Acked-by: Jarek Poplawski <jarkao2@gmail.com>

> ------------------------------------------------------------------------
> 
> diff -pruN a/include/net/rose.h b/include/net/rose.h
> --- a/include/net/rose.h	2007-10-12 18:43:44.000000000 +0200
> +++ b/include/net/rose.h	2007-12-01 23:56:57.000000000 +0100
> @@ -202,6 +202,7 @@ extern struct net_device *rose_dev_first
>  extern struct net_device *rose_dev_get(rose_address *);
>  extern struct rose_route *rose_route_free_lci(unsigned int, struct rose_neigh *);
>  extern struct rose_neigh *rose_get_neigh(rose_address *, unsigned char *, unsigned char *);
> +extern struct rose_neigh *__rose_get_neigh(rose_address *, unsigned char *, unsigned char *);
>  extern int  rose_rt_ioctl(unsigned int, void __user *);
>  extern void rose_link_failed(ax25_cb *, int);
>  extern int  rose_route_frame(struct sk_buff *, ax25_cb *);
> diff -pruN a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
> --- a/net/ax25/ax25_subr.c	2007-10-12 18:43:44.000000000 +0200
> +++ b/net/ax25/ax25_subr.c	2007-12-01 23:32:01.000000000 +0100
> @@ -279,6 +279,7 @@ void ax25_disconnect(ax25_cb *ax25, int 
>  	ax25_link_failed(ax25, reason);
>  
>  	if (ax25->sk != NULL) {
> +		local_bh_disable();
>  		bh_lock_sock(ax25->sk);
>  		ax25->sk->sk_state     = TCP_CLOSE;
>  		ax25->sk->sk_err       = reason;
> @@ -288,5 +289,6 @@ void ax25_disconnect(ax25_cb *ax25, int 
>  			sock_set_flag(ax25->sk, SOCK_DEAD);
>  		}
>  		bh_unlock_sock(ax25->sk);
> +		local_bh_enable(); 
>  	}
>  }
> diff -pruN a/net/rose/af_rose.c b/net/rose/af_rose.c
> --- a/net/rose/af_rose.c	2007-10-12 18:43:44.000000000 +0200
> +++ b/net/rose/af_rose.c	2007-12-02 10:06:31.000000000 +0100
> @@ -62,7 +62,7 @@ int sysctl_rose_window_size             
>  static HLIST_HEAD(rose_list);
>  static DEFINE_SPINLOCK(rose_list_lock);
>  
> -static struct proto_ops rose_proto_ops;
> +static const struct proto_ops rose_proto_ops;
>  
>  ax25_address rose_callsign;
>  
> @@ -741,7 +741,7 @@ static int rose_connect(struct socket *s
>  	sk->sk_state   = TCP_CLOSE;
>  	sock->state = SS_UNCONNECTED;
>  
> -	rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause,
> +	rose->neighbour = __rose_get_neigh(&addr->srose_addr, &cause,
>  					 &diagnostic);
>  	if (!rose->neighbour)
>  		return -ENETUNREACH;
> @@ -773,7 +773,6 @@ static int rose_connect(struct socket *s
>  
>  		rose_insert_socket(sk);		/* Finish the bind */
>  	}
> -rose_try_next_neigh:
>  	rose->dest_addr   = addr->srose_addr;
>  	rose->dest_call   = addr->srose_call;
>  	rose->rand        = ((long)rose & 0xFFFF) + rose->lci;
> @@ -835,12 +834,6 @@ rose_try_next_neigh:
>  	}
>  
>  	if (sk->sk_state != TCP_ESTABLISHED) {
> -	/* Try next neighbour */
> -		rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic);
> -		if (rose->neighbour)
> -			goto rose_try_next_neigh;
> -
> -		/* No more neighbours */
>  		sock->state = SS_UNCONNECTED;
>  		err = sock_error(sk);	/* Always set at this point */
>  		goto out_release;
> @@ -1481,7 +1474,7 @@ static struct net_proto_family rose_fami
>  	.owner		=	THIS_MODULE,
>  };
>  
> -static struct proto_ops rose_proto_ops = {
> +static const struct proto_ops rose_proto_ops = {
>  	.family		=	PF_ROSE,
>  	.owner		=	THIS_MODULE,
>  	.release	=	rose_release,
> Files linux-2.6.23.9-orig/net/rose/rose.ko and linux-2.6.23.9/net/rose/rose.ko differ.
> diff -pruN a/net/rose/rose_route.c b/net/rose/rose_route.c
> --- a/net/rose/rose_route.c	2007-10-12 18:43:44.000000000 +0200
> +++ b/net/rose/rose_route.c	2007-12-02 00:15:24.000000000 +0100
> @@ -664,25 +664,22 @@ struct rose_route *rose_route_free_lci(u
>  /*
>   *	Find a neighbour given a ROSE address.
>   */
> -struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
> +struct rose_neigh *__rose_get_neigh(rose_address *addr, unsigned char *cause,
>  	unsigned char *diagnostic)
>  {
> -	struct rose_neigh *res = NULL;
>  	struct rose_node *node;
>  	int failed = 0;
>  	int i;
>  
> -	spin_lock_bh(&rose_node_list_lock);
>  	for (node = rose_node_list; node != NULL; node = node->next) {
>  		if (rosecmpm(addr, &node->address, node->mask) == 0) {
>  			for (i = 0; i < node->count; i++) {
> -				if (!rose_ftimer_running(node->neighbour[i])) {
> -					res = node->neighbour[i];
> -					goto out;
> -				} else
> -					failed = 1;
> +				if (node->neighbour[i]->restarted)
> +					return node->neighbour[i];
> +				if (!rose_ftimer_running(node->neighbour[i]))			
> +					return node->neighbour[i];
> +				failed = 1;
>  			}
> -			break;
>  		}
>  	}
>  
> @@ -694,7 +691,16 @@ struct rose_neigh *rose_get_neigh(rose_a
>  		*diagnostic = 0;
>  	}
>  
> -out:
> +	return NULL;
> +}
> +
> +struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
> +	unsigned char *diagnostic)
> +{
> +	struct rose_neigh *res;
> +
> +	spin_lock_bh(&rose_node_list_lock);
> +	res = __rose_get_neigh(addr, cause, diagnostic);
>  	spin_unlock_bh(&rose_node_list_lock);
>  
>  	return res;
> @@ -1019,7 +1025,7 @@ int rose_route_frame(struct sk_buff *skb
>  		rose_route = rose_route->next;
>  	}
>  
> -	if ((new_neigh = rose_get_neigh(dest_addr, &cause, &diagnostic)) == NULL) {
> +	if ((new_neigh = __rose_get_neigh(dest_addr, &cause, &diagnostic)) == NULL) {
>  		rose_transmit_clear_request(rose_neigh, lci, cause, diagnostic);
>  		goto out;
>  	}
> 




* Re: Inconsistent lock  state and possible irq lock inversion dependency detected in ax25.ko
  2007-12-04 23:17         ` Jarek Poplawski
@ 2007-12-05  0:45           ` Ralf Baechle
  0 siblings, 0 replies; 12+ messages in thread
From: Ralf Baechle @ 2007-12-05  0:45 UTC (permalink / raw)
  To: Jarek Poplawski
  Cc: Bernard Pidoux, Jarek Poplawski, davem, netdev, Alexey Dobriyan

On Wed, Dec 05, 2007 at 12:17:47AM +0100, Jarek Poplawski wrote:

> Bernard, I think you've forgotten at least about some distinct
> changelog and signed-off-by?
> 
> Jarek P.
> ---
> 
> ax25_subr.c part:
> Acked-by: Jarek Poplawski <jarkao2@gmail.com>

The fix for the deadlock due to taking rose_node_list_lock twice:

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

Part of Bernard's patch is undoing d85838c55d836c33077344fab424f200f2827d84;
that one should probably be reverted separately.

  Ralf


* [PATCH] [ROSE] reverts commits d85838c55d836c33077344fab424f200f2827d84
  2007-10-08  6:44 ` David Miller
@ 2007-12-14 21:58   ` Bernard Pidoux
  0 siblings, 0 replies; 12+ messages in thread
From: Bernard Pidoux @ 2007-12-14 21:58 UTC (permalink / raw)
  To: David Miller; +Cc: adobriyan, ralf, netdev, Jarek Poplawski

[-- Attachment #1: Type: text/plain, Size: 322 bytes --]

Hi,

This patch against 2.6.24-rc5 reverts commit
d85838c55d836c33077344fab424f200f2827d84,
which was supposed to detect all connected ROSE node neighbors.
Actually the loop was misplaced and the node list was not completely
explored. The next submitted patch gives a solution.

Signed-off-by: Bernard Pidoux <f6bvp@amsat.org>



[-- Attachment #2: rose-2.6.24-rc5.patch1 --]
[-- Type: text/plain, Size: 738 bytes --]

--- linux-2.6.24-rc5/net/rose/af_rose.c	2007-12-11 04:48:43.000000000 +0100
+++ b/net/rose/af_rose.c	2007-12-14 14:09:40.000000000 +0100
@@ -782,7 +782,6 @@
 
 		rose_insert_socket(sk);		/* Finish the bind */
 	}
-rose_try_next_neigh:
 	rose->dest_addr   = addr->srose_addr;
 	rose->dest_call   = addr->srose_call;
 	rose->rand        = ((long)rose & 0xFFFF) + rose->lci;
@@ -844,12 +843,6 @@
 	}
 
 	if (sk->sk_state != TCP_ESTABLISHED) {
-	/* Try next neighbour */
-		rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic);
-		if (rose->neighbour)
-			goto rose_try_next_neigh;
-
-		/* No more neighbours */
 		sock->state = SS_UNCONNECTED;
 		err = sock_error(sk);	/* Always set at this point */
 		goto out_release;



Thread overview: 12+ messages
2007-10-03 18:54 [PATCH] Fix rose.ko oops on unload Alexey Dobriyan
2007-10-03 19:04 ` Jeff Garzik
2007-10-03 19:21   ` Alexey Dobriyan
2007-10-08  6:44 ` David Miller
2007-12-14 21:58   ` [PATCH] [ROSE] reverts commits d85838c55d836c33077344fab424f200f2827d84 Bernard Pidoux
2007-11-21 22:13 ` Inconsistent lock state and possible irq lock inversion dependency detected in ax25.ko Bernard Pidoux
2007-11-28 13:48   ` Jarek Poplawski
2007-12-02 17:37   ` Bernard Pidoux
2007-12-02 22:02     ` Jarek Poplawski
2007-12-04 22:26       ` Bernard Pidoux
2007-12-04 23:17         ` Jarek Poplawski
2007-12-05  0:45           ` Ralf Baechle
