linux-wireless.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Kernel oops when loading ath5k from compat-wireless in 2.6.27
@ 2008-11-14  5:46 Dan McGee
  2008-11-14  6:17 ` Luis R. Rodriguez
  2008-11-14 13:18 ` Bob Copeland
  0 siblings, 2 replies; 39+ messages in thread
From: Dan McGee @ 2008-11-14  5:46 UTC (permalink / raw)
  To: linux-wireless

Also reported here a few times, I've been seeing this bug every time I
try to load up the ath5k module in my kernel:
http://www.kerneloops.org/search.php?search=ieee80211_register_hw&btnG=Function+Search

If you guys have any suggestions, I'd love to hear them. I
disassembled the code in question but am not very good with these
things. It looks to be somewhere between lines 804-825 in
net/mac80211/main.c.

-Dan

ath5k_pci 0000:01:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
ath5k_pci 0000:01:00.0: setting latency timer to 64
ath5k_pci 0000:01:00.0: registered as ''
BUG: unable to handle kernel NULL pointer dereference at 00000000
IP: [<b80c1252>] :mac80211:ieee80211_register_hw+0x10f/0x2d6
*pde = 00000000
Oops: 0000 [#1] PREEMPT
Modules linked in: ath5k(+) mac80211

Pid: 818, comm: modprobe Not tainted (2.6.27.6eee #3)
EIP: 0060:[<b80c1252>] EFLAGS: 00010286 CPU: 0
EIP is at ieee80211_register_hw+0x10f/0x2d6 [mac80211]
EAX: 00000000 EBX: b7345000 ECX: 00000001 EDX: 00000001
ESI: b681c180 EDI: 00000000 EBP: b7183000 ESP: b7365e00
 DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
Process modprobe (pid: 818, ti=b7364000 task=b71f7130 task.ti=b7364000)
Stack: 00000000 00000000 b681cd00 b681cd00 b681fe06 b80ea639 b7365e70 b681cd00
       00000005 b7064458 b7365e70 b7064400 781689c9 b7007000 b8060000 b681cd00
       b681c180 00000000 b681fde4 b7183000 0c0c45e9 0000001a b681cde8 b7183000
Call Trace:
 [<b80ea639>] ath5k_pci_probe+0xc27/0x1150 [ath5k]
 [<781689c9>] find_inode+0x1b/0x56
 [<781e0b0e>] pci_device_probe+0x36/0x55
 [<78252342>] driver_probe_device+0xa1/0x132
 [<7825240a>] __driver_attach+0x37/0x55
 [<78251d86>] bus_for_each_dev+0x35/0x5c
 [<782521f1>] driver_attach+0x11/0x13
 [<782523d3>] __driver_attach+0x0/0x55
 [<7825184b>] bus_add_driver+0x91/0x1a7
 [<b802d000>] init_ath5k_pci+0x0/0x2f [ath5k]
 [<78252571>] driver_register+0x7d/0xd6
 [<b802d000>] init_ath5k_pci+0x0/0x2f [ath5k]
 [<781e0d11>] __pci_register_driver+0x35/0x60
 [<b802d01a>] init_ath5k_pci+0x1a/0x2f [ath5k]
 [<7810111f>] _stext+0x37/0xfb
 [<78137d76>] sys_init_module+0x87/0x175
 [<78102de9>] sysenter_do_call+0x12/0x25
 =======================
Code: 83 c8 00 00 00 21 03 c7 83 b4 00 00 00 1c 49 0d b8 c7 83 0c 02
00 00 ee 17 0c b8 8b 46 1c 8b 40 7c 8b 80 bc 00 00 00 6a 00 6a 00 <8b>
00 e8 75 64 06 c0 5f bf f4 ff ff ff 85 c0 89 46 20 5a 0f 84
EIP: [<b80c1252>] ieee80211_register_hw+0x10f/0x2d6 [mac80211] SS:ESP
0068:b7365e00
---[ end trace 0b0fad82f83782b1 ]---


GDB disassembly:

0x0000028f <ieee80211_register_hw+296>: cmpl   $0xd,0x30(%esi)
0x00000293 <ieee80211_register_hw+300>: mov    $0xd,%eax
0x00000298 <ieee80211_register_hw+305>: cmovae 0x30(%esi),%eax
0x0000029c <ieee80211_register_hw+309>: cmpl   $0x9,(%esi)
0x0000029f <ieee80211_register_hw+312>: mov    %eax,0x9c(%esi)
0x000002a5 <ieee80211_register_hw+318>: jg     0x2ad <ieee80211_register_hw+326>
0x000002a7 <ieee80211_register_hw+320>: movl   $0x64,(%esi)
0x000002ad <ieee80211_register_hw+326>: cmpw   $0x0,0x44(%esi)
0x000002b2 <ieee80211_register_hw+331>: jne    0x2ba <ieee80211_register_hw+339>
0x000002b4 <ieee80211_register_hw+333>: movw   $0x1,0x44(%esi)
0x000002ba <ieee80211_register_hw+339>: mov    0x2c(%esi),%ecx
0x000002bd <ieee80211_register_hw+342>: mov    0x44(%esi),%eax
0x000002c0 <ieee80211_register_hw+345>: mov    %ecx,%edx
0x000002c2 <ieee80211_register_hw+347>: mov    %ax,0xc(%esi)
---Type <return> to continue, or q <return> to quit---
0x000002c6 <ieee80211_register_hw+351>: mov    %cl,%al
0x000002c8 <ieee80211_register_hw+353>: and    $0xe0,%al
0x000002ca <ieee80211_register_hw+355>: cmp    $0x1,%al
0x000002cc <ieee80211_register_hw+357>: sbb    %eax,%eax
0x000002ce <ieee80211_register_hw+359>: and    $0x100,%edx
0x000002d4 <ieee80211_register_hw+365>: and    $0xf,%al
0x000002d6 <ieee80211_register_hw+367>: inc    %al
^^^ HERE ^^^
0x000002d8 <ieee80211_register_hw+369>: or     0x98(%esi),%al
0x000002de <ieee80211_register_hw+375>: cmp    $0x1,%edx
0x000002e1 <ieee80211_register_hw+378>: sbb    %edx,%edx
0x000002e3 <ieee80211_register_hw+380>: and    $0x3c,%dl
0x000002e6 <ieee80211_register_hw+383>: add    $0x4,%dl
0x000002e9 <ieee80211_register_hw+386>: or     %al,%dl
0x000002eb <ieee80211_register_hw+388>: test   %cl,%cl
0x000002ed <ieee80211_register_hw+390>: mov    %dl,0x98(%esi)
0x000002f3 <ieee80211_register_hw+396>: jns    0x2fe <ieee80211_register_hw+407>
0x000002f5 <ieee80211_register_hw+398>: or     $0x8,%dl
0x000002f8 <ieee80211_register_hw+401>: mov    %dl,0x98(%esi)
0x000002fe <ieee80211_register_hw+407>: mov    %esi,%eax
0x00000300 <ieee80211_register_hw+409>: call   0x301 <ieee80211_register_hw+410>
0x00000305 <ieee80211_register_hw+414>: test   %eax,%eax
0x00000307 <ieee80211_register_hw+416>: mov    %eax,%edi
0x00000309 <ieee80211_register_hw+418>: js     0x41b <ieee80211_register_hw+692>
0x0000030f <ieee80211_register_hw+424>: call   0x310 <ieee80211_register_hw+425>
0x00000314 <ieee80211_register_hw+429>: mov    0x54(%esi),%eax
0x00000317 <ieee80211_register_hw+432>: mov    %eax,%edx

GCC assembly compile:

	call	ieee80211_rx_bss_list_init
	movl	$1, %edx
	movl	$1, %ecx
	movl	$ieee80211_master_start_xmit, 412(%ebx)
	movl	$ieee80211_master_open, 508(%ebx)
	movl	$ieee80211_master_stop, 512(%ebx)
	movw	$801, 200(%ebx)
	movl	$ieee80211_header_ops, 180(%ebx)
	movl	$ieee80211_master_set_multicast_list, 524(%ebx)
	movl	28(%esi), %eax
	movl	124(%eax), %eax
	movl	188(%eax), %eax
	pushl	$0
	pushl	$0
	movl	(%eax), %eax
	call	__create_workqueue_key
	popl	%edi
	movl	$-12, %edi
	testl	%eax, %eax
	movl	%eax, 32(%esi)
	popl	%edx
	je	.L34
	cmpl	$13, 48(%esi)
	movl	$13, %eax
	cmovae	48(%esi), %eax
	cmpl	$9, (%esi)
	movl	%eax, 156(%esi)
	jg	.L35
	movl	$100, (%esi)
.L35:
	cmpw	$0, 68(%esi)
	jne	.L36
	movw	$1, 68(%esi)
.L36:
	movl	44(%esi), %ecx
	movl	68(%esi), %eax
	movl	%ecx, %edx
	movw	%ax, 12(%esi)
	movb	%cl, %al
	andb	$-32, %al
	cmpb	$1, %al
	sbbl	%eax, %eax
	andl	$256, %edx
	andb	$15, %al
	incb	%al
^^^ HERE ^^^
	orb	152(%esi), %al
	cmpl	$1, %edx
	sbbl	%edx, %edx
	andb	$60, %dl
	addb	$4, %dl
	orb	%al, %dl
	testb	%cl, %cl
	movb	%dl, 152(%esi)
	jns	.L41
	orb	$8, %dl
	movb	%dl, 152(%esi)
.L41:
	movl	%esi, %eax
	call	sta_info_start
	testl	%eax, %eax
	movl	%eax, %edi
	js	.L42
	call	rtnl_lock
	movl	84(%esi), %eax
	movl	%eax, %edx
	call	dev_alloc_name
	testl	%eax, %eax
	movl	%eax, %edi
	js	.L43
	movl	28(%esi), %edx
	movl	84(%esi), %ecx
	movl	(%edx), %eax
	movl	%eax, 308(%ecx)
	movw	4(%edx), %ax
	movw	%ax, 312(%ecx)
	movl	28(%esi), %eax
	movl	84(%esi), %edx
	movl	124(%eax), %eax
	movl	%eax, 688(%edx)
	movl	84(%esi), %eax
	call	register_netdevice
	testl	%eax, %eax
	movl	%eax, %edi
	js	.L43
	movl	36(%esi), %edx
	movl	%esi, %eax
	call	ieee80211_init_rate_ctrl_alg

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14  5:46 Kernel oops when loading ath5k from compat-wireless in 2.6.27 Dan McGee
@ 2008-11-14  6:17 ` Luis R. Rodriguez
  2008-11-14 17:02   ` Bob Copeland
  2008-11-14 13:18 ` Bob Copeland
  1 sibling, 1 reply; 39+ messages in thread
From: Luis R. Rodriguez @ 2008-11-14  6:17 UTC (permalink / raw)
  To: Dan McGee; +Cc: linux-wireless, Michael Buesch

On Thu, Nov 13, 2008 at 9:46 PM, Dan McGee <dpmcgee@gmail.com> wrote:
> Also reported here a few times, I've been seeing this bug every time I
> try to load up the ath5k module in my kernel:
> http://www.kerneloops.org/search.php?search=ieee80211_register_hw&btnG=Function+Search
>
> If you guys have any suggestions, I'd love to hear them. I
> disassembled the code in question but am not very good with these
> things. It looks to be somewhere between lines 804-825 in
> net/mac80211/main.c.
>
> -Dan
>
> ath5k_pci 0000:01:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
> ath5k_pci 0000:01:00.0: setting latency timer to 64
> ath5k_pci 0000:01:00.0: registered as ''
> BUG: unable to handle kernel NULL pointer dereference at 00000000
> IP: [<b80c1252>] :mac80211:ieee80211_register_hw+0x10f/0x2d6
> *pde = 00000000
> Oops: 0000 [#1] PREEMPT
> Modules linked in: ath5k(+) mac80211
>
> Pid: 818, comm: modprobe Not tainted (2.6.27.6eee #3)
> EIP: 0060:[<b80c1252>] EFLAGS: 00010286 CPU: 0
> EIP is at ieee80211_register_hw+0x10f/0x2d6 [mac80211]

I don't know assembly but if you compile with debugging symbols you
can pin point the line at which your EIP points to with gdb.

Run

gdb /lib/modules/`uname -r`/kernel/net/mac80211.ko

Then

l *(ieee80211_register_hw+0x10f)

If our offsets are the same then its probably on line 791:

(gdb) l *(ieee80211_register_hw+0x10f)
0x2df is in ieee80211_register_hw (net/mac80211/main.c:791).
786             mdev->type = ARPHRD_IEEE80211;
787             mdev->header_ops = &ieee80211_header_ops;
788             mdev->set_multicast_list = ieee80211_master_set_multicast_list;
789
790             name = wiphy_dev(local->hw.wiphy)->driver->name;
791             local->hw.workqueue = create_freezeable_workqueue(name);
792             if (!local->hw.workqueue) {
793                     result = -ENOMEM;
794                     goto fail_workqueue;
795             }

I have run into this before but on a very rare occasion and it was
with iwlagn,  and I wasn't able to find the culprit. Do you run into
this all the time?

  Luis

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14  5:46 Kernel oops when loading ath5k from compat-wireless in 2.6.27 Dan McGee
  2008-11-14  6:17 ` Luis R. Rodriguez
@ 2008-11-14 13:18 ` Bob Copeland
  1 sibling, 0 replies; 39+ messages in thread
From: Bob Copeland @ 2008-11-14 13:18 UTC (permalink / raw)
  To: Dan McGee; +Cc: linux-wireless, mcgrof

On Thu, Nov 13, 2008 at 11:46:06PM -0600, Dan McGee wrote:
> 
> GDB disassembly:
> 
> 0x000002cc <ieee80211_register_hw+357>: sbb    %eax,%eax
> 0x000002ce <ieee80211_register_hw+359>: and    $0x100,%edx
> 0x000002d4 <ieee80211_register_hw+365>: and    $0xf,%al
> 0x000002d6 <ieee80211_register_hw+367>: inc    %al
> ^^^ HERE ^^^

I don't think this disassembly is right.  inc %al is not a very likely 
faulting instruction, especially if we just did a successful AND on the
register.  Luis' suggestion to use gdb to directly list the C code is 
good; here's another way:

Run scripts/decodecode on the "Code: ...." portion (output below).
There you can see that the faulting instruction is actually 
"mov (%eax), %eax" which makes sense because %eax in the register dump
is zero, so it's clearly a null pointer dereference.  We're
dereferencing some pointer at offset 0xbc in some structure.

Now you can do objdump -S net/mac80211/main.o to see mixed C and
assembly, and look for a segment of code that matches your disassembly.  
Sometimes that can be a bit confusing with inlines and code scheduling, 
but that's the nature of the beast.

(I haven't had a chance to look at the actual code yet.)

Disassembly of section .text:

00000000 <.text>:
   0:	83 c8 00             	or     $0x0,%eax
   3:	00 00                	add    %al,(%eax)
   5:	21 03                	and    %eax,(%ebx)
   7:	c7 83 b4 00 00 00 1c 	movl   $0xb80d491c,0xb4(%ebx)
   e:	49 0d b8 
  11:	c7 83 0c 02 00 00 ee 	movl   $0xb80c17ee,0x20c(%ebx)
  18:	17 0c b8 
  1b:	8b 46 1c             	mov    0x1c(%esi),%eax
  1e:	8b 40 7c             	mov    0x7c(%eax),%eax
  21:	8b 80 bc 00 00 00    	mov    0xbc(%eax),%eax
  27:	6a 00                	push   $0x0
  29:	6a 00                	push   $0x0
   0:	8b 00                	mov    (%eax),%eax      <--- HERE
   2:	e8 75 64 06 c0       	call   0xc006647c
   7:	5f                   	pop    %edi
   8:	bf f4 ff ff ff       	mov    $0xfffffff4,%edi
   d:	85 c0                	test   %eax,%eax
   f:	89 46 20             	mov    %eax,0x20(%esi)
  12:	5a                   	pop    %edx
  13:	0f                   	.byte 0xf
  14:	84                   	.byte 0x84

-- 
Bob Copeland %% www.bobcopeland.com


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14  6:17 ` Luis R. Rodriguez
@ 2008-11-14 17:02   ` Bob Copeland
  2008-11-14 17:37     ` Luis R. Rodriguez
  2008-11-16  2:11     ` Dan McGee
  0 siblings, 2 replies; 39+ messages in thread
From: Bob Copeland @ 2008-11-14 17:02 UTC (permalink / raw)
  To: Luis R. Rodriguez; +Cc: Dan McGee, linux-wireless, Michael Buesch

On Fri, Nov 14, 2008 at 1:17 AM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
> If our offsets are the same then its probably on line 791:
[...]
> 790             name = wiphy_dev(local->hw.wiphy)->driver->name;
> 791             local->hw.workqueue = create_freezeable_workqueue(name);

I agree, having looked at the objdump output.  Hmm, maybe ->driver pointer
is bad even though I can't see that happening.  Dan, can you try adding a
printk before line 790 to see if any of the pointers are null?

Also, do you have CONFIG_LOCKDEP enabled or no?

-- 
Bob Copeland %% www.bobcopeland.com

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 17:02   ` Bob Copeland
@ 2008-11-14 17:37     ` Luis R. Rodriguez
  2008-11-14 17:49       ` Bob Copeland
  2008-11-16  2:11     ` Dan McGee
  1 sibling, 1 reply; 39+ messages in thread
From: Luis R. Rodriguez @ 2008-11-14 17:37 UTC (permalink / raw)
  To: Bob Copeland; +Cc: Dan McGee, linux-wireless, Michael Buesch, Johannes Berg

On Fri, Nov 14, 2008 at 9:02 AM, Bob Copeland <me@bobcopeland.com> wrote:
> On Fri, Nov 14, 2008 at 1:17 AM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>> If our offsets are the same then its probably on line 791:
> [...]
>> 790             name = wiphy_dev(local->hw.wiphy)->driver->name;
>> 791             local->hw.workqueue = create_freezeable_workqueue(name);
>
> I agree, having looked at the objdump output.  Hmm, maybe ->driver pointer
> is bad even though I can't see that happening.

Yeah I tried digging through the work queue logic thing and don't see
why name would be NULL. I believe the name comes from the driver's
stack memory when it sets up the pci device driver structure. Only
puzzling thing for me is wiphy_dev() gets the wiphy->dev.parent, that
still hasn't sunken in yet for me, and how that gets you
->driver->name or how this can possibly be NULL.

Anyway we do set the parent before calling ieee80211_register_hw() by
using SET_IEEE80211_DEV(hw, &pdev->dev) (oh ok here is the parent). so
when we try to get the name with parent->driver I am not sure if
driver has been set yet because ath5k_pci_probe() hasn't finished yet
as probe called ath5k_hw_attach(). When does ->driver get set and why
would it fail only for ath5k?

  Luis

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 17:37     ` Luis R. Rodriguez
@ 2008-11-14 17:49       ` Bob Copeland
  2008-11-14 18:06         ` Luis R. Rodriguez
  0 siblings, 1 reply; 39+ messages in thread
From: Bob Copeland @ 2008-11-14 17:49 UTC (permalink / raw)
  To: Luis R. Rodriguez
  Cc: Dan McGee, linux-wireless, Michael Buesch, Johannes Berg

On Fri, Nov 14, 2008 at 12:37 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
> Anyway we do set the parent before calling ieee80211_register_hw() by
> using SET_IEEE80211_DEV(hw, &pdev->dev) (oh ok here is the parent). so
> when we try to get the name with parent->driver I am not sure if
> driver has been set yet because ath5k_pci_probe() hasn't finished yet
> as probe called ath5k_hw_attach(). When does ->driver get set and why
> would it fail only for ath5k?

Ahh, I believe that would be in pci-driver.c __pci_device_probe, after driver
probe is run successfully.  Yes, we do ieee80211_register_hw from within
probe...  So, how did this ever work again? /me scratches head.

-- 
Bob Copeland %% www.bobcopeland.com

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 17:49       ` Bob Copeland
@ 2008-11-14 18:06         ` Luis R. Rodriguez
  2008-11-14 18:13           ` Sujith
  2008-11-14 18:34           ` Bob Copeland
  0 siblings, 2 replies; 39+ messages in thread
From: Luis R. Rodriguez @ 2008-11-14 18:06 UTC (permalink / raw)
  To: Bob Copeland; +Cc: Dan McGee, linux-wireless, Michael Buesch, Johannes Berg

On Fri, Nov 14, 2008 at 9:49 AM, Bob Copeland <me@bobcopeland.com> wrote:
> On Fri, Nov 14, 2008 at 12:37 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>> Anyway we do set the parent before calling ieee80211_register_hw() by
>> using SET_IEEE80211_DEV(hw, &pdev->dev) (oh ok here is the parent). so
>> when we try to get the name with parent->driver I am not sure if
>> driver has been set yet because ath5k_pci_probe() hasn't finished yet
>> as probe called ath5k_hw_attach(). When does ->driver get set and why
>> would it fail only for ath5k?
>
> Ahh, I believe that would be in pci-driver.c __pci_device_probe, after driver
> probe is run successfully.  Yes, we do ieee80211_register_hw from within
> probe...  So, how did this ever work again? /me scratches head.

Wow.. ok, yeah I see that, this is a mac8021 bug then unless mac80211
wants us to not callieee80211_register_hw() from probe or its children
which would be weird.

So we need to fix that.

  Luis

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 18:06         ` Luis R. Rodriguez
@ 2008-11-14 18:13           ` Sujith
  2008-11-14 18:29             ` Luis R. Rodriguez
  2008-11-14 18:34           ` Bob Copeland
  1 sibling, 1 reply; 39+ messages in thread
From: Sujith @ 2008-11-14 18:13 UTC (permalink / raw)
  To: Luis R. Rodriguez
  Cc: Bob Copeland, Dan McGee, linux-wireless, Michael Buesch,
	Johannes Berg

Luis R. Rodriguez wrote:
> On Fri, Nov 14, 2008 at 9:49 AM, Bob Copeland <me@bobcopeland.com> wrote:
> > On Fri, Nov 14, 2008 at 12:37 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
> >> Anyway we do set the parent before calling ieee80211_register_hw() by
> >> using SET_IEEE80211_DEV(hw, &pdev->dev) (oh ok here is the parent). so
> >> when we try to get the name with parent->driver I am not sure if
> >> driver has been set yet because ath5k_pci_probe() hasn't finished yet
> >> as probe called ath5k_hw_attach(). When does ->driver get set and why
> >> would it fail only for ath5k?
> >
> > Ahh, I believe that would be in pci-driver.c __pci_device_probe, after driver
> > probe is run successfully.  Yes, we do ieee80211_register_hw from within
> > probe...  So, how did this ever work again? /me scratches head.
> 
> Wow.. ok, yeah I see that, this is a mac8021 bug then unless mac80211
> wants us to not callieee80211_register_hw() from probe or its children
> which would be weird.

>From the original log:
ath5k_pci 0000:01:00.0: registered as ''

Sujith

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 18:13           ` Sujith
@ 2008-11-14 18:29             ` Luis R. Rodriguez
  2008-11-14 18:33               ` Sujith
  0 siblings, 1 reply; 39+ messages in thread
From: Luis R. Rodriguez @ 2008-11-14 18:29 UTC (permalink / raw)
  To: Sujith
  Cc: Bob Copeland, Dan McGee, linux-wireless, Michael Buesch,
	Johannes Berg

On Fri, Nov 14, 2008 at 10:13 AM, Sujith <m.sujith@gmail.com> wrote:
> Luis R. Rodriguez wrote:
>> On Fri, Nov 14, 2008 at 9:49 AM, Bob Copeland <me@bobcopeland.com> wrote:
>> > On Fri, Nov 14, 2008 at 12:37 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>> >> Anyway we do set the parent before calling ieee80211_register_hw() by
>> >> using SET_IEEE80211_DEV(hw, &pdev->dev) (oh ok here is the parent). so
>> >> when we try to get the name with parent->driver I am not sure if
>> >> driver has been set yet because ath5k_pci_probe() hasn't finished yet
>> >> as probe called ath5k_hw_attach(). When does ->driver get set and why
>> >> would it fail only for ath5k?
>> >
>> > Ahh, I believe that would be in pci-driver.c __pci_device_probe, after driver
>> > probe is run successfully.  Yes, we do ieee80211_register_hw from within
>> > probe...  So, how did this ever work again? /me scratches head.
>>
>> Wow.. ok, yeah I see that, this is a mac8021 bug then unless mac80211
>> wants us to not callieee80211_register_hw() from probe or its children
>> which would be weird.
>
> From the original log:
> ath5k_pci 0000:01:00.0: registered as ''

That comes from

dev_info(&pdev->dev, "registered as '%s'\n", wiphy_name(hw->wiphy));

which means that may be buggy too as wiphy_name() gets

return wiphy->dev.bus_id;

and we only have set at that point wiphy->dev.parent IIRC.

  Luis

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 18:29             ` Luis R. Rodriguez
@ 2008-11-14 18:33               ` Sujith
  2008-11-14 18:37                 ` Luis R. Rodriguez
  0 siblings, 1 reply; 39+ messages in thread
From: Sujith @ 2008-11-14 18:33 UTC (permalink / raw)
  To: Luis R. Rodriguez
  Cc: Sujith, Bob Copeland, Dan McGee, linux-wireless, Michael Buesch,
	Johannes Berg

Luis R. Rodriguez wrote:
> > From the original log:
> > ath5k_pci 0000:01:00.0: registered as ''
> 
> That comes from
> 
> dev_info(&pdev->dev, "registered as '%s'\n", wiphy_name(hw->wiphy));
> 
> which means that may be buggy too as wiphy_name() gets
> 
> return wiphy->dev.bus_id;
> 
> and we only have set at that point wiphy->dev.parent IIRC.
> 

Nope, parent is set after that call in SET_IEEE80211_DEV()
phy[x] gets assigned in alloc_hw().

So why is it NULL even if alloc_hw() succeeds ?

Sujith

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 18:06         ` Luis R. Rodriguez
  2008-11-14 18:13           ` Sujith
@ 2008-11-14 18:34           ` Bob Copeland
  1 sibling, 0 replies; 39+ messages in thread
From: Bob Copeland @ 2008-11-14 18:34 UTC (permalink / raw)
  To: Luis R. Rodriguez
  Cc: Dan McGee, linux-wireless, Michael Buesch, Johannes Berg

On Fri, Nov 14, 2008 at 1:06 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
> Wow.. ok, yeah I see that, this is a mac8021 bug then unless mac80211
> wants us to not callieee80211_register_hw() from probe or its children
> which would be weird.
>
> So we need to fix that.

Err, wait, I'm wrong.  That's pci_dev->driver, not pci_dev->dev->driver.

-- 
Bob Copeland %% www.bobcopeland.com

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 18:33               ` Sujith
@ 2008-11-14 18:37                 ` Luis R. Rodriguez
  2008-11-14 19:33                   ` Dan McGee
  0 siblings, 1 reply; 39+ messages in thread
From: Luis R. Rodriguez @ 2008-11-14 18:37 UTC (permalink / raw)
  To: Sujith
  Cc: Bob Copeland, Dan McGee, linux-wireless, Michael Buesch,
	Johannes Berg

On Fri, Nov 14, 2008 at 10:33 AM, Sujith <m.sujith@gmail.com> wrote:
> Luis R. Rodriguez wrote:
>> > From the original log:
>> > ath5k_pci 0000:01:00.0: registered as ''
>>
>> That comes from
>>
>> dev_info(&pdev->dev, "registered as '%s'\n", wiphy_name(hw->wiphy));
>>
>> which means that may be buggy too as wiphy_name() gets
>>
>> return wiphy->dev.bus_id;
>>
>> and we only have set at that point wiphy->dev.parent IIRC.
>>
>
> Nope, parent is set after that call in SET_IEEE80211_DEV()
> phy[x] gets assigned in alloc_hw().
>
> So why is it NULL even if alloc_hw() succeeds ?

Parent is wiphy->dev.parent not wiphy->dev though

  Luis

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 18:37                 ` Luis R. Rodriguez
@ 2008-11-14 19:33                   ` Dan McGee
  2008-11-14 20:33                     ` Bob Copeland
  0 siblings, 1 reply; 39+ messages in thread
From: Dan McGee @ 2008-11-14 19:33 UTC (permalink / raw)
  To: Luis R. Rodriguez
  Cc: Sujith, Bob Copeland, linux-wireless, Michael Buesch,
	Johannes Berg

On Fri, Nov 14, 2008 at 12:37 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
> On Fri, Nov 14, 2008 at 10:33 AM, Sujith <m.sujith@gmail.com> wrote:
>> Luis R. Rodriguez wrote:
>>> > From the original log:
>>> > ath5k_pci 0000:01:00.0: registered as ''
>>>
>>> That comes from
>>>
>>> dev_info(&pdev->dev, "registered as '%s'\n", wiphy_name(hw->wiphy));
>>>
>>> which means that may be buggy too as wiphy_name() gets
>>>
>>> return wiphy->dev.bus_id;
>>>
>>> and we only have set at that point wiphy->dev.parent IIRC.
>>>
>>
>> Nope, parent is set after that call in SET_IEEE80211_DEV()
>> phy[x] gets assigned in alloc_hw().
>>
>> So why is it NULL even if alloc_hw() succeeds ?
>
> Parent is wiphy->dev.parent not wiphy->dev though

So I'm at work for another 4 or 5 hours, but I should have a chance to
hack at anything tonight you guys want me to. Let me know if the
earlier suggestions in this thread still apply.

-Dan

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 19:33                   ` Dan McGee
@ 2008-11-14 20:33                     ` Bob Copeland
  2008-11-14 20:41                       ` Luis R. Rodriguez
  0 siblings, 1 reply; 39+ messages in thread
From: Bob Copeland @ 2008-11-14 20:33 UTC (permalink / raw)
  To: Dan McGee
  Cc: Luis R. Rodriguez, Sujith, linux-wireless, Michael Buesch,
	Johannes Berg

On Fri, Nov 14, 2008 at 2:33 PM, Dan McGee <dpmcgee@gmail.com> wrote:
> On Fri, Nov 14, 2008 at 12:37 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>> On Fri, Nov 14, 2008 at 10:33 AM, Sujith <m.sujith@gmail.com> wrote:
>>> Luis R. Rodriguez wrote:
>>>> > From the original log:
>>>> > ath5k_pci 0000:01:00.0: registered as ''
>>>>
>>>> That comes from
>>>>
>>>> dev_info(&pdev->dev, "registered as '%s'\n", wiphy_name(hw->wiphy));
>>>>
>>>> which means that may be buggy too as wiphy_name() gets
>>>>
>>>> return wiphy->dev.bus_id;
>>>>
>>>> and we only have set at that point wiphy->dev.parent IIRC.
>>>>
>>>
>>> Nope, parent is set after that call in SET_IEEE80211_DEV()
>>> phy[x] gets assigned in alloc_hw().
>>>
>>> So why is it NULL even if alloc_hw() succeeds ?
>>
>> Parent is wiphy->dev.parent not wiphy->dev though

Sujith is right, wiphy->dev.bus_id should be set by the time that's printed,
since wiphy_new in alloc_hw fills it in.  Curiouser and curiouser.  I look
forward to this being an obvious and embarrassing bug :)

> So I'm at work for another 4 or 5 hours, but I should have a chance to
> hack at anything tonight you guys want me to. Let me know if the
> earlier suggestions in this thread still apply.

I think the printks will still be useful, if only to verify we're on
the right track.  Which compat-wireless version are you using?

BTW from kerneloops, several people are having this with other drivers than
ath5k.

-- 
Bob Copeland %% www.bobcopeland.com

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 20:33                     ` Bob Copeland
@ 2008-11-14 20:41                       ` Luis R. Rodriguez
  2008-11-14 21:13                         ` Luis R. Rodriguez
  0 siblings, 1 reply; 39+ messages in thread
From: Luis R. Rodriguez @ 2008-11-14 20:41 UTC (permalink / raw)
  To: Bob Copeland
  Cc: Dan McGee, Sujith, linux-wireless, Michael Buesch, Johannes Berg

On Fri, Nov 14, 2008 at 12:33 PM, Bob Copeland <me@bobcopeland.com> wrote:
> On Fri, Nov 14, 2008 at 2:33 PM, Dan McGee <dpmcgee@gmail.com> wrote:
>> On Fri, Nov 14, 2008 at 12:37 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>>> On Fri, Nov 14, 2008 at 10:33 AM, Sujith <m.sujith@gmail.com> wrote:
>>>> Luis R. Rodriguez wrote:
>>>>> > From the original log:
>>>>> > ath5k_pci 0000:01:00.0: registered as ''
>>>>>
>>>>> That comes from
>>>>>
>>>>> dev_info(&pdev->dev, "registered as '%s'\n", wiphy_name(hw->wiphy));
>>>>>
>>>>> which means that may be buggy too as wiphy_name() gets
>>>>>
>>>>> return wiphy->dev.bus_id;
>>>>>
>>>>> and we only have set at that point wiphy->dev.parent IIRC.
>>>>>
>>>>
>>>> Nope, parent is set after that call in SET_IEEE80211_DEV()
>>>> phy[x] gets assigned in alloc_hw().
>>>>
>>>> So why is it NULL even if alloc_hw() succeeds ?
>>>
>>> Parent is wiphy->dev.parent not wiphy->dev though
>
> Sujith is right, wiphy->dev.bus_id should be set by the time that's printed,
> since wiphy_new in alloc_hw fills it in.

Ah, I see, yes, oh man. Well this is good :)

> Curiouser and curiouser.  I look
> forward to this being an obvious and embarrassing bug :)

I sure hope so.

>> So I'm at work for another 4 or 5 hours, but I should have a chance to
>> hack at anything tonight you guys want me to. Let me know if the
>> earlier suggestions in this thread still apply.
>
> I think the printks will still be useful, if only to verify we're on
> the right track.  Which compat-wireless version are you using?
>
> BTW from kerneloops, several people are having this with other drivers than
> ath5k.

Yeah I ran into it with iwlagn a few days ago but never found the root
cause, it was sporadic.

  Luis

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 20:41                       ` Luis R. Rodriguez
@ 2008-11-14 21:13                         ` Luis R. Rodriguez
  2008-11-15  0:25                           ` Dan McGee
  0 siblings, 1 reply; 39+ messages in thread
From: Luis R. Rodriguez @ 2008-11-14 21:13 UTC (permalink / raw)
  To: Bob Copeland
  Cc: Dan McGee, Sujith, linux-wireless, Michael Buesch, Johannes Berg

On Fri, Nov 14, 2008 at 12:41 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
> On Fri, Nov 14, 2008 at 12:33 PM, Bob Copeland <me@bobcopeland.com> wrote:
>> On Fri, Nov 14, 2008 at 2:33 PM, Dan McGee <dpmcgee@gmail.com> wrote:
>>> On Fri, Nov 14, 2008 at 12:37 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>>>> On Fri, Nov 14, 2008 at 10:33 AM, Sujith <m.sujith@gmail.com> wrote:
>>>>> Luis R. Rodriguez wrote:
>>>>>> > From the original log:
>>>>>> > ath5k_pci 0000:01:00.0: registered as ''
>>>>>>
>>>>>> That comes from
>>>>>>
>>>>>> dev_info(&pdev->dev, "registered as '%s'\n", wiphy_name(hw->wiphy));
>>>>>>
>>>>>> which means that may be buggy too as wiphy_name() gets
>>>>>>
>>>>>> return wiphy->dev.bus_id;
>>>>>>
>>>>>> and we only have set at that point wiphy->dev.parent IIRC.
>>>>>>
>>>>>
>>>>> Nope, parent is set after that call in SET_IEEE80211_DEV()
>>>>> phy[x] gets assigned in alloc_hw().
>>>>>
>>>>> So why is it NULL even if alloc_hw() succeeds ?
>>>>
>>>> Parent is wiphy->dev.parent not wiphy->dev though
>>
>> Sujith is right, wiphy->dev.bus_id should be set by the time that's printed,
>> since wiphy_new in alloc_hw fills it in.
>
> Ah, I see, yes, oh man. Well this is good :)

OK then try the patch in the other thread. If that doesn't fix it then
it has to be another issue.

  Luis

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 21:13                         ` Luis R. Rodriguez
@ 2008-11-15  0:25                           ` Dan McGee
  2008-11-15  0:36                             ` Luis R. Rodriguez
  2008-11-15  0:38                             ` Bob Copeland
  0 siblings, 2 replies; 39+ messages in thread
From: Dan McGee @ 2008-11-15  0:25 UTC (permalink / raw)
  To: Luis R. Rodriguez
  Cc: Bob Copeland, Sujith, linux-wireless, Michael Buesch,
	Johannes Berg

On Fri, Nov 14, 2008 at 3:13 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
> On Fri, Nov 14, 2008 at 12:41 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>> On Fri, Nov 14, 2008 at 12:33 PM, Bob Copeland <me@bobcopeland.com> wrote:
>>> On Fri, Nov 14, 2008 at 2:33 PM, Dan McGee <dpmcgee@gmail.com> wrote:
>>>> On Fri, Nov 14, 2008 at 12:37 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>>>>> On Fri, Nov 14, 2008 at 10:33 AM, Sujith <m.sujith@gmail.com> wrote:
>>>>>> Luis R. Rodriguez wrote:
>>>>>>> > From the original log:
>>>>>>> > ath5k_pci 0000:01:00.0: registered as ''
>>>>>>>
>>>>>>> That comes from
>>>>>>>
>>>>>>> dev_info(&pdev->dev, "registered as '%s'\n", wiphy_name(hw->wiphy));
>>>>>>>
>>>>>>> which means that may be buggy too as wiphy_name() gets
>>>>>>>
>>>>>>> return wiphy->dev.bus_id;
>>>>>>>
>>>>>>> and we only have set at that point wiphy->dev.parent IIRC.
>>>>>>>
>>>>>>
>>>>>> Nope, parent is set after that call in SET_IEEE80211_DEV()
>>>>>> phy[x] gets assigned in alloc_hw().
>>>>>>
>>>>>> So why is it NULL even if alloc_hw() succeeds ?
>>>>>
>>>>> Parent is wiphy->dev.parent not wiphy->dev though
>>>
>>> Sujith is right, wiphy->dev.bus_id should be set by the time that's printed,
>>> since wiphy_new in alloc_hw fills it in.
>>
>> Ah, I see, yes, oh man. Well this is good :)
>
> OK then try the patch in the other thread. If that doesn't fix it then
> it has to be another issue.

Can someone point me to the "other thread"? I don't think I was CCed
on it since I came late to the party.

Someone also inquired about versions. I'm using stable kernel 2.6.27.6
and compat-wireless-2008-11-13 (10-28 was doing the same thing
however).

-Dan

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-15  0:25                           ` Dan McGee
@ 2008-11-15  0:36                             ` Luis R. Rodriguez
  2008-11-15  2:05                               ` Dan McGee
  2008-11-15  0:38                             ` Bob Copeland
  1 sibling, 1 reply; 39+ messages in thread
From: Luis R. Rodriguez @ 2008-11-15  0:36 UTC (permalink / raw)
  To: Dan McGee
  Cc: Luis R. Rodriguez, Bob Copeland, Sujith,
	linux-wireless@vger.kernel.org, Michael Buesch, Johannes Berg

On Fri, Nov 14, 2008 at 04:25:53PM -0800, Dan McGee wrote:
> On Fri, Nov 14, 2008 at 3:13 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
> > On Fri, Nov 14, 2008 at 12:41 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
> >> On Fri, Nov 14, 2008 at 12:33 PM, Bob Copeland <me@bobcopeland.com> wrote:
> >>> On Fri, Nov 14, 2008 at 2:33 PM, Dan McGee <dpmcgee@gmail.com> wrote:
> >>>> On Fri, Nov 14, 2008 at 12:37 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
> >>>>> On Fri, Nov 14, 2008 at 10:33 AM, Sujith <m.sujith@gmail.com> wrote:
> >>>>>> Luis R. Rodriguez wrote:
> >>>>>>> > From the original log:
> >>>>>>> > ath5k_pci 0000:01:00.0: registered as ''
> >>>>>>>
> >>>>>>> That comes from
> >>>>>>>
> >>>>>>> dev_info(&pdev->dev, "registered as '%s'\n", wiphy_name(hw->wiphy));
> >>>>>>>
> >>>>>>> which means that may be buggy too as wiphy_name() gets
> >>>>>>>
> >>>>>>> return wiphy->dev.bus_id;
> >>>>>>>
> >>>>>>> and we only have set at that point wiphy->dev.parent IIRC.
> >>>>>>>
> >>>>>>
> >>>>>> Nope, parent is set after that call in SET_IEEE80211_DEV()
> >>>>>> phy[x] gets assigned in alloc_hw().
> >>>>>>
> >>>>>> So why is it NULL even if alloc_hw() succeeds ?
> >>>>>
> >>>>> Parent is wiphy->dev.parent not wiphy->dev though
> >>>
> >>> Sujith is right, wiphy->dev.bus_id should be set by the time that's printed,
> >>> since wiphy_new in alloc_hw fills it in.
> >>
> >> Ah, I see, yes, oh man. Well this is good :)
> >
> > OK then try the patch in the other thread. If that doesn't fix it then
> > it has to be another issue.
> 
> Can someone point me to the "other thread"? I don't think I was CCed
> on it since I came late to the party.
> 
> Someone also inquired about versions. I'm using stable kernel 2.6.27.6
> and compat-wireless-2008-11-13 (10-28 was doing the same thing
> however).

Bounced you a copy. Please give it a try.

  Luis

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-15  0:25                           ` Dan McGee
  2008-11-15  0:36                             ` Luis R. Rodriguez
@ 2008-11-15  0:38                             ` Bob Copeland
  1 sibling, 0 replies; 39+ messages in thread
From: Bob Copeland @ 2008-11-15  0:38 UTC (permalink / raw)
  To: Dan McGee
  Cc: Luis R. Rodriguez, Sujith, linux-wireless, Michael Buesch,
	Johannes Berg

On Fri, Nov 14, 2008 at 06:25:53PM -0600, Dan McGee wrote:
> Can someone point me to the "other thread"? I don't think I was CCed
> on it since I came late to the party.
> 
> Someone also inquired about versions. I'm using stable kernel 2.6.27.6
> and compat-wireless-2008-11-13 (10-28 was doing the same thing
> however).

The thread is over there:

http://marc.info/?l=linux-wireless&m=122670266931930&w=2

-- 
Bob Copeland %% www.bobcopeland.com


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-15  0:36                             ` Luis R. Rodriguez
@ 2008-11-15  2:05                               ` Dan McGee
  2008-11-15  2:29                                 ` Bob Copeland
  0 siblings, 1 reply; 39+ messages in thread
From: Dan McGee @ 2008-11-15  2:05 UTC (permalink / raw)
  To: Luis R. Rodriguez
  Cc: Luis R. Rodriguez, Bob Copeland, Sujith,
	linux-wireless@vger.kernel.org, Michael Buesch, Johannes Berg

On Fri, Nov 14, 2008 at 6:36 PM, Luis R. Rodriguez
<lrodriguez@atheros.com> wrote:
> On Fri, Nov 14, 2008 at 04:25:53PM -0800, Dan McGee wrote:
>> On Fri, Nov 14, 2008 at 3:13 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>> > On Fri, Nov 14, 2008 at 12:41 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>> >> On Fri, Nov 14, 2008 at 12:33 PM, Bob Copeland <me@bobcopeland.com> wrote:
>> >>> On Fri, Nov 14, 2008 at 2:33 PM, Dan McGee <dpmcgee@gmail.com> wrote:
>> >>>> On Fri, Nov 14, 2008 at 12:37 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>> >>>>> On Fri, Nov 14, 2008 at 10:33 AM, Sujith <m.sujith@gmail.com> wrote:
>> >>>>>> Luis R. Rodriguez wrote:
>> >>>>>>> > From the original log:
>> >>>>>>> > ath5k_pci 0000:01:00.0: registered as ''
>> >>>>>>>
>> >>>>>>> That comes from
>> >>>>>>>
>> >>>>>>> dev_info(&pdev->dev, "registered as '%s'\n", wiphy_name(hw->wiphy));
>> >>>>>>>
>> >>>>>>> which means that may be buggy too as wiphy_name() gets
>> >>>>>>>
>> >>>>>>> return wiphy->dev.bus_id;
>> >>>>>>>
>> >>>>>>> and we only have set at that point wiphy->dev.parent IIRC.
>> >>>>>>>
>> >>>>>>
>> >>>>>> Nope, parent is set after that call in SET_IEEE80211_DEV()
>> >>>>>> phy[x] gets assigned in alloc_hw().
>> >>>>>>
>> >>>>>> So why is it NULL even if alloc_hw() succeeds ?
>> >>>>>
>> >>>>> Parent is wiphy->dev.parent not wiphy->dev though
>> >>>
>> >>> Sujith is right, wiphy->dev.bus_id should be set by the time that's printed,
>> >>> since wiphy_new in alloc_hw fills it in.
>> >>
>> >> Ah, I see, yes, oh man. Well this is good :)
>> >
>> > OK then try the patch in the other thread. If that doesn't fix it then
>> > it has to be another issue.
>>
>> Can someone point me to the "other thread"? I don't think I was CCed
>> on it since I came late to the party.
>>
>> Someone also inquired about versions. I'm using stable kernel 2.6.27.6
>> and compat-wireless-2008-11-13 (10-28 was doing the same thing
>> however).
>
> Bounced you a copy. Please give it a try.

I gave it a shot. It worked (with some conditions attached). At first
I built with just the patch, not enabling kernel debugging or
anything, and I got a different NPE in some sysfs registration code
deeper down in what I think was still the register_hw method. I can't
fully remember, because it wasn't an easily savable stack trace and I
immediately decided I would just rebuild everything with debug
symbols.

Of course, now that I have a kernel with full debug symbols, I can't
get it to oops. Figures. I'll keep you updated if I see this error
again.

-Dan

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-15  2:05                               ` Dan McGee
@ 2008-11-15  2:29                                 ` Bob Copeland
  2008-11-15  2:57                                   ` Dan McGee
  0 siblings, 1 reply; 39+ messages in thread
From: Bob Copeland @ 2008-11-15  2:29 UTC (permalink / raw)
  To: Dan McGee
  Cc: Luis R. Rodriguez, Luis R. Rodriguez, Sujith,
	linux-wireless@vger.kernel.org, Michael Buesch, Johannes Berg

On Fri, Nov 14, 2008 at 08:05:09PM -0600, Dan McGee wrote:
> On Fri, Nov 14, 2008 at 6:36 PM, Luis R. Rodriguez
> Of course, now that I have a kernel with full debug symbols, I can't
> get it to oops. Figures. I'll keep you updated if I see this error
> again.

Grasping at straws, is there any chance your kernel didn't match up 
with the compiled modules from compat-wireless?  For example, you 
reconfigured the kernel without rebuilding it, then compat-wireless
picked up your .config?  I suppose that could lead to structures being 
defined with wrong offsets... 

-- 
Bob Copeland %% www.bobcopeland.com


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-15  2:29                                 ` Bob Copeland
@ 2008-11-15  2:57                                   ` Dan McGee
  2008-11-15  6:30                                     ` Dan McGee
       [not found]                                     ` <449c10960811142229v77ea85f4nf898d447c7e63422@mail.gmail.com>
  0 siblings, 2 replies; 39+ messages in thread
From: Dan McGee @ 2008-11-15  2:57 UTC (permalink / raw)
  To: Bob Copeland
  Cc: Luis R. Rodriguez, Luis R. Rodriguez, Sujith,
	linux-wireless@vger.kernel.org, Michael Buesch, Johannes Berg

On Fri, Nov 14, 2008 at 8:29 PM, Bob Copeland <me@bobcopeland.com> wrote:
> On Fri, Nov 14, 2008 at 08:05:09PM -0600, Dan McGee wrote:
>> On Fri, Nov 14, 2008 at 6:36 PM, Luis R. Rodriguez
>> Of course, now that I have a kernel with full debug symbols, I can't
>> get it to oops. Figures. I'll keep you updated if I see this error
>> again.
>
> Grasping at straws, is there any chance your kernel didn't match up
> with the compiled modules from compat-wireless?  For example, you
> reconfigured the kernel without rebuilding it, then compat-wireless
> picked up your .config?  I suppose that could lead to structures being
> defined with wrong offsets...

I doubt it; I'm compiling the whole enchilada in one go so it should
definitely match up. I just reproduced it with a brand new build after
going back to a no-symbols kernel. Here is a transcribed OOPS dump
since this one leaves the machine in a state where it doesn't even
finish booting up. I can *only* get this if I compile without
debugging symbols, and looking at the call trace, you can see the
apic_timer_interrupt call which leads me to believe it is some sort of
timing/race issue that a debug kernel doesn't run into.

-Dan

BUG: unable to handle kernel NULL pointer dereference at 00000082
IP: [<7818ca71>] sysfs_find_dirent+0x9/0x23
Oops: 0000 [#1] PREEMPT
Modules linked in: ath5k(+) mac80211

Pid: 818 comm: modprobe Not tainted (2.6.27.6eee #1)
EIP: 0060:[<7818ca71>] EFLAGS: 00010206 CPU: 0
EIP is at sysfs_find_dirent+0x9/0x23
EAX: 00000001 EBX: 00000072 ECX: 00000001 EDX: b730b4f0
ESI: b730b4f0 EDI: fffffff4 EBP: b7311490 ESP: b73ffd34
 DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
Stack: <<<i'm too lazy>>>>
Call Trace:
 [<7818cb39>]: __sysfs_add_one+0x10/0x69
 [<7818cba2>]: sysfs_add_one+0x9/0x2f
 [<7818d0bc>]: create_dir+0x3c/0x62
 sysfs_create_dir+0x29/0x3b
 kobject_get
 kobject_add_internal
 kobject_add
 device_add+0x75/0x456
 apic_timer_interrupt+0x28/0x30
 strlcpy+0x11/0x3e
 register_netdevice+0x1f1/0x31a
 ieee80211_register_hw+0x1eb/0x2d0 [mac80211]
 ath5k_pci_probe+0xc27/0x1150 [ath5k]
 find_inode
 pci_device_probe
 driver_probe_device
 .......
 init_ath5k_pci [ath5k]
 .......
 ==============
Code: dd 85 c0 53 89 c3 74 16 83 38 00 75 0f ba 82 00 00 00 b8 f7 75
3c 78 e8 70 da f8 ff ff 03 89 d8 5b c3 56 89 d6 53 8b 58 18 eb 11 <8b>
43 10 89 f2 e8 34 cc 04 00 85 c0 74 07 8b 5b 0c 85 db 75 eb

$ AFLAGS=--32 scripts/decodecode < /tmp/oops.txt
Code: dd 85 c0 53 89 c3 74 16 83 38 00 75 0f ba 82 00 00 00 b8 f7 75
3c 78 e8 70 da f8 ff ff 03 89 d8 5b c3 56 89 d6 53 8b 58 18 eb 11 <8b>
43 10 89 f2 e8 34 cc 04 00 85 c0 74 07 8b 5b 0c 85 db 75 eb

/tmp/tmp.xJNdgiQwSL.o:     file format elf32-i386

Disassembly of section .text:

00000000 <.text>:
   0:	dd 85 c0 53 89 c3    	fldl   -0x3c76ac40(%ebp)
   6:	74 16                	je     0x1e
   8:	83 38 00             	cmpl   $0x0,(%eax)
   b:	75 0f                	jne    0x1c
   d:	ba 82 00 00 00       	mov    $0x82,%edx
  12:	b8 f7 75 3c 78       	mov    $0x783c75f7,%eax
  17:	e8 70 da f8 ff       	call   0xfff8da8c
  1c:	ff 03                	incl   (%ebx)
  1e:	89 d8                	mov    %ebx,%eax
  20:	5b                   	pop    %ebx
  21:	c3                   	ret
  22:	56                   	push   %esi
  23:	89 d6                	mov    %edx,%esi
  25:	53                   	push   %ebx
  26:	8b 58 18             	mov    0x18(%eax),%ebx
  29:	eb 11                	jmp    0x3c

/tmp/tmp.xJNdgiQwSL.o:     file format elf32-i386

Disassembly of section .text:

00000000 <.text>:
   0:	8b 43 10             	mov    0x10(%ebx),%eax
   3:	89 f2                	mov    %esi,%edx
   5:	e8 34 cc 04 00       	call   0x4cc3e
   a:	85 c0                	test   %eax,%eax
   c:	74 07                	je     0x15
   e:	8b 5b 0c             	mov    0xc(%ebx),%ebx
  11:	85 db                	test   %ebx,%ebx
  13:	75 eb                	jne    0x0


And the objdump output from that function, but my novice eyes can't
pick out where the above code corresponds to:
0000027d <__sysfs_add_one>:
 27d:   56                      push   %esi
 27e:   89 c6                   mov    %eax,%esi
 280:   53                      push   %ebx
 281:   89 d3                   mov    %edx,%ebx
 283:   8b 00                   mov    (%eax),%eax
 285:   8b 52 10                mov    0x10(%edx),%edx
 288:   e8 fc ff ff ff          call   289 <__sysfs_add_one+0xc>
 28d:   ba ef ff ff ff          mov    $0xffffffef,%edx
 292:   85 c0                   test   %eax,%eax
 294:   75 4b                   jne    2e1 <__sysfs_add_one+0x64>
 296:   8b 06                   mov    (%esi),%eax
 298:   e8 fe fe ff ff          call   19b <sysfs_get>
 29d:   80 7b 1c 01             cmpb   $0x1,0x1c(%ebx)
 2a1:   89 43 08                mov    %eax,0x8(%ebx)
 2a4:   75 0a                   jne    2b0 <__sysfs_add_one+0x33>
 2a6:   8b 46 04                mov    0x4(%esi),%eax
 2a9:   85 c0                   test   %eax,%eax
 2ab:   74 03                   je     2b0 <__sysfs_add_one+0x33>
 2ad:   ff 40 28                incl   0x28(%eax)
 2b0:   ff 46 0c                incl   0xc(%esi)
 2b3:   83 7b 0c 00             cmpl   $0x0,0xc(%ebx)
 2b7:   8b 43 08                mov    0x8(%ebx),%eax
 2ba:   74 04                   je     2c0 <__sysfs_add_one+0x43>
 2bc:   0f 0b                   ud2a
 2be:   eb fe                   jmp    2be <__sysfs_add_one+0x41>
 2c0:   8d 48 18                lea    0x18(%eax),%ecx
 2c3:   8b 50 18                mov    0x18(%eax),%edx
 2c6:   eb 0e                   jmp    2d6 <__sysfs_add_one+0x59>
 2c8:   8b 43 20                mov    0x20(%ebx),%eax
 2cb:   3b 42 20                cmp    0x20(%edx),%eax
 2ce:   72 0a                   jb     2da <__sysfs_add_one+0x5d>
 2d0:   8d 4a 0c                lea    0xc(%edx),%ecx
 2d3:   8b 52 0c                mov    0xc(%edx),%edx
 2d6:   85 d2                   test   %edx,%edx
 2d8:   75 ee                   jne    2c8 <__sysfs_add_one+0x4b>
 2da:   89 53 0c                mov    %edx,0xc(%ebx)
 2dd:   31 d2                   xor    %edx,%edx
 2df:   89 19                   mov    %ebx,(%ecx)
 2e1:   5b                      pop    %ebx
 2e2:   89 d0                   mov    %edx,%eax
 2e4:   5e                      pop    %esi
 2e5:   c3                      ret

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-15  2:57                                   ` Dan McGee
@ 2008-11-15  6:30                                     ` Dan McGee
       [not found]                                     ` <449c10960811142229v77ea85f4nf898d447c7e63422@mail.gmail.com>
  1 sibling, 0 replies; 39+ messages in thread
From: Dan McGee @ 2008-11-15  6:30 UTC (permalink / raw)
  Cc: Luis R. Rodriguez, Luis R. Rodriguez, Sujith,
	linux-wireless@vger.kernel.org, Michael Buesch, Johannes Berg

On Fri, Nov 14, 2008 at 8:57 PM, Dan McGee <dpmcgee@gmail.com> wrote:
>
> BUG: unable to handle kernel NULL pointer dereference at 00000082
> IP: [<7818ca71>] sysfs_find_dirent+0x9/0x23
> Oops: 0000 [#1] PREEMPT
> Modules linked in: ath5k(+) mac80211
>
> Pid: 818 comm: modprobe Not tainted (2.6.27.6eee #1)
> EIP: 0060:[<7818ca71>] EFLAGS: 00010206 CPU: 0
> EIP is at sysfs_find_dirent+0x9/0x23
> EAX: 00000001 EBX: 00000072 ECX: 00000001 EDX: b730b4f0
> ESI: b730b4f0 EDI: fffffff4 EBP: b7311490 ESP: b73ffd34
>  DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
> Stack: <<<i'm too lazy>>>>
> Call Trace:
>  [<7818cb39>]: __sysfs_add_one+0x10/0x69
>  [<7818cba2>]: sysfs_add_one+0x9/0x2f
>  [<7818d0bc>]: create_dir+0x3c/0x62
>  sysfs_create_dir+0x29/0x3b
>  kobject_get
>  kobject_add_internal
>  kobject_add
>  device_add+0x75/0x456
>  apic_timer_interrupt+0x28/0x30
>  strlcpy+0x11/0x3e
>  register_netdevice+0x1f1/0x31a
>  ieee80211_register_hw+0x1eb/0x2d0 [mac80211]
>  ath5k_pci_probe+0xc27/0x1150 [ath5k]
>  find_inode
>  pci_device_probe
>  driver_probe_device
>  .......
>  init_ath5k_pci [ath5k]
>  .......
>  ==============
> Code: dd 85 c0 53 89 c3 74 16 83 38 00 75 0f ba 82 00 00 00 b8 f7 75
> 3c 78 e8 70 da f8 ff ff 03 89 d8 5b c3 56 89 d6 53 8b 58 18 eb 11 <8b>
> 43 10 89 f2 e8 34 cc 04 00 85 c0 74 07 8b 5b 0c 85 db 75 eb
>
> $ AFLAGS=--32 scripts/decodecode < /tmp/oops.txt
> Code: dd 85 c0 53 89 c3 74 16 83 38 00 75 0f ba 82 00 00 00 b8 f7 75
> 3c 78 e8 70 da f8 ff ff 03 89 d8 5b c3 56 89 d6 53 8b 58 18 eb 11 <8b>
> 43 10 89 f2 e8 34 cc 04 00 85 c0 74 07 8b 5b 0c 85 db 75 eb
>
> /tmp/tmp.xJNdgiQwSL.o:     file format elf32-i386
>
> Disassembly of section .text:
>
> 00000000 <.text>:
>   0:   dd 85 c0 53 89 c3       fldl   -0x3c76ac40(%ebp)
>   6:   74 16                   je     0x1e
>   8:   83 38 00                cmpl   $0x0,(%eax)
>   b:   75 0f                   jne    0x1c
>   d:   ba 82 00 00 00          mov    $0x82,%edx
>  12:   b8 f7 75 3c 78          mov    $0x783c75f7,%eax
>  17:   e8 70 da f8 ff          call   0xfff8da8c
>  1c:   ff 03                   incl   (%ebx)
>  1e:   89 d8                   mov    %ebx,%eax
>  20:   5b                      pop    %ebx
>  21:   c3                      ret
>  22:   56                      push   %esi
>  23:   89 d6                   mov    %edx,%esi
>  25:   53                      push   %ebx
>  26:   8b 58 18                mov    0x18(%eax),%ebx
>  29:   eb 11                   jmp    0x3c
>
> /tmp/tmp.xJNdgiQwSL.o:     file format elf32-i386
>
> Disassembly of section .text:
>
> 00000000 <.text>:
>   0:   8b 43 10                mov    0x10(%ebx),%eax
>   3:   89 f2                   mov    %esi,%edx
>   5:   e8 34 cc 04 00          call   0x4cc3e
>   a:   85 c0                   test   %eax,%eax
>   c:   74 07                   je     0x15
>   e:   8b 5b 0c                mov    0xc(%ebx),%ebx
>  11:   85 db                   test   %ebx,%ebx
>  13:   75 eb                   jne    0x0

And I had the code completely wrong, oops. Looks like we are bailing
on the strcmp call in this function or something along those lines? I
wish I could be a bigger help with debugging this stuff.

-Dan

000001bc <sysfs_find_dirent>:
 *      RETURNS:
 *      Pointer to sysfs_dirent if found, NULL if not.
 */
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
                                      const unsigned char *name)
{
 1bc:   56                      push   %esi
 1bd:   89 d6                   mov    %edx,%esi
 1bf:   53                      push   %ebx
       struct sysfs_dirent *sd;

       for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling)
 1c0:   8b 58 18                mov    0x18(%eax),%ebx
 1c3:   eb 11                   jmp    1d6 <sysfs_find_dirent+0x1a>
               if (!strcmp(sd->s_name, name))
 1c5:   8b 43 10                mov    0x10(%ebx),%eax
 1c8:   89 f2                   mov    %esi,%edx
 1ca:   e8 fc ff ff ff          call   1cb <sysfs_find_dirent+0xf>
 1cf:   85 c0                   test   %eax,%eax
 1d1:   74 07                   je     1da <sysfs_find_dirent+0x1e>
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
                                      const unsigned char *name)
{
       struct sysfs_dirent *sd;

       for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling)
 1d3:   8b 5b 0c                mov    0xc(%ebx),%ebx
 1d6:   85 db                   test   %ebx,%ebx
 1d8:   75 eb                   jne    1c5 <sysfs_find_dirent+0x9>
               if (!strcmp(sd->s_name, name))
                       return sd;
       return NULL;
}
 1da:   89 d8                   mov    %ebx,%eax
 1dc:   5b                      pop    %ebx
 1dd:   5e                      pop    %esi
 1de:   c3                      ret

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
       [not found]                                     ` <449c10960811142229v77ea85f4nf898d447c7e63422@mail.gmail.com>
@ 2008-11-15 18:19                                       ` Bob Copeland
  2008-11-16  0:12                                         ` Dan McGee
  0 siblings, 1 reply; 39+ messages in thread
From: Bob Copeland @ 2008-11-15 18:19 UTC (permalink / raw)
  To: Dan McGee; +Cc: mcgrof, m.sujith, linux-wireless, mb, johannes

On Sat, Nov 15, 2008 at 12:29:34AM -0600, Dan McGee wrote:
> On Fri, Nov 14, 2008 at 8:57 PM, Dan McGee <dpmcgee@gmail.com> wrote:
> >
> > BUG: unable to handle kernel NULL pointer dereference at 00000082
> > IP: [<7818ca71>] sysfs_find_dirent+0x9/0x23
> > Oops: 0000 [#1] PREEMPT
> > Modules linked in: ath5k(+) mac80211

So, just to recap, this is with Luis' patch; now you get a null pointer
dereference in sysfs instead of in ieee80211_register_hw?  It does look 
like we're deep in register_netdevice now.  If you revert his patch, you
can still get the error in register_hw every time?

> > Pid: 818 comm: modprobe Not tainted (2.6.27.6eee #1)
> > EIP: 0060:[<7818ca71>] EFLAGS: 00010206 CPU: 0
> > EIP is at sysfs_find_dirent+0x9/0x23
> > EAX: 00000001 EBX: 00000072 ECX: 00000001 EDX: b730b4f0
> > ESI: b730b4f0 EDI: fffffff4 EBP: b7311490 ESP: b73ffd34

EBX is 00000072, definitely not a pointer.

> And I had the code completely wrong, oops. Looks like we are bailing
> on the strcmp call in this function or something along those lines? I
> wish I could be a bigger help with debugging this stuff.

Yep, or at least in the setup code for that.  Don't worry, you're being
a big help; I think we just don't have a good enough theory yet to
propose decent debugging patches.

> struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
>                                        const unsigned char *name)
> {
>  1bc:   56                      push   %esi
>  1bd:   89 d6                   mov    %edx,%esi
>  1bf:   53                      push   %ebx
>         struct sysfs_dirent *sd;
> 
>         for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling)
>  1c0:   8b 58 18                mov    0x18(%eax),%ebx
>  1c3:   eb 11                   jmp    1d6 <sysfs_find_dirent+0x1a>
>                 if (!strcmp(sd->s_name, name))
>  1c5:   8b 43 10                mov    0x10(%ebx),%eax

EBX appears to be sd (it's initialized at line 1c0 to parent_sd + 0x18,
which is &parent_sd->s_dir.children, then it jumps to the loop test).
Thus EAX must be sd->s_sibling, which we hope to use for strcmp.

So, while traversing the sibling pointers, one of them happens to be
00000072 (instead of what should probably have been NULL).  0x72 is not
a poison value I'm aware of.  At this point, things have gone south, but
the real problem happened earlier.

Can you post your .config?

-- 
Bob Copeland %% www.bobcopeland.com


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-15 18:19                                       ` Bob Copeland
@ 2008-11-16  0:12                                         ` Dan McGee
  0 siblings, 0 replies; 39+ messages in thread
From: Dan McGee @ 2008-11-16  0:12 UTC (permalink / raw)
  To: Bob Copeland; +Cc: mcgrof, m.sujith, linux-wireless, mb, johannes

On Sat, Nov 15, 2008 at 12:19 PM, Bob Copeland <me@bobcopeland.com> wrote:
> On Sat, Nov 15, 2008 at 12:29:34AM -0600, Dan McGee wrote:
>> On Fri, Nov 14, 2008 at 8:57 PM, Dan McGee <dpmcgee@gmail.com> wrote:
>> >
>> > BUG: unable to handle kernel NULL pointer dereference at 00000082
>> > IP: [<7818ca71>] sysfs_find_dirent+0x9/0x23
>> > Oops: 0000 [#1] PREEMPT
>> > Modules linked in: ath5k(+) mac80211
>
> So, just to recap, this is with Luis' patch; now you get a null pointer
> dereference in sysfs instead of in ieee80211_register_hw?  It does look
> like we're deep in register_netdevice now.  If you revert his patch, you
> can still get the error in register_hw every time?

Yeah, this is with Luis' patch. Without that patch it always bugs out
at the earlier step in register_hw(). And like I said, I can't
reproduce this one with debug symbols built into the kernel
unfortunately.

>> > Pid: 818 comm: modprobe Not tainted (2.6.27.6eee #1)
>> > EIP: 0060:[<7818ca71>] EFLAGS: 00010206 CPU: 0
>> > EIP is at sysfs_find_dirent+0x9/0x23
>> > EAX: 00000001 EBX: 00000072 ECX: 00000001 EDX: b730b4f0
>> > ESI: b730b4f0 EDI: fffffff4 EBP: b7311490 ESP: b73ffd34
>
> EBX is 00000072, definitely not a pointer.
>
>> And I had the code completely wrong, oops. Looks like we are bailing
>> on the strcmp call in this function or something along those lines? I
>> wish I could be a bigger help with debugging this stuff.
>
> Yep, or at least in the setup code for that.  Don't worry, you're being
> a big help; I think we just don't have a good enough theory yet to
> propose decent debugging patches.
>
>> struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
>>                                        const unsigned char *name)
>> {
>>  1bc:   56                      push   %esi
>>  1bd:   89 d6                   mov    %edx,%esi
>>  1bf:   53                      push   %ebx
>>         struct sysfs_dirent *sd;
>>
>>         for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling)
>>  1c0:   8b 58 18                mov    0x18(%eax),%ebx
>>  1c3:   eb 11                   jmp    1d6 <sysfs_find_dirent+0x1a>
>>                 if (!strcmp(sd->s_name, name))
>>  1c5:   8b 43 10                mov    0x10(%ebx),%eax
>
> EBX appears to be sd (it's initialized at line 1c0 to parent_sd + 0x18,
> which is &parent_sd->s_dir.children, then it jumps to the loop test).
> Thus EAX must be sd->s_sibling, which we hope to use for strcmp.
>
> So, while traversing the sibling pointers, one of them happens to be
> 00000072 (instead of what should probably have been NULL).  0x72 is not
> a poison value I'm aware of.  At this point, things have gone south, but
> the real problem happened earlier.

Yeah, I figured it was something earlier that didn't quite work out,
but I really had no idea where to start poking.

> Can you post your .config?

Sure- here it is: http://www.toofishes.net/uploads/kernelconfig

-Dan

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-14 17:02   ` Bob Copeland
  2008-11-14 17:37     ` Luis R. Rodriguez
@ 2008-11-16  2:11     ` Dan McGee
  2008-11-16  2:38       ` Dan McGee
  1 sibling, 1 reply; 39+ messages in thread
From: Dan McGee @ 2008-11-16  2:11 UTC (permalink / raw)
  To: Bob Copeland; +Cc: Luis R. Rodriguez, linux-wireless, Michael Buesch

On Fri, Nov 14, 2008 at 11:02 AM, Bob Copeland <me@bobcopeland.com> wrote:
> On Fri, Nov 14, 2008 at 1:17 AM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>> If our offsets are the same then its probably on line 791:
> [...]
>> 790             name = wiphy_dev(local->hw.wiphy)->driver->name;
>> 791             local->hw.workqueue = create_freezeable_workqueue(name);
>
> I agree, having looked at the objdump output.  Hmm, maybe ->driver pointer
> is bad even though I can't see that happening.  Dan, can you try adding a
> printk before line 790 to see if any of the pointers are null?

So I went back and added a few things to the original unpatched code
to see what was NULL pointering, just to be sure we were thinking
right. Here is the relevant code:
        printk(KERN_DEBUG "wiphy_dev() : %p\n", wiphy_dev(local->hw.wiphy));
        printk(KERN_DEBUG "driver      : %p\n",
wiphy_dev(local->hw.wiphy)->driver);
        printk(KERN_DEBUG "driver->name: %p\n",
wiphy_dev(local->hw.wiphy)->driver->name);
        name = wiphy_dev(local->hw.wiphy)->driver->name;
        local->hw.workqueue = create_freezeable_workqueue(name);

And the dmesg output:
ath5k_pci xxx: registered as ''
wiphy_dev() : b730b408
driver       : 00000001
BUG: unalbe to handle kernel NULL pointer dereference at 00000001

So we bugged out on trying to print driver->name, which is the same
problem we would have hit in the 'name =' line.

-Dan

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-16  2:11     ` Dan McGee
@ 2008-11-16  2:38       ` Dan McGee
  2008-11-16  5:48         ` Luis R. Rodriguez
  2008-11-16  5:55         ` Bob Copeland
  0 siblings, 2 replies; 39+ messages in thread
From: Dan McGee @ 2008-11-16  2:38 UTC (permalink / raw)
  To: Bob Copeland; +Cc: Luis R. Rodriguez, linux-wireless, Michael Buesch

On Sat, Nov 15, 2008 at 8:11 PM, Dan McGee <dpmcgee@gmail.com> wrote:
> On Fri, Nov 14, 2008 at 11:02 AM, Bob Copeland <me@bobcopeland.com> wrote:
>> On Fri, Nov 14, 2008 at 1:17 AM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>>> If our offsets are the same then its probably on line 791:
>> [...]
>>> 790             name = wiphy_dev(local->hw.wiphy)->driver->name;
>>> 791             local->hw.workqueue = create_freezeable_workqueue(name);
>>
>> I agree, having looked at the objdump output.  Hmm, maybe ->driver pointer
>> is bad even though I can't see that happening.  Dan, can you try adding a
>> printk before line 790 to see if any of the pointers are null?
>
> So I went back and added a few things to the original unpatched code
> to see what was NULL pointering, just to be sure we were thinking
> right. Here is the relevant code:
>        printk(KERN_DEBUG "wiphy_dev() : %p\n", wiphy_dev(local->hw.wiphy));
>        printk(KERN_DEBUG "driver      : %p\n",
> wiphy_dev(local->hw.wiphy)->driver);
>        printk(KERN_DEBUG "driver->name: %p\n",
> wiphy_dev(local->hw.wiphy)->driver->name);
>        name = wiphy_dev(local->hw.wiphy)->driver->name;
>        local->hw.workqueue = create_freezeable_workqueue(name);
>
> And the dmesg output:
> ath5k_pci xxx: registered as ''
> wiphy_dev() : b730b408
> driver       : 00000001
> BUG: unalbe to handle kernel NULL pointer dereference at 00000001
>
> So we bugged out on trying to print driver->name, which is the same
> problem we would have hit in the 'name =' line.

I should clarify here- the real bug was when trying to access
'->driver', as we got the 00000001 poison pointer returned (this is a
poison value, right?).

The above sequence of events was what took place when trying to load
the module on startup. To see if other things had an effect, I
disabled module autoloading during the boot sequence and got slightly
different results but it looks to be the same type of problem:

registered as ''
wiphy_dev: b730d740
driver: 7fffffff
driver->name: ffffffff
BUG: unable to handle kernel paging request at ffffffff

One more note- booting with the 2.6.27.6 shipped wireless modules
(mac80211 and ath5k) has always been working fine. It is only when I
try to run compat-wireless on top of this kernel that we are seeing
issues. Theoretically that means this should be bisectable if we
really can't figure it out, but I'm not sure how practical that is.

-Dan

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-16  2:38       ` Dan McGee
@ 2008-11-16  5:48         ` Luis R. Rodriguez
  2008-11-16  5:53           ` Bob Copeland
  2008-11-16  5:55         ` Bob Copeland
  1 sibling, 1 reply; 39+ messages in thread
From: Luis R. Rodriguez @ 2008-11-16  5:48 UTC (permalink / raw)
  To: Dan McGee; +Cc: Bob Copeland, linux-wireless, Michael Buesch

On Sat, Nov 15, 2008 at 6:38 PM, Dan McGee <dpmcgee@gmail.com> wrote:
> On Sat, Nov 15, 2008 at 8:11 PM, Dan McGee <dpmcgee@gmail.com> wrote:
>> On Fri, Nov 14, 2008 at 11:02 AM, Bob Copeland <me@bobcopeland.com> wrote:
>>> On Fri, Nov 14, 2008 at 1:17 AM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>>>> If our offsets are the same then its probably on line 791:
>>> [...]
>>>> 790             name = wiphy_dev(local->hw.wiphy)->driver->name;
>>>> 791             local->hw.workqueue = create_freezeable_workqueue(name);
>>>
>>> I agree, having looked at the objdump output.  Hmm, maybe ->driver pointer
>>> is bad even though I can't see that happening.  Dan, can you try adding a
>>> printk before line 790 to see if any of the pointers are null?
>>
>> So I went back and added a few things to the original unpatched code
>> to see what was NULL pointering, just to be sure we were thinking
>> right. Here is the relevant code:
>>        printk(KERN_DEBUG "wiphy_dev() : %p\n", wiphy_dev(local->hw.wiphy));
>>        printk(KERN_DEBUG "driver      : %p\n",
>> wiphy_dev(local->hw.wiphy)->driver);
>>        printk(KERN_DEBUG "driver->name: %p\n",
>> wiphy_dev(local->hw.wiphy)->driver->name);
>>        name = wiphy_dev(local->hw.wiphy)->driver->name;
>>        local->hw.workqueue = create_freezeable_workqueue(name);
>>
>> And the dmesg output:
>> ath5k_pci xxx: registered as ''
>> wiphy_dev() : b730b408
>> driver       : 00000001
>> BUG: unalbe to handle kernel NULL pointer dereference at 00000001
>>
>> So we bugged out on trying to print driver->name, which is the same
>> problem we would have hit in the 'name =' line.
>
> I should clarify here- the real bug was when trying to access
> '->driver', as we got the 00000001 poison pointer returned (this is a
> poison value, right?).

Not sure why its 00000001, nor do I know if its poison. One thing I am
fairly positive about is that the reason why this was wrong all along
was because we were trying to get the device's ->driver structure to
get driver->name but the device won't get its ->driver pointer
assigned until *after* a successful probe. Lets review the PCI probe:

/**
 * __pci_device_probe()
 * @drv: driver to call to check if it wants the PCI device
 * @pci_dev: PCI device being probed
 *
 * returns 0 on success, else error.
 * side-effect: pci_dev->driver is set to drv when drv claims pci_dev.
 */
static int
__pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev)
{
        const struct pci_device_id *id;
        int error = 0;

        if (!pci_dev->driver && drv->probe) {
                error = -ENODEV;

                id = pci_match_device(drv, pci_dev);
                if (id)
                        error = pci_call_probe(drv, pci_dev, id);
                if (error >= 0) {
                        pci_dev->driver = drv;
                        error = 0;
                }
        }
        return error;
}

So unless probe was successful (pci_call_probe which calls
drv->probe()) we don't update pci_dev->driver pointer.

> The above sequence of events was what took place when trying to load
> the module on startup. To see if other things had an effect, I
> disabled module autoloading during the boot sequence and got slightly
> different results but it looks to be the same type of problem:
>
> registered as ''
> wiphy_dev: b730d740
> driver: 7fffffff
> driver->name: ffffffff
> BUG: unable to handle kernel paging request at ffffffff
>
> One more note- booting with the 2.6.27.6 shipped wireless modules
> (mac80211 and ath5k) has always been working fine. It is only when I
> try to run compat-wireless on top of this kernel that we are seeing
> issues.

This is interesting, but then again the fact that it was working *all
along* for other devices is interesting too as it shouldn't have.

> Theoretically that means this should be bisectable if we
> really can't figure it out, but I'm not sure how practical that is.

Yeah don't bother, the issue on this e-mail was fixed, another issue
has come up though so that is separate.

  Luis

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-16  5:48         ` Luis R. Rodriguez
@ 2008-11-16  5:53           ` Bob Copeland
  2008-11-16  6:05             ` Luis R. Rodriguez
  0 siblings, 1 reply; 39+ messages in thread
From: Bob Copeland @ 2008-11-16  5:53 UTC (permalink / raw)
  To: Luis R. Rodriguez; +Cc: Dan McGee, linux-wireless, Michael Buesch

On Sat, Nov 15, 2008 at 09:48:13PM -0800, Luis R. Rodriguez wrote:
> Not sure why its 00000001, nor do I know if its poison. One thing I am
> fairly positive about is that the reason why this was wrong all along
> was because we were trying to get the device's ->driver structure to
> get driver->name but the device won't get its ->driver pointer
> assigned until *after* a successful probe. Lets review the PCI probe:

No, as I later corrected myself, that is pci_dev->driver, but what
mac80211 is using is pci_dev->dev.driver.  Which actually happens to be
set up fine.  (I verified this by inserting a printk into the ath5k
driver and it all works fine.)

> Yeah don't bother, the issue on this e-mail was fixed, another issue
> has come up though so that is separate.

I think the second issue is just the first issue, but the fix papered
over the first bug.

-- 
Bob Copeland %% www.bobcopeland.com


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-16  2:38       ` Dan McGee
  2008-11-16  5:48         ` Luis R. Rodriguez
@ 2008-11-16  5:55         ` Bob Copeland
  1 sibling, 0 replies; 39+ messages in thread
From: Bob Copeland @ 2008-11-16  5:55 UTC (permalink / raw)
  To: Dan McGee; +Cc: Luis R. Rodriguez, linux-wireless, Michael Buesch

On Sat, Nov 15, 2008 at 08:38:32PM -0600, Dan McGee wrote:
> I should clarify here- the real bug was when trying to access
> '->driver', as we got the 00000001 poison pointer returned (this is a
> poison value, right?).

Not sure it's a poison, but it's definitely a bad value.  The good news 
is that with your config and compat-wireless I can easily reproduce this 
now.  So hopefully I'll be able to pin something down soon.

-- 
Bob Copeland %% www.bobcopeland.com


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-16  5:53           ` Bob Copeland
@ 2008-11-16  6:05             ` Luis R. Rodriguez
  2008-11-16  6:06               ` Luis R. Rodriguez
  0 siblings, 1 reply; 39+ messages in thread
From: Luis R. Rodriguez @ 2008-11-16  6:05 UTC (permalink / raw)
  To: Bob Copeland; +Cc: Dan McGee, linux-wireless, Michael Buesch

On Sat, Nov 15, 2008 at 9:53 PM, Bob Copeland <me@bobcopeland.com> wrote:
> On Sat, Nov 15, 2008 at 09:48:13PM -0800, Luis R. Rodriguez wrote:
>> Not sure why its 00000001, nor do I know if its poison. One thing I am
>> fairly positive about is that the reason why this was wrong all along
>> was because we were trying to get the device's ->driver structure to
>> get driver->name but the device won't get its ->driver pointer
>> assigned until *after* a successful probe. Lets review the PCI probe:
>
> No, as I later corrected myself, that is pci_dev->driver, but what
> mac80211 is using is pci_dev->dev.driver.

Right, the dev for the pci_dev which hasn't yet had a driver assigned
yet, no? Who assigns the pci_dev's dev's driver pointer and when? Or
am I still missing something?

  Luis

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-16  6:05             ` Luis R. Rodriguez
@ 2008-11-16  6:06               ` Luis R. Rodriguez
  2008-11-16  6:15                 ` Luis R. Rodriguez
  0 siblings, 1 reply; 39+ messages in thread
From: Luis R. Rodriguez @ 2008-11-16  6:06 UTC (permalink / raw)
  To: Bob Copeland; +Cc: Dan McGee, linux-wireless, Michael Buesch

On Sat, Nov 15, 2008 at 10:05 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
> On Sat, Nov 15, 2008 at 9:53 PM, Bob Copeland <me@bobcopeland.com> wrote:
>> On Sat, Nov 15, 2008 at 09:48:13PM -0800, Luis R. Rodriguez wrote:
>>> Not sure why its 00000001, nor do I know if its poison. One thing I am
>>> fairly positive about is that the reason why this was wrong all along
>>> was because we were trying to get the device's ->driver structure to
>>> get driver->name but the device won't get its ->driver pointer
>>> assigned until *after* a successful probe. Lets review the PCI probe:
>>
>> No, as I later corrected myself, that is pci_dev->driver, but what
>> mac80211 is using is pci_dev->dev.driver.
>
> Right, the dev for the pci_dev which hasn't yet had a driver assigned
> yet, no? Who assigns the pci_dev's dev's driver pointer and when? Or
> am I still missing something?

Oh nevermind yeah pci_dev->driver = drv; but still who then assigns
the dev's driver?

  Luis

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-16  6:06               ` Luis R. Rodriguez
@ 2008-11-16  6:15                 ` Luis R. Rodriguez
  2008-11-16 16:20                   ` Bob Copeland
  0 siblings, 1 reply; 39+ messages in thread
From: Luis R. Rodriguez @ 2008-11-16  6:15 UTC (permalink / raw)
  To: Bob Copeland; +Cc: Dan McGee, linux-wireless, Michael Buesch

On Sat, Nov 15, 2008 at 10:06 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
> On Sat, Nov 15, 2008 at 10:05 PM, Luis R. Rodriguez <mcgrof@gmail.com> wrote:
>> On Sat, Nov 15, 2008 at 9:53 PM, Bob Copeland <me@bobcopeland.com> wrote:
>>> On Sat, Nov 15, 2008 at 09:48:13PM -0800, Luis R. Rodriguez wrote:
>>>> Not sure why its 00000001, nor do I know if its poison. One thing I am
>>>> fairly positive about is that the reason why this was wrong all along
>>>> was because we were trying to get the device's ->driver structure to
>>>> get driver->name but the device won't get its ->driver pointer
>>>> assigned until *after* a successful probe. Lets review the PCI probe:
>>>
>>> No, as I later corrected myself, that is pci_dev->driver, but what
>>> mac80211 is using is pci_dev->dev.driver.
>>
>> Right, the dev for the pci_dev which hasn't yet had a driver assigned
>> yet, no? Who assigns the pci_dev's dev's driver pointer and when? Or
>> am I still missing something?
>
> Oh nevermind yeah pci_dev->driver = drv; but still who then assigns
> the dev's driver?

The answer is drivers/base/dd.c really_probe(). Curious enough guess
what, dev->driver = drv; is assigned *before* the device driver probe
:) (bus probe in this case first, so dev->bus->probe) contrary to the
other way around. So you're absolutely right the patch is not
necessary then. Good catch.

  Luis

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-16  6:15                 ` Luis R. Rodriguez
@ 2008-11-16 16:20                   ` Bob Copeland
  2008-11-16 16:38                     ` Dan McGee
  0 siblings, 1 reply; 39+ messages in thread
From: Bob Copeland @ 2008-11-16 16:20 UTC (permalink / raw)
  To: Luis R. Rodriguez; +Cc: Dan McGee, linux-wireless, Michael Buesch

On Sat, Nov 15, 2008 at 10:15:31PM -0800, Luis R. Rodriguez wrote:
> The answer is drivers/base/dd.c really_probe(). Curious enough guess
> what, dev->driver = drv; is assigned *before* the device driver probe
> :) (bus probe in this case first, so dev->bus->probe) contrary to the
> other way around. So you're absolutely right the patch is not
> necessary then. Good catch.

Cool, so that's where that is.  

I got some more info, this looks like a miscompilation of some kind,
maybe a mismatch between kernel config and compat-wireless, but I didn't
see anything obvious.

Here's what I did:

--- drivers/net/wireless/ath5k/base.c	2008-11-14 00:11:09.000000000 -0500
+++ drivers/net/wireless/ath5k/my-base.c	2008-11-16 11:01:42.000000000 -0500
@@ -490,6 +490,9 @@
 		goto err_map;
 	}
 
+	print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1, 
+		hw->wiphy, sizeof(struct wiphy), 1);
+	printk(KERN_INFO "offset: %d\n", offsetof(struct wiphy, dev.bus_id));
 	dev_info(&pdev->dev, "registered as '%s'\n", wiphy_name(hw->wiphy));
 
 	/* Initialize driver private data */

Which produced:

ath5k_pci 0000:02:00.0: PCI INT A -> GSI 17 (level, low) -> IRQ 17
ath5k_pci 0000:02:00.0: setting latency timer to 64
00000000: 00 00 00 00 00 00 00 00 b4 ea 07 b7 00 00 00 00  ................
00000010: 00 00 00 00 54 c0 03 b3 54 c0 03 b3 9a c0 24 78  ....T...T.....$x
00000020: 58 c0 24 78 00 00 00 00 00 00 00 00 00 00 00 00  X.$x............
00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000080: c0 c0 03 b3 c0 c0 03 b3 00 00 00 00 40 b2 c0 b5  ............@...
00000090: 28 cf 42 78 00 00 00 00 01 00 00 00 01 00 00 00  (.Bx............
000000a0: 70 68 79 30 00 00 00 00 00 00 00 00 00 00 00 00  phy0............
000000b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
000000c0: 01 00 00 00 04 c1 03 b3 04 c1 03 b3 00 00 00 00  ................
000000d0: 00 00 00 00 00 00 00 00 00 c0 03 b3 00 00 00 00  ................
000000e0: 00 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00  ................
000000f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000100: 40 c1 03 b3 40 c1 03 b3 00 00 00 00 00 00 00 00  @...@...........
00000110: 50 c1 03 b3 50 c1 03 b3 58 c1 03 b3 58 c1 03 b3  P...P...X...X...
00000120: 2c 0f 45 78 00 00 00 00 00 00 00 00 00 00 00 00  ,.Ex............
00000130: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
offset: 164
ath5k_pci 0000:02:00.0: registered as ''
BUG: unable to handle kernel NULL pointer dereference at 00000000

As you can see, the modules think "phy0" should be at offset 164, but
it's really at 160.

Also I noticed the original config had CONFIG_DYNAMIC_FTRACE=y, unfortunately
turning that off didn't seem to change anything (hmm, I didn't get a build 
bug, was the breakage fixed in 2.6.27.y?)

-- 
Bob Copeland %% www.bobcopeland.com


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-16 16:20                   ` Bob Copeland
@ 2008-11-16 16:38                     ` Dan McGee
  2008-11-16 16:52                       ` Bob Copeland
  0 siblings, 1 reply; 39+ messages in thread
From: Dan McGee @ 2008-11-16 16:38 UTC (permalink / raw)
  To: Bob Copeland; +Cc: Luis R. Rodriguez, linux-wireless, Michael Buesch

On Sun, Nov 16, 2008 at 10:20 AM, Bob Copeland <me@bobcopeland.com> wrote:
> As you can see, the modules think "phy0" should be at offset 164, but
> it's really at 160.

> Also I noticed the original config had CONFIG_DYNAMIC_FTRACE=y, unfortunately
> turning that off didn't seem to change anything (hmm, I didn't get a build
> bug, was the breakage fixed in 2.6.27.y?)

Note that HAVE is set if the architecture supports it; CONFIG_FTRACE
itself was turned off (and CONFIG_DYNAMIC_FTRACE is not even present):
$ grep FTRACE .config
CONFIG_HAVE_FTRACE=y
CONFIG_HAVE_DYNAMIC_FTRACE=y
# CONFIG_FTRACE is not set

-Dan

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-16 16:38                     ` Dan McGee
@ 2008-11-16 16:52                       ` Bob Copeland
  2008-11-16 18:01                         ` Dan McGee
  0 siblings, 1 reply; 39+ messages in thread
From: Bob Copeland @ 2008-11-16 16:52 UTC (permalink / raw)
  To: Dan McGee; +Cc: Luis R. Rodriguez, linux-wireless, Michael Buesch

On Sun, Nov 16, 2008 at 10:38:39AM -0600, Dan McGee wrote:
> On Sun, Nov 16, 2008 at 10:20 AM, Bob Copeland <me@bobcopeland.com> wrote:
> > As you can see, the modules think "phy0" should be at offset 164, but
> > it's really at 160.
> 
> > Also I noticed the original config had CONFIG_DYNAMIC_FTRACE=y, unfortunately
> > turning that off didn't seem to change anything (hmm, I didn't get a build
> > bug, was the breakage fixed in 2.6.27.y?)
> 
> Note that HAVE is set if the architecture supports it; CONFIG_FTRACE
> itself was turned off (and CONFIG_DYNAMIC_FTRACE is not even present):
> $ grep FTRACE .config
> CONFIG_HAVE_FTRACE=y
> CONFIG_HAVE_DYNAMIC_FTRACE=y
> # CONFIG_FTRACE is not set

Oh, you are right.  Thanks.  I probably won't be able to do any more
testing today but I would suggest trying to turn off wireless-related
options in the main kernel config and see if you can get it working 
(start with cfg80211 and its dependencies...)

-- 
Bob Copeland %% www.bobcopeland.com


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-16 16:52                       ` Bob Copeland
@ 2008-11-16 18:01                         ` Dan McGee
  2008-11-16 18:05                           ` Johannes Berg
  0 siblings, 1 reply; 39+ messages in thread
From: Dan McGee @ 2008-11-16 18:01 UTC (permalink / raw)
  To: Bob Copeland; +Cc: Luis R. Rodriguez, linux-wireless, Michael Buesch

On Sun, Nov 16, 2008 at 10:52 AM, Bob Copeland <me@bobcopeland.com> wrote:
> On Sun, Nov 16, 2008 at 10:38:39AM -0600, Dan McGee wrote:
>> On Sun, Nov 16, 2008 at 10:20 AM, Bob Copeland <me@bobcopeland.com> wrote:
>> > As you can see, the modules think "phy0" should be at offset 164, but
>> > it's really at 160.
>>
>> > Also I noticed the original config had CONFIG_DYNAMIC_FTRACE=y, unfortunately
>> > turning that off didn't seem to change anything (hmm, I didn't get a build
>> > bug, was the breakage fixed in 2.6.27.y?)
>>
>> Note that HAVE is set if the architecture supports it; CONFIG_FTRACE
>> itself was turned off (and CONFIG_DYNAMIC_FTRACE is not even present):
>> $ grep FTRACE .config
>> CONFIG_HAVE_FTRACE=y
>> CONFIG_HAVE_DYNAMIC_FTRACE=y
>> # CONFIG_FTRACE is not set
>
> Oh, you are right.  Thanks.  I probably won't be able to do any more
> testing today but I would suggest trying to turn off wireless-related
> options in the main kernel config and see if you can get it working
> (start with cfg80211 and its dependencies...)

The fun continues. If I rebuild the kernel with all wireless options
turned off except WIRELESS_EXT, then build compat-wireless, our module
appears to load without issues.

.config:
#
# Wireless
#
# CONFIG_CFG80211 is not set
CONFIG_WIRELESS_EXT=y
CONFIG_WIRELESS_EXT_SYSFS=y
# CONFIG_MAC80211 is not set
# CONFIG_IEEE80211 is not set
# CONFIG_RFKILL is not set
# CONFIG_NET_9P is not set

dmesg:
atl2: eth0 NIC Link is Up<100 Mbps Full Duplex>
evdev.c(EVIOCGBIT): Suspicious buffer size 511, limiting output to 64
bytes. See http://userweb.kernel.org/~dtor/eviocgbit-bug.html
<<< boot stopped here, then I modprobed ath5k >>>
cfg80211: Using static regulatory domain info
cfg80211: Regulatory domain: US
	(start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp)
	(2402000 KHz - 2472000 KHz @ 40000 KHz), (600 mBi, 2700 mBm)
	(5170000 KHz - 5190000 KHz @ 40000 KHz), (600 mBi, 2300 mBm)
	(5190000 KHz - 5210000 KHz @ 40000 KHz), (600 mBi, 2300 mBm)
	(5210000 KHz - 5230000 KHz @ 40000 KHz), (600 mBi, 2300 mBm)
	(5230000 KHz - 5330000 KHz @ 40000 KHz), (600 mBi, 2300 mBm)
	(5735000 KHz - 5835000 KHz @ 40000 KHz), (600 mBi, 3000 mBm)
cfg80211: Calling CRDA for country: US
ath5k_pci 0000:01:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
ath5k_pci 0000:01:00.0: setting latency timer to 64
ath5k_pci 0000:01:00.0: registered as 'phy0'
phy0: Selected rate control algorithm 'pid'
ath5k phy0: Atheros AR2425 chip found (MAC: 0xe2, PHY: 0x70)
wlan0: authenticate with AP 00:1c:10:21:f5:f4
wlan0: authenticated
wlan0: associate with AP 00:1c:10:21:f5:f4
wlan0: RX AssocResp from 00:1c:10:21:f5:f4 (capab=0x411 status=0 aid=3)
wlan0: associated

I had not seen this cfg80211 stuff before when modprobing. I also see
that cfg80211 is now loaded as a module- was the fact that it was
built into the kernel before causing all these problems? That would be
my strong suspicion, in which case this entire thread is really a bum
bug report, except that compat-wireless should enforce that cfg80211
is a module just like it currently enforces that for mac80211, saving
someone else this headache.

-Dan

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-16 18:01                         ` Dan McGee
@ 2008-11-16 18:05                           ` Johannes Berg
  2008-11-16 18:20                             ` Luis R. Rodriguez
  0 siblings, 1 reply; 39+ messages in thread
From: Johannes Berg @ 2008-11-16 18:05 UTC (permalink / raw)
  To: Dan McGee; +Cc: Bob Copeland, Luis R. Rodriguez, linux-wireless, Michael Buesch

[-- Attachment #1: Type: text/plain, Size: 576 bytes --]

On Sun, 2008-11-16 at 12:01 -0600, Dan McGee wrote:

> I had not seen this cfg80211 stuff before when modprobing. I also see
> that cfg80211 is now loaded as a module- was the fact that it was
> built into the kernel before causing all these problems? That would be
> my strong suspicion, in which case this entire thread is really a bum
> bug report, except that compat-wireless should enforce that cfg80211
> is a module just like it currently enforces that for mac80211, saving
> someone else this headache.

Let me guess, symbol versioning was off?

johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Kernel oops when loading ath5k from compat-wireless in 2.6.27
  2008-11-16 18:05                           ` Johannes Berg
@ 2008-11-16 18:20                             ` Luis R. Rodriguez
  0 siblings, 0 replies; 39+ messages in thread
From: Luis R. Rodriguez @ 2008-11-16 18:20 UTC (permalink / raw)
  To: Johannes Berg; +Cc: Dan McGee, Bob Copeland, linux-wireless, Michael Buesch

On Sun, Nov 16, 2008 at 10:05 AM, Johannes Berg
<johannes@sipsolutions.net> wrote:
> On Sun, 2008-11-16 at 12:01 -0600, Dan McGee wrote:
>
>> I had not seen this cfg80211 stuff before when modprobing. I also see
>> that cfg80211 is now loaded as a module- was the fact that it was
>> built into the kernel before causing all these problems? That would be
>> my strong suspicion, in which case this entire thread is really a bum
>> bug report, except that compat-wireless should enforce that cfg80211
>> is a module just like it currently enforces that for mac80211, saving
>> someone else this headache.
>
> Let me guess, symbol versioning was off?

/me adds complaint on compat-wireless if cfg80211 is built in

^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2008-11-16 18:20 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-11-14  5:46 Kernel oops when loading ath5k from compat-wireless in 2.6.27 Dan McGee
2008-11-14  6:17 ` Luis R. Rodriguez
2008-11-14 17:02   ` Bob Copeland
2008-11-14 17:37     ` Luis R. Rodriguez
2008-11-14 17:49       ` Bob Copeland
2008-11-14 18:06         ` Luis R. Rodriguez
2008-11-14 18:13           ` Sujith
2008-11-14 18:29             ` Luis R. Rodriguez
2008-11-14 18:33               ` Sujith
2008-11-14 18:37                 ` Luis R. Rodriguez
2008-11-14 19:33                   ` Dan McGee
2008-11-14 20:33                     ` Bob Copeland
2008-11-14 20:41                       ` Luis R. Rodriguez
2008-11-14 21:13                         ` Luis R. Rodriguez
2008-11-15  0:25                           ` Dan McGee
2008-11-15  0:36                             ` Luis R. Rodriguez
2008-11-15  2:05                               ` Dan McGee
2008-11-15  2:29                                 ` Bob Copeland
2008-11-15  2:57                                   ` Dan McGee
2008-11-15  6:30                                     ` Dan McGee
     [not found]                                     ` <449c10960811142229v77ea85f4nf898d447c7e63422@mail.gmail.com>
2008-11-15 18:19                                       ` Bob Copeland
2008-11-16  0:12                                         ` Dan McGee
2008-11-15  0:38                             ` Bob Copeland
2008-11-14 18:34           ` Bob Copeland
2008-11-16  2:11     ` Dan McGee
2008-11-16  2:38       ` Dan McGee
2008-11-16  5:48         ` Luis R. Rodriguez
2008-11-16  5:53           ` Bob Copeland
2008-11-16  6:05             ` Luis R. Rodriguez
2008-11-16  6:06               ` Luis R. Rodriguez
2008-11-16  6:15                 ` Luis R. Rodriguez
2008-11-16 16:20                   ` Bob Copeland
2008-11-16 16:38                     ` Dan McGee
2008-11-16 16:52                       ` Bob Copeland
2008-11-16 18:01                         ` Dan McGee
2008-11-16 18:05                           ` Johannes Berg
2008-11-16 18:20                             ` Luis R. Rodriguez
2008-11-16  5:55         ` Bob Copeland
2008-11-14 13:18 ` Bob Copeland

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).