Linux wireless drivers development
 help / color / mirror / Atom feed
* [mmotm 2009-10-09-01-07] b43/wireless possible circular locking
From: Dave Young @ 2009-10-11  9:41 UTC (permalink / raw)
  To: akpm; +Cc: bcm43xx-dev, linux-wireless, linux-kernel

Hi,

I got lockdep warnings about possible circular lock with
b43 interface startup. It looks like a real problem.

[   71.974542] wlan0: deauthenticating from 00:19:e0:db:24:de by local choice (reason=3)
[   72.004352] b43-phy0 debug: Removing Interface type 2
[   72.005431] 
[   72.005435] =======================================================
[   72.006168] [ INFO: possible circular locking dependency detected ]
[   72.006759] 2.6.32-rc3-mm1 #4
[   72.007047] -------------------------------------------------------
[   72.007617] ifconfig/2175 is trying to acquire lock:
[   72.007617]  (&(&rfkill->poll_work)->work){+.+...}, at: [<c0239375>] __cancel_work_timer+0x8c/0x18e
[   72.007617] 
[   72.007617] but task is already holding lock:
[   72.007617]  (&wl->mutex){+.+.+.}, at: [<f8fa5359>] b43_op_stop+0x28/0x6a [b43]
[   72.007617] 
[   72.007617] which lock already depends on the new lock.
[   72.007617] 
[   72.007617] 
[   72.007617] the existing dependency chain (in reverse order) is:
[   72.007617] 
[   72.007617] -> #1 (&wl->mutex){+.+.+.}:
[   72.007617]        [<c024b251>] __lock_acquire+0x9e2/0xb73
[   72.007617]        [<c024b449>] lock_acquire+0x67/0x84
[   72.007617]        [<c055854d>] __mutex_lock_common+0x35/0x2ca
[   72.007617]        [<c0558880>] mutex_lock_nested+0x30/0x38
[   72.007617]        [<f8fb9bcd>] b43_rfkill_poll+0x26/0xc9 [b43]
[   72.007617]        [<f8f8330e>] ieee80211_rfkill_poll+0x1f/0x21 [mac80211]
[   72.007617]        [<f8396011>] cfg80211_rfkill_poll+0x11/0x13 [cfg80211]
[   72.007617]        [<f826b740>] rfkill_poll+0x14/0x2a [rfkill]
[   72.007617]        [<c0239927>] worker_thread+0x13b/0x1ff
[   72.007617]        [<c023be0c>] kthread+0x58/0x5d
[   72.007617]        [<c0203d07>] kernel_thread_helper+0x7/0x10
[   72.007617] 
[   72.007617] -> #0 (&(&rfkill->poll_work)->work){+.+...}:
[   72.007617]        [<c024b15c>] __lock_acquire+0x8ed/0xb73
[   72.007617]        [<c024b449>] lock_acquire+0x67/0x84
[   72.007617]        [<c02393a2>] __cancel_work_timer+0xb9/0x18e
[   72.007617]        [<c0239482>] cancel_delayed_work_sync+0xb/0xd
[   72.007617]        [<f826b71b>] rfkill_pause_polling+0x20/0x22 [rfkill]
[   72.007617]        [<f8396328>] wiphy_rfkill_stop_polling+0x10/0x12 [cfg80211]
[   72.007617]        [<f8fa5361>] b43_op_stop+0x30/0x6a [b43]
[   72.007617]        [<f8f895af>] ieee80211_stop_device+0x20/0x53 [mac80211]
[   72.007617]        [<f8f8141d>] ieee80211_stop+0x3d3/0x452 [mac80211]
[   72.007617]        [<c04e0c47>] dev_close+0x74/0x90
[   72.007617]        [<c04e082c>] dev_change_flags+0x96/0x144
[   72.007617]        [<c05262fd>] devinet_ioctl+0x212/0x468
[   72.007617]        [<c052814d>] inet_ioctl+0x8e/0xa7
[   72.007617]        [<c04d3e16>] sock_ioctl+0x1d3/0x1f7
[   72.007617]        [<c02a6b49>] vfs_ioctl+0x22/0x67
[   72.007617]        [<c02a707d>] do_vfs_ioctl+0x45f/0x493
[   72.007617]        [<c02a70f1>] sys_ioctl+0x40/0x5a
[   72.007617]        [<c020325d>] syscall_call+0x7/0xb
[   72.007617] 
[   72.007617] other info that might help us debug this:
[   72.007617] 
[   72.007617] 2 locks held by ifconfig/2175:
[   72.007617]  #0:  (rtnl_mutex){+.+.+.}, at: [<c04e8b90>] rtnl_lock+0xf/0x11
[   72.007617]  #1:  (&wl->mutex){+.+.+.}, at: [<f8fa5359>] b43_op_stop+0x28/0x6a [b43]
[   72.007617] 
[   72.007617] stack backtrace:
[   72.007617] Pid: 2175, comm: ifconfig Not tainted 2.6.32-rc3-mm1 #4
[   72.007617] Call Trace:
[   72.007617]  [<c024a863>] print_circular_bug+0x8a/0x96
[   72.007617]  [<c024b15c>] __lock_acquire+0x8ed/0xb73
[   72.007617]  [<c024b449>] lock_acquire+0x67/0x84
[   72.007617]  [<c0239375>] ? __cancel_work_timer+0x8c/0x18e
[   72.007617]  [<c02393a2>] __cancel_work_timer+0xb9/0x18e
[   72.007617]  [<c0239375>] ? __cancel_work_timer+0x8c/0x18e
[   72.007617]  [<c05587c8>] ? __mutex_lock_common+0x2b0/0x2ca
[   72.007617]  [<c0248233>] ? debug_mutex_free_waiter+0x45/0x48
[   72.007617]  [<c05587d8>] ? __mutex_lock_common+0x2c0/0x2ca
[   72.007617]  [<c0239482>] cancel_delayed_work_sync+0xb/0xd
[   72.007617]  [<f826b71b>] rfkill_pause_polling+0x20/0x22 [rfkill]
[   72.007617]  [<f8396328>] wiphy_rfkill_stop_polling+0x10/0x12 [cfg80211]
[   72.007617]  [<f8fa5361>] b43_op_stop+0x30/0x6a [b43]
[   72.007617]  [<f8f895af>] ieee80211_stop_device+0x20/0x53 [mac80211]
[   72.007617]  [<f8f8141d>] ieee80211_stop+0x3d3/0x452 [mac80211]
[   72.007617]  [<c0249d42>] ? trace_hardirqs_on+0xb/0xd
[   72.007617]  [<c022fd77>] ? _local_bh_enable_ip+0x9d/0xa6
[   72.007617]  [<c022fd88>] ? local_bh_enable_ip+0x8/0xa
[   72.007617]  [<c05592a8>] ? _spin_unlock_bh+0x25/0x28
[   72.007617]  [<c04e0c47>] dev_close+0x74/0x90
[   72.007617]  [<c04e082c>] dev_change_flags+0x96/0x144
[   72.007617]  [<c05262fd>] devinet_ioctl+0x212/0x468
[   72.007617]  [<c052814d>] inet_ioctl+0x8e/0xa7
[   72.007617]  [<c04d3e16>] sock_ioctl+0x1d3/0x1f7
[   72.007617]  [<c04d3c43>] ? sock_ioctl+0x0/0x1f7
[   72.007617]  [<c02a6b49>] vfs_ioctl+0x22/0x67
[   72.007617]  [<c02a707d>] do_vfs_ioctl+0x45f/0x493
[   72.007617]  [<c0221652>] ? need_resched+0x14/0x1e
[   72.007617]  [<c0557e29>] ? schedule+0x6ed/0x6fd
[   72.007617]  [<c055b24e>] ? do_page_fault+0x29d/0x2a5
[   72.007617]  [<c02a70f1>] sys_ioctl+0x40/0x5a
[   72.007617]  [<c020325d>] syscall_call+0x7/0xb
[   73.122930] b43-phy0 debug: Wireless interface stopped
[   73.131557] ifconfig used greatest stack depth: 5660 bytes left
[   73.310494] b43-phy0: Loading firmware version 410.2160 (2007-05-26 15:32:10)
[   73.329042] b43-phy0 debug: b2062: Using crystal tab entry 19200 kHz.
[   77.923855] b43-phy0 debug: Chip initialized
[   77.924695] b43-phy0 debug: PIO initialized
[   77.925410] b43-phy0 debug: QoS enabled
[   77.941415] b43-phy0 debug: Wireless interface started
[   77.942164] b43-phy0 debug: Adding Interface type 2
[   77.961969] wlan0: direct probe to AP 00:19:e0:db:24:de (try 1)
[   78.160198] wlan0: direct probe to AP 00:19:e0:db:24:de (try 2)
[   78.164241] wlan0: direct probe responded
[   78.164649] wlan0: authenticate with AP 00:19:e0:db:24:de (try 1)
[   78.167341] wlan0: authenticated
[   78.167791] wlan0: associate with AP 00:19:e0:db:24:de (try 1)
[   78.170935] wlan0: RX ReassocResp from 00:19:e0:db:24:de (capab=0x421 status=0 aid=4)
[   78.171690] wlan0: associated

--
Regards
dave

^ permalink raw reply

* Re: [PATCH] mac80211: fix logic error ibss merge bssid check
From: Johannes Berg @ 2009-10-11  9:43 UTC (permalink / raw)
  To: Felix Fietkau; +Cc: linux-wireless, John W. Linville
In-Reply-To: <4AD14F26.6030304@openwrt.org>

[-- Attachment #1: Type: text/plain, Size: 623 bytes --]

On Sun, 2009-10-11 at 05:21 +0200, Felix Fietkau wrote:
> Signed-off-by: Felix Fietkau <nbd@openwrt.org>
> 
> --- a/net/mac80211/ibss.c
> +++ b/net/mac80211/ibss.c
> @@ -544,7 +544,7 @@ static void ieee80211_sta_find_ibss(stru
>  		       "%pM\n", bss->cbss.bssid, ifibss->bssid);
>  #endif /* CONFIG_MAC80211_IBSS_DEBUG */
> 
> -	if (bss && memcmp(ifibss->bssid, bss->cbss.bssid, ETH_ALEN)) {
> +	if (bss && !memcmp(ifibss->bssid, bss->cbss.bssid, ETH_ALEN)) {

Acked-by: Johannes Berg <johannes@sipsolutions.net>

I'll also send the race fix now, would appreciate you giving it a try.

Thanks,
johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply

* [PATCH] mac80211: fix ibss race
From: Johannes Berg @ 2009-10-11  9:47 UTC (permalink / raw)
  To: John Linville; +Cc: Felix Fietkau, linux-wireless

When a scan completes, we call ieee80211_sta_find_ibss(),
which is also called from other places. When the scan was
done in software, there's no problem as both run from the
single-threaded mac80211 workqueue and are thus serialised
against each other, but with hardware scan the completion
can be in a different context and race against callers of
this function from the workqueue (e.g. due to beacon RX).
So instead of calling ieee80211_sta_find_ibss() directly,
just arm the timer and have it fire, scheduling the work,
which will invoke ieee80211_sta_find_ibss() (if that is
appropriate in the current state).

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 net/mac80211/ibss.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- wireless-testing.orig/net/mac80211/ibss.c	2009-10-02 10:41:44.000000000 +0200
+++ wireless-testing/net/mac80211/ibss.c	2009-10-06 14:57:27.000000000 +0200
@@ -831,7 +831,7 @@ void ieee80211_ibss_notify_scan_complete
 		if (!sdata->u.ibss.ssid_len)
 			continue;
 		sdata->u.ibss.last_scan_completed = jiffies;
-		ieee80211_sta_find_ibss(sdata);
+		mod_timer(&sdata->u.ibss.timer, 0);
 	}
 	mutex_unlock(&local->iflist_mtx);
 }



^ permalink raw reply

* Re: [mmotm 2009-10-09-01-07] b43/wireless possible circular locking
From: Johannes Berg @ 2009-10-11  9:51 UTC (permalink / raw)
  To: Dave Young; +Cc: akpm, bcm43xx-dev, linux-wireless, linux-kernel
In-Reply-To: <20091011094139.GA2778@darkstar>

[-- Attachment #1: Type: text/plain, Size: 1001 bytes --]

On Sun, 2009-10-11 at 17:41 +0800, Dave Young wrote:
> Hi,
> 
> I got lockdep warnings about possible circular lock with
> b43 interface startup. It looks like a real problem.
> 
> [   71.974542] wlan0: deauthenticating from 00:19:e0:db:24:de by local choice (reason=3)
> [   72.004352] b43-phy0 debug: Removing Interface type 2
> [   72.005431] 
> [   72.005435] =======================================================
> [   72.006168] [ INFO: possible circular locking dependency detected ]
> [   72.006759] 2.6.32-rc3-mm1 #4
> [   72.007047] -------------------------------------------------------
> [   72.007617] ifconfig/2175 is trying to acquire lock:
> [   72.007617]  (&(&rfkill->poll_work)->work){+.+...}, at: [<c0239375>] __cancel_work_timer+0x8c/0x18e
> [   72.007617] 
> [   72.007617] but task is already holding lock:
> [   72.007617]  (&wl->mutex){+.+.+.}, at: [<f8fa5359>] b43_op_stop+0x28/0x6a [b43]

I believe this is already taken care of by Larry.

johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply

* Re: NOHZ: local_softirq_pending 08
From: Johannes Berg @ 2009-10-11 10:08 UTC (permalink / raw)
  To: Dave Young; +Cc: linux-kernel, tglx, linux-wireless, David S. Miller
In-Reply-To: <20091011095217.GA2200@darkstar>

[-- Attachment #1: Type: text/plain, Size: 881 bytes --]

On Sun, 2009-10-11 at 17:52 +0800, Dave Young wrote:

> With kernel 2.6.32-rc3-00052-g0eca52a I got following KERN_ERR
> messages just while using firefox:
> 
> [  130.527399] NOHZ: local_softirq_pending 08

> Any idea? or known issue?

Are you using b43 (or wl12x1)? If so, it's a known issue, but the driver
was recently left without an active maintainer in a brouhaha about a bug
fix.

Cf. http://thread.gmane.org/gmane.linux.kernel.wireless.general/39440

Absent proof that mac80211 is safe to run with BHs enabled, the correct
solution is disabling tasklets around the RX function, unlike all the
proposed patches. However, Michael thinks it's such a bad solution that
he has refused to implement it. So far, nobody has bothered to fix the
drivers.

FWIW, I believe the bug to be in b43 and wl12x1, and not as Michael
thinks in the stack.

johannes


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply

* Re: NOHZ: local_softirq_pending 08
From: Michael Buesch @ 2009-10-11 10:17 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Dave Young, linux-kernel, tglx, linux-wireless, David S. Miller
In-Reply-To: <1255255735.4095.53.camel@johannes.local>

On Sunday 11 October 2009 12:08:55 Johannes Berg wrote:
> On Sun, 2009-10-11 at 17:52 +0800, Dave Young wrote:
> 
> > With kernel 2.6.32-rc3-00052-g0eca52a I got following KERN_ERR
> > messages just while using firefox:
> > 
> > [  130.527399] NOHZ: local_softirq_pending 08
> 
> > Any idea? or known issue?
> 
> Are you using b43 (or wl12x1)? If so, it's a known issue, but the driver
> was recently left without an active maintainer in a brouhaha about a bug
> fix.
> 
> Cf. http://thread.gmane.org/gmane.linux.kernel.wireless.general/39440
> 
> Absent proof that mac80211 is safe to run with BHs enabled, the correct
> solution is disabling tasklets around the RX function, unlike all the
> proposed patches. However, Michael thinks it's such a bad solution that
> he has refused to implement it.

Ehm, no. That's not exactly true.
We call the non-_irqsafe functions, which by definition are designed to
run in non-irq (soft or hard) context. At least that's how I understand the
documentation, last time I read it.
Why don't you simply do local_bh_disable() in those functions, if they
require bh disabled, instead of depending on the driver doing it?

> FWIW, I believe the bug to be in b43 and wl12x1, and not as Michael
> thinks in the stack.

If mac80211 requires BHs disabled, it should do this.

-- 
Greetings, Michael.

^ permalink raw reply

* [PATCH] b43: fix ieee80211_rx() context
From: Johannes Berg @ 2009-10-11 10:19 UTC (permalink / raw)
  To: John Linville; +Cc: David Miller, Kalle Valo, Dave Young, linux-wireless

Due to the way it interacts with the networking
stack and other parts of mac80211, ieee80211_rx()
must be called with disabled softirqs.

Michael, the former maintainer of this driver,
has refused to fix the problem this way instead
proposing a much more invasive patch that could
not even be proved correct wrt. locking inside
mac80211. Regardless of that, he believes this
to be a bug in mac80211, and has also publicly
stated [1] that he does not care about this even
though it is a regression introduced by his own
patches.

Since nobody else seems to be wanting to fix the
problem, I'll just fix it for the benefit of the
many users of this driver.

[1] http://thread.gmane.org/gmane.linux.kernel.wireless.general/39440/focus=40266

Reported-by: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 drivers/net/wireless/b43/xmit.c |    3 +++
 1 file changed, 3 insertions(+)

--- wireless-testing.orig/drivers/net/wireless/b43/xmit.c	2009-10-11 12:11:50.000000000 +0200
+++ wireless-testing/drivers/net/wireless/b43/xmit.c	2009-10-11 12:12:06.000000000 +0200
@@ -690,7 +690,10 @@ void b43_rx(struct b43_wldev *dev, struc
 	}
 
 	memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status));
+
+	local_bh_disable();
 	ieee80211_rx(dev->wl->hw, skb);
+	local_bh_enable();
 
 #if B43_DEBUG
 	dev->rx_count++;



^ permalink raw reply

* Re: [PATCH] b43: fix ieee80211_rx() context
From: Michael Buesch @ 2009-10-11 10:26 UTC (permalink / raw)
  To: Johannes Berg
  Cc: John Linville, David Miller, Kalle Valo, Dave Young,
	linux-wireless
In-Reply-To: <1255256361.4095.56.camel@johannes.local>

On Sunday 11 October 2009 12:19:21 Johannes Berg wrote:
> Due to the way it interacts with the networking
> stack and other parts of mac80211, ieee80211_rx()
> must be called with disabled softirqs.

Is this stated in the documentation somewhere?

> Michael, the former maintainer of this driver,
> has refused to fix the problem this way instead
> proposing a much more invasive patch that could
> not even be proved correct wrt. locking inside
> mac80211. Regardless of that, he believes this
> to be a bug in mac80211, and has also publicly
> stated [1] that he does not care about this even
> though it is a regression introduced by his own
> patches.

What if we leave slander out of the commit messages?

> Since nobody else seems to be wanting to fix the
> problem, I'll just fix it for the benefit of the
> many users of this driver.
> 
> [1] http://thread.gmane.org/gmane.linux.kernel.wireless.general/39440/focus=40266
> 
> Reported-by: Dave Young <hidave.darkstar@gmail.com>
> Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
> ---
>  drivers/net/wireless/b43/xmit.c |    3 +++
>  1 file changed, 3 insertions(+)
> 
> --- wireless-testing.orig/drivers/net/wireless/b43/xmit.c	2009-10-11 12:11:50.000000000 +0200
> +++ wireless-testing/drivers/net/wireless/b43/xmit.c	2009-10-11 12:12:06.000000000 +0200
> @@ -690,7 +690,10 @@ void b43_rx(struct b43_wldev *dev, struc
>  	}
>  
>  	memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status));
> +
> +	local_bh_disable();
>  	ieee80211_rx(dev->wl->hw, skb);
> +	local_bh_enable();
>  
>  #if B43_DEBUG
>  	dev->rx_count++;



-- 
Greetings, Michael.

^ permalink raw reply

* Re: [PATCH] b43: fix ieee80211_rx() context
From: Johannes Berg @ 2009-10-11 10:31 UTC (permalink / raw)
  To: Michael Buesch
  Cc: John Linville, David Miller, Kalle Valo, Dave Young,
	linux-wireless
In-Reply-To: <200910111226.44778.mb@bu3sch.de>

[-- Attachment #1: Type: text/plain, Size: 1401 bytes --]

On Sun, 2009-10-11 at 12:26 +0200, Michael Buesch wrote:
> On Sunday 11 October 2009 12:19:21 Johannes Berg wrote:
> > Due to the way it interacts with the networking
> > stack and other parts of mac80211, ieee80211_rx()
> > must be called with disabled softirqs.
> 
> Is this stated in the documentation somewhere?

No. However, there are many things that aren't in the documentation, I'm
working on a patch to add a note.

> > Michael, the former maintainer of this driver,
> > has refused to fix the problem this way instead
> > proposing a much more invasive patch that could
> > not even be proved correct wrt. locking inside
> > mac80211. Regardless of that, he believes this
> > to be a bug in mac80211, and has also publicly
> > stated [1] that he does not care about this even
> > though it is a regression introduced by his own
> > patches.
> 
> What if we leave slander out of the commit messages?

As far as I know, it is an accurate account of what happened in the
other thread, and as such is not slander. I just wanted to provide a
rationale for me fixing this bug instead of you. If you disagree that
this is an accurate representation, I invite you to summarise the thread
that caused this miserable situation of a known bug not being fixed for
a very long time despite appropriate fixes being known in your own words
for the commit message.

johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply

* Re: [PATCH] b43: fix ieee80211_rx() context
From: Michael Buesch @ 2009-10-11 10:35 UTC (permalink / raw)
  To: Johannes Berg
  Cc: John Linville, David Miller, Kalle Valo, Dave Young,
	linux-wireless
In-Reply-To: <1255257066.4095.66.camel@johannes.local>

On Sunday 11 October 2009 12:31:06 Johannes Berg wrote:
> On Sun, 2009-10-11 at 12:26 +0200, Michael Buesch wrote:
> > On Sunday 11 October 2009 12:19:21 Johannes Berg wrote:
> > > Due to the way it interacts with the networking
> > > stack and other parts of mac80211, ieee80211_rx()
> > > must be called with disabled softirqs.
> > 
> > Is this stated in the documentation somewhere?
> 
> No. However, there are many things that aren't in the documentation, I'm
> working on a patch to add a note.

Ok, thanks a lot.

> I just wanted to provide a
> rationale for me fixing this bug instead of you.

Since when do we require that in commit messages?

> If you disagree that 
> this is an accurate representation, I invite you to summarise the thread
> that caused this miserable situation of a known bug not being fixed for
> a very long time despite appropriate fixes being known in your own words
> for the commit message.

If you'd care _that_ much, you could have just reverted my commit.
Yes, I introduced the regression and I was unable to cook up a fix for it. So the logical
reaction to that would be to revert my commit.

-- 
Greetings, Michael.

^ permalink raw reply

* Re: [PATCH 1/3] iwmc3200top: Add Intel Wireless MultiCom 3200 top driver.
From: David Miller @ 2009-10-11 10:36 UTC (permalink / raw)
  To: tomasw
  Cc: linville, netdev, linux-wireless, linux-mmc, yi.zhu,
	inaky.perez-gonzalez, cindy.h.kao, guy.cohen, ron.rindjunsky
In-Reply-To: <1ba2fa240910110105x33fb251ar3437dd5fee552735@mail.gmail.com>

From: Tomas Winkler <tomasw@gmail.com>
Date: Sun, 11 Oct 2009 10:05:20 +0200

> Just close my eyes and there is new game to play. :)
> It's not in the patchwork, so is there any reason you are not planning
> to add it.   The patch intention was for net-next, it looks like
> I didn't mark it as such, my fault.

Because there still seems to be some confusion between which of these
bits go through John Linville as a wireless driver and which bits
go directly through me.

By default I assume John picks up "wireless" drivers and sends them
to me in his wireless merges.

If that's not the case, explicitly do a fresh submission of this patch
and explicitly ask me to merge it.


^ permalink raw reply

* Re: NOHZ: local_softirq_pending 08
From: Johannes Berg @ 2009-10-11 10:37 UTC (permalink / raw)
  To: Michael Buesch
  Cc: Dave Young, linux-kernel, tglx, linux-wireless, David S. Miller
In-Reply-To: <200910111217.31883.mb@bu3sch.de>

[-- Attachment #1: Type: text/plain, Size: 1366 bytes --]

On Sun, 2009-10-11 at 12:17 +0200, Michael Buesch wrote:

> Ehm, no. That's not exactly true.
> We call the non-_irqsafe functions, which by definition are designed to
> run in non-irq (soft or hard) context. At least that's how I understand the
> documentation, last time I read it.

So maybe the documentation is not entirely accurate. Such happens. From
this and previous threads it's pretty obvious that these functions
cannot be called with softirqs enabled. And I've also stated before that
I do not believe that we should call them with softirqs enabled without
auditing the code for locking, which historically has been a weak point
of mac80211.

> Why don't you simply do local_bh_disable() in those functions, if they
> require bh disabled, instead of depending on the driver doing it?
> 
> > FWIW, I believe the bug to be in b43 and wl12x1, and not as Michael
> > thinks in the stack.
> 
> If mac80211 requires BHs disabled, it should do this.

I don't believe adding that into mac80211, even though it nests, is a
good idea for the case of many drivers where mac80211 and/or the driver
knows. It's pretty damn trivial to add two lines of code to the driver,
instead of penalising every other driver. The typical kernel style is
to have callers provide the required context, not to have a function
accept any possible context.

johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply

* Re: NOHZ: local_softirq_pending 08
From: David Miller @ 2009-10-11 10:38 UTC (permalink / raw)
  To: mb; +Cc: johannes, hidave.darkstar, linux-kernel, tglx, linux-wireless
In-Reply-To: <200910111217.31883.mb@bu3sch.de>

From: Michael Buesch <mb@bu3sch.de>
Date: Sun, 11 Oct 2009 12:17:30 +0200

> On Sunday 11 October 2009 12:08:55 Johannes Berg wrote:
>> On Sun, 2009-10-11 at 17:52 +0800, Dave Young wrote:
>> 
>> FWIW, I believe the bug to be in b43 and wl12x1, and not as Michael
>> thinks in the stack.
> 
> If mac80211 requires BHs disabled, it should do this.

That's overhead, and 99% of drivers do not require it, and therefore
for 99% of drivers it's unnecessary overhead.

In general we avoid doing things like that.  Instead, we put the
cost only where it's actually needed.

^ permalink raw reply

* Re: [PATCH] b43: fix ieee80211_rx() context
From: David Miller @ 2009-10-11 10:39 UTC (permalink / raw)
  To: johannes; +Cc: linville, kalle.valo, hidave.darkstar, linux-wireless
In-Reply-To: <1255256361.4095.56.camel@johannes.local>

From: Johannes Berg <johannes@sipsolutions.net>
Date: Sun, 11 Oct 2009 12:19:21 +0200

> Due to the way it interacts with the networking
> stack and other parts of mac80211, ieee80211_rx()
> must be called with disabled softirqs.
> 
> Michael, the former maintainer of this driver,
> has refused to fix the problem this way instead
> proposing a much more invasive patch that could
> not even be proved correct wrt. locking inside
> mac80211. Regardless of that, he believes this
> to be a bug in mac80211, and has also publicly
> stated [1] that he does not care about this even
> though it is a regression introduced by his own
> patches.
> 
> Since nobody else seems to be wanting to fix the
> problem, I'll just fix it for the benefit of the
> many users of this driver.
> 
> [1] http://thread.gmane.org/gmane.linux.kernel.wireless.general/39440/focus=40266
> 
> Reported-by: Dave Young <hidave.darkstar@gmail.com>
> Signed-off-by: Johannes Berg <johannes@sipsolutions.net>

Acked-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply

* Re: NOHZ: local_softirq_pending 08
From: Dave Young @ 2009-10-11 10:55 UTC (permalink / raw)
  To: Johannes Berg; +Cc: linux-kernel, tglx, linux-wireless, David S. Miller
In-Reply-To: <1255255735.4095.53.camel@johannes.local>

On Sun, Oct 11, 2009 at 6:08 PM, Johannes Berg
<johannes@sipsolutions.net> wrote:
> On Sun, 2009-10-11 at 17:52 +0800, Dave Young wrote:
>
>> With kernel 2.6.32-rc3-00052-g0eca52a I got following KERN_ERR
>> messages just while using firefox:
>>
>> [  130.527399] NOHZ: local_softirq_pending 08
>
>> Any idea? or known issue?
>
> Are you using b43 (or wl12x1)? If so, it's a known issue, but the driver
> was recently left without an active maintainer in a brouhaha about a bug
> fix.

Yes, I'm using b43. I will test the patch you posted in another thread.

>
> Cf. http://thread.gmane.org/gmane.linux.kernel.wireless.general/39440
>
> Absent proof that mac80211 is safe to run with BHs enabled, the correct
> solution is disabling tasklets around the RX function, unlike all the
> proposed patches. However, Michael thinks it's such a bad solution that
> he has refused to implement it. So far, nobody has bothered to fix the
> drivers.
>
> FWIW, I believe the bug to be in b43 and wl12x1, and not as Michael
> thinks in the stack.
>
> johannes
>
>



-- 
Regards
dave

^ permalink raw reply

* Re: NOHZ: local_softirq_pending 08
From: Johannes Berg @ 2009-10-11 11:40 UTC (permalink / raw)
  To: Tilman Schmidt
  Cc: Dave Young, linux-kernel, tglx, linux-wireless, David S. Miller
In-Reply-To: <4AD1BF06.3050103@phoenixsoftware.de>

[-- Attachment #1: Type: text/plain, Size: 468 bytes --]

On Sun, 2009-10-11 at 13:18 +0200, Tilman Schmidt wrote:

> Can you explain a bit more what that message is about?
> I am encountering it in a completely different context
> (PPP over ISDN) and I would like to know where to start
> looking for the cause and developing a fix. The thread
> on linux.kernel.wireless.general only seems to address
> the specific situation in the wireless stack.

Basically, calling netif_rx() with softirqs enabled.

johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply

* Re: NOHZ: local_softirq_pending 08
From: Tilman Schmidt @ 2009-10-11 11:18 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Dave Young, linux-kernel, tglx, linux-wireless, David S. Miller
In-Reply-To: <1255255735.4095.53.camel@johannes.local>

On Sun, 11 Oct 2009 12:08:55 +0200, Johannes Berg wrote:
> On Sun, 2009-10-11 at 17:52 +0800, Dave Young wrote:
> 
>> With kernel 2.6.32-rc3-00052-g0eca52a I got following KERN_ERR
>> messages just while using firefox:
>>
>> [  130.527399] NOHZ: local_softirq_pending 08
> 
>> Any idea? or known issue?
> 
> Are you using b43 (or wl12x1)? If so, it's a known issue, but the driver
> was recently left without an active maintainer in a brouhaha about a bug
> fix.
> 
> Cf. http://thread.gmane.org/gmane.linux.kernel.wireless.general/39440

Can you explain a bit more what that message is about?
I am encountering it in a completely different context
(PPP over ISDN) and I would like to know where to start
looking for the cause and developing a fix. The thread
on linux.kernel.wireless.general only seems to address
the specific situation in the wireless stack.

Thanks,
Tilman


^ permalink raw reply

* Re: [PATCH] b43: fix ieee80211_rx() context
From: Dave Young @ 2009-10-11 11:53 UTC (permalink / raw)
  To: David Miller; +Cc: johannes, linville, kalle.valo, linux-wireless
In-Reply-To: <20091011.033907.35699436.davem@davemloft.net>

On Sun, Oct 11, 2009 at 6:39 PM, David Miller <davem@davemloft.net> wrote:
> From: Johannes Berg <johannes@sipsolutions.net>
> Date: Sun, 11 Oct 2009 12:19:21 +0200
>
>> Due to the way it interacts with the networking
>> stack and other parts of mac80211, ieee80211_rx()
>> must be called with disabled softirqs.
>>
>> Michael, the former maintainer of this driver,
>> has refused to fix the problem this way instead
>> proposing a much more invasive patch that could
>> not even be proved correct wrt. locking inside
>> mac80211. Regardless of that, he believes this
>> to be a bug in mac80211, and has also publicly
>> stated [1] that he does not care about this even
>> though it is a regression introduced by his own
>> patches.
>>
>> Since nobody else seems to be wanting to fix the
>> problem, I'll just fix it for the benefit of the
>> many users of this driver.
>>
>> [1] http://thread.gmane.org/gmane.linux.kernel.wireless.general/39440/focus=40266
>>
>> Reported-by: Dave Young <hidave.darkstar@gmail.com>
>> Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
>
> Acked-by: David S. Miller <davem@davemloft.net>
>

Tested-by: Dave Young <hidave.darkstar@gmail.com>

-- 
Regards
dave

^ permalink raw reply

* RaLink 148f:3070 not working since upgrade from 2.6.30 to 2.6.31
From: Bráulio B O Bhavamitra @ 2009-10-11 12:13 UTC (permalink / raw)
  To: linux-wireless
In-Reply-To: <1df1788c0910110455q570ca89fy6edbda3235a7a35e@mail.gmail.com>

Since the upgrade from kernel 2.6.30 to 2.6.31 on archlinux the usb
wireless from Ralink doesn't work anymore.
Please ask for more info if necessary.

dmesg:
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_regbusy_read: Error - Indirect register access
failed: offset=0x00007010, value=0xffff8800
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_regbusy_read: Error - Indirect register access
failed: offset=0x00007010, value=0xffff8800
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
for offset 0x7010 with error -19.
phy2 -> rt2x00usb_regbusy_read: Error - Indirect register access
failed: offset=0x00007010, value=0xffff8800
usb 2-2: new high speed USB device using ehci_hcd and address 6
usb 2-2: configuration #1 chosen from 1 choice
phy3: Selected rate control algorithm 'minstrel'
Registered led device: rt2800usb-phy3::radio
Registered led device: rt2800usb-phy3::assoc
Registered led device: rt2800usb-phy3::quality
rt2800usb 2-2:1.0: firmware: requesting rt2870.bin
phy3 -> rt2x00lib_request_firmware: Error - Failed to request Firmware.
rt2800usb 2-2:1.0: firmware: requesting rt2870.bin
phy3 -> rt2x00lib_request_firmware: Error - Failed to request Firmware.

lsusb -v:
Bus 002 Device 006: ID 148f:3070 Ralink Technology, Corp.
Device Descriptor:
  bLength                18
  bDescriptorType         1
  bcdUSB               2.00
  bDeviceClass            0 (Defined at Interface level)
  bDeviceSubClass         0
  bDeviceProtocol         0
  bMaxPacketSize0        64
  idVendor           0x148f Ralink Technology, Corp.
  idProduct          0x3070
  bcdDevice            1.01
  iManufacturer           1 Ralink
  iProduct                2 802.11 n WLAN
  iSerial                 3
  bNumConfigurations      1
  Configuration Descriptor:
    bLength                 9
    bDescriptorType         2
    wTotalLength           67
    bNumInterfaces          1
    bConfigurationValue     1
    iConfiguration          0
    bmAttributes         0x80
      (Bus Powered)
    MaxPower              450mA
    Interface Descriptor:
      bLength                 9
      bDescriptorType         4
      bInterfaceNumber        0
      bAlternateSetting       0
      bNumEndpoints           7
      bInterfaceClass       255 Vendor Specific Class
      bInterfaceSubClass    255 Vendor Specific Subclass
      bInterfaceProtocol    255 Vendor Specific Protocol
      iInterface              5
      Endpoint Descriptor:
        bLength                 7
        bDescriptorType         5
        bEndpointAddress     0x81  EP 1 IN
        bmAttributes            2
          Transfer Type            Bulk
          Synch Type               None
          Usage Type               Data
        wMaxPacketSize     0x0200  1x 512 bytes
        bInterval               0
      Endpoint Descriptor:
        bLength                 7
        bDescriptorType         5
        bEndpointAddress     0x01  EP 1 OUT
        bmAttributes            2
          Transfer Type            Bulk
          Synch Type               None
          Usage Type               Data
        wMaxPacketSize     0x0200  1x 512 bytes
        bInterval               0
      Endpoint Descriptor:
        bLength                 7
        bDescriptorType         5
        bEndpointAddress     0x02  EP 2 OUT
        bmAttributes            2
          Transfer Type            Bulk
          Synch Type               None
          Usage Type               Data
        wMaxPacketSize     0x0200  1x 512 bytes
        bInterval               0
      Endpoint Descriptor:
        bLength                 7
        bDescriptorType         5
        bEndpointAddress     0x03  EP 3 OUT
        bmAttributes            2
          Transfer Type            Bulk
          Synch Type               None
          Usage Type               Data
        wMaxPacketSize     0x0200  1x 512 bytes
        bInterval               0
      Endpoint Descriptor:
        bLength                 7
        bDescriptorType         5
        bEndpointAddress     0x04  EP 4 OUT
        bmAttributes            2
          Transfer Type            Bulk
          Synch Type               None
          Usage Type               Data
        wMaxPacketSize     0x0200  1x 512 bytes
        bInterval               0
      Endpoint Descriptor:
        bLength                 7
        bDescriptorType         5
        bEndpointAddress     0x05  EP 5 OUT
        bmAttributes            2
          Transfer Type            Bulk
          Synch Type               None
          Usage Type               Data
        wMaxPacketSize     0x0200  1x 512 bytes
        bInterval               0
      Endpoint Descriptor:
        bLength                 7
        bDescriptorType         5
        bEndpointAddress     0x06  EP 6 OUT
        bmAttributes            2
          Transfer Type            Bulk
          Synch Type               None
          Usage Type               Data
        wMaxPacketSize     0x0200  1x 512 bytes
        bInterval               0
Device Qualifier (for other device speed):
  bLength                10
  bDescriptorType         6
  bcdUSB               2.00
  bDeviceClass            0 (Defined at Interface level)
  bDeviceSubClass         0
  bDeviceProtocol         0
  bMaxPacketSize0        64
  bNumConfigurations      1
Device Status:     0x0000
  (Bus Powered)

^ permalink raw reply

* Re: RaLink 148f:3070 not working since upgrade from 2.6.30 to 2.6.31
From: Bráulio B O Bhavamitra @ 2009-10-11 12:18 UTC (permalink / raw)
  To: linux-wireless
In-Reply-To: <1df1788c0910110513j6847bf97q71059fec4ae9722b@mail.gmail.com>

Also in Ubuntu Karmic Beta (where the firmware is present) it doesn't
work (radio not working, can't find any access point)

2009/10/11 Bráulio B O Bhavamitra <brauliobo@gmail.com>:
> Since the upgrade from kernel 2.6.30 to 2.6.31 on archlinux the usb
> wireless from Ralink doesn't work anymore.
> Please ask for more info if necessary.
>
> dmesg:
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_regbusy_read: Error - Indirect register access
> failed: offset=0x00007010, value=0xffff8800
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_regbusy_read: Error - Indirect register access
> failed: offset=0x00007010, value=0xffff8800
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed
> for offset 0x7010 with error -19.
> phy2 -> rt2x00usb_regbusy_read: Error - Indirect register access
> failed: offset=0x00007010, value=0xffff8800
> usb 2-2: new high speed USB device using ehci_hcd and address 6
> usb 2-2: configuration #1 chosen from 1 choice
> phy3: Selected rate control algorithm 'minstrel'
> Registered led device: rt2800usb-phy3::radio
> Registered led device: rt2800usb-phy3::assoc
> Registered led device: rt2800usb-phy3::quality
> rt2800usb 2-2:1.0: firmware: requesting rt2870.bin
> phy3 -> rt2x00lib_request_firmware: Error - Failed to request Firmware.
> rt2800usb 2-2:1.0: firmware: requesting rt2870.bin
> phy3 -> rt2x00lib_request_firmware: Error - Failed to request Firmware.
>
> lsusb -v:
> Bus 002 Device 006: ID 148f:3070 Ralink Technology, Corp.
> Device Descriptor:
>  bLength                18
>  bDescriptorType         1
>  bcdUSB               2.00
>  bDeviceClass            0 (Defined at Interface level)
>  bDeviceSubClass         0
>  bDeviceProtocol         0
>  bMaxPacketSize0        64
>  idVendor           0x148f Ralink Technology, Corp.
>  idProduct          0x3070
>  bcdDevice            1.01
>  iManufacturer           1 Ralink
>  iProduct                2 802.11 n WLAN
>  iSerial                 3
>  bNumConfigurations      1
>  Configuration Descriptor:
>    bLength                 9
>    bDescriptorType         2
>    wTotalLength           67
>    bNumInterfaces          1
>    bConfigurationValue     1
>    iConfiguration          0
>    bmAttributes         0x80
>      (Bus Powered)
>    MaxPower              450mA
>    Interface Descriptor:
>      bLength                 9
>      bDescriptorType         4
>      bInterfaceNumber        0
>      bAlternateSetting       0
>      bNumEndpoints           7
>      bInterfaceClass       255 Vendor Specific Class
>      bInterfaceSubClass    255 Vendor Specific Subclass
>      bInterfaceProtocol    255 Vendor Specific Protocol
>      iInterface              5
>      Endpoint Descriptor:
>        bLength                 7
>        bDescriptorType         5
>        bEndpointAddress     0x81  EP 1 IN
>        bmAttributes            2
>          Transfer Type            Bulk
>          Synch Type               None
>          Usage Type               Data
>        wMaxPacketSize     0x0200  1x 512 bytes
>        bInterval               0
>      Endpoint Descriptor:
>        bLength                 7
>        bDescriptorType         5
>        bEndpointAddress     0x01  EP 1 OUT
>        bmAttributes            2
>          Transfer Type            Bulk
>          Synch Type               None
>          Usage Type               Data
>        wMaxPacketSize     0x0200  1x 512 bytes
>        bInterval               0
>      Endpoint Descriptor:
>        bLength                 7
>        bDescriptorType         5
>        bEndpointAddress     0x02  EP 2 OUT
>        bmAttributes            2
>          Transfer Type            Bulk
>          Synch Type               None
>          Usage Type               Data
>        wMaxPacketSize     0x0200  1x 512 bytes
>        bInterval               0
>      Endpoint Descriptor:
>        bLength                 7
>        bDescriptorType         5
>        bEndpointAddress     0x03  EP 3 OUT
>        bmAttributes            2
>          Transfer Type            Bulk
>          Synch Type               None
>          Usage Type               Data
>        wMaxPacketSize     0x0200  1x 512 bytes
>        bInterval               0
>      Endpoint Descriptor:
>        bLength                 7
>        bDescriptorType         5
>        bEndpointAddress     0x04  EP 4 OUT
>        bmAttributes            2
>          Transfer Type            Bulk
>          Synch Type               None
>          Usage Type               Data
>        wMaxPacketSize     0x0200  1x 512 bytes
>        bInterval               0
>      Endpoint Descriptor:
>        bLength                 7
>        bDescriptorType         5
>        bEndpointAddress     0x05  EP 5 OUT
>        bmAttributes            2
>          Transfer Type            Bulk
>          Synch Type               None
>          Usage Type               Data
>        wMaxPacketSize     0x0200  1x 512 bytes
>        bInterval               0
>      Endpoint Descriptor:
>        bLength                 7
>        bDescriptorType         5
>        bEndpointAddress     0x06  EP 6 OUT
>        bmAttributes            2
>          Transfer Type            Bulk
>          Synch Type               None
>          Usage Type               Data
>        wMaxPacketSize     0x0200  1x 512 bytes
>        bInterval               0
> Device Qualifier (for other device speed):
>  bLength                10
>  bDescriptorType         6
>  bcdUSB               2.00
>  bDeviceClass            0 (Defined at Interface level)
>  bDeviceSubClass         0
>  bDeviceProtocol         0
>  bMaxPacketSize0        64
>  bNumConfigurations      1
> Device Status:     0x0000
>  (Bus Powered)
>

^ permalink raw reply

* Re: 2.6.31.[12] ath5k regression
From: Richard Zidlicky @ 2009-10-11 12:26 UTC (permalink / raw)
  To: Bob Copeland; +Cc: linux-wireless
In-Reply-To: <20091010125824.GA18841@hash.localnet>

On Sat, Oct 10, 2009 at 08:58:24AM -0400, Bob Copeland wrote:
> On Fri, Oct 09, 2009 at 04:39:22PM +0200, Richard Zidlicky wrote:
> > -       ret = ath5k_hw_reset(ah, sc->opmode, sc->curchan, true);
> > +       ret = ath5k_hw_reset(ah, sc->opmode, sc->curchan, chan != NULL);
> >         if (ret) {
> >                 ATH5K_ERR(sc, "can't reset hardware (%d)\n", ret);
> >                 goto err;
> 
> So, this change effectively just ensures we now program the pcu registers
> at startup (every other time chan should not be null).  So my guess is
> programming the pcu actually causes some problem.  Can you try this patch
> instead?

thanks, compiling it right now. Not quite sure - which version of this
> > -       ret = ath5k_hw_reset(ah, sc->opmode, sc->curchan, true);
> > +       ret = ath5k_hw_reset(ah, sc->opmode, sc->curchan, chan != NULL);

is it supposed to be tested with?

Richard

^ permalink raw reply

* Re: [PATCH 1/3] iwmc3200top: Add Intel Wireless MultiCom 3200 top driver.
From: Tomas Winkler @ 2009-10-11 13:07 UTC (permalink / raw)
  To: David Miller
  Cc: linville, netdev, linux-wireless, linux-mmc, yi.zhu,
	inaky.perez-gonzalez, cindy.h.kao, guy.cohen, ron.rindjunsky
In-Reply-To: <20091011.033647.66923614.davem@davemloft.net>

On Sun, Oct 11, 2009 at 12:36 PM, David Miller <davem@davemloft.net> wrote:
> From: Tomas Winkler <tomasw@gmail.com>
> Date: Sun, 11 Oct 2009 10:05:20 +0200
>
>> Just close my eyes and there is new game to play. :)
>> It's not in the patchwork, so is there any reason you are not planning
>> to add it.   The patch intention was for net-next, it looks like
>> I didn't mark it as such, my fault.
>
> Because there still seems to be some confusion between which of these
> bits go through John Linville as a wireless driver and which bits
> go directly through me.
>
> By default I assume John picks up "wireless" drivers and send them
> to me in his wireless merges to me.
>
> If that's not the case, explicitly do a fresh submission of this patch
> and explicitly ask me to merge it.

Thanks, got it now.
Since this goes for wimax and wifi I'd like to submit it directly to netdev.
I will resolve Marcel's comments and submit again.
Thanks
Tomas

^ permalink raw reply

* [PATCH] mac80211: document ieee80211_rx() context requirement
From: Johannes Berg @ 2009-10-11 13:10 UTC (permalink / raw)
  To: John Linville; +Cc: linux-wireless

ieee80211_rx() must be called with softirqs disabled
since the networking stack requires this for netif_rx()
and some code in mac80211 can assume that it can not
be processing its own tasklet and this call at the same
time.

It may be possible to remove this requirement after a
careful audit of mac80211 and doing any needed locking
improvements in it along with disabling softirqs around
netif_rx(). An alternative might be to push all packet
processing to process context in mac80211, instead of
to the tasklet, and add other synchronisation.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 include/net/mac80211.h |    2 ++
 net/mac80211/rx.c      |    2 ++
 2 files changed, 4 insertions(+)

--- wireless-testing.orig/include/net/mac80211.h	2009-10-11 12:19:54.000000000 +0200
+++ wireless-testing/include/net/mac80211.h	2009-10-11 12:20:41.000000000 +0200
@@ -1669,6 +1669,8 @@ void ieee80211_restart_hw(struct ieee802
  * to this function and ieee80211_rx_irqsafe() may not be mixed for a
  * single hardware.
  *
+ * Note that right now, this function must be called with softirqs disabled.
+ *
  * @hw: the hardware this frame came in on
  * @skb: the buffer to receive, owned by mac80211 after this call
  */
--- wireless-testing.orig/net/mac80211/rx.c	2009-10-11 12:24:39.000000000 +0200
+++ wireless-testing/net/mac80211/rx.c	2009-10-11 12:38:22.000000000 +0200
@@ -2453,6 +2453,8 @@ void ieee80211_rx(struct ieee80211_hw *h
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 
+	WARN_ON_ONCE(softirq_count() == 0);
+
 	if (WARN_ON(status->band < 0 ||
 		    status->band >= IEEE80211_NUM_BANDS))
 		goto drop;



^ permalink raw reply

* Re: 2.6.31.[12] ath5k regression
From: Bob Copeland @ 2009-10-11 13:30 UTC (permalink / raw)
  To: Richard Zidlicky; +Cc: linux-wireless
In-Reply-To: <20091011122616.GA27105@linux-m68k.org>

On Sun, Oct 11, 2009 at 02:26:16PM +0200, Richard Zidlicky wrote:
> thanks, compiling it right now. Not quite sure - which version of this
> > > -       ret = ath5k_hw_reset(ah, sc->opmode, sc->curchan, true);
> > > +       ret = ath5k_hw_reset(ah, sc->opmode, sc->curchan, chan != NULL);
> 
> is it supposed to be tested with?

The "chan != NULL" case.  The patch should apply against latest
wireless-testing (but will probably work with linus-2.6).

-- 
Bob Copeland %% www.bobcopeland.com


^ permalink raw reply

* [RFC] p54pci: skb_over_panic, soft lockup, stall under flood
From: Quintin Pitts @ 2009-10-11 14:28 UTC (permalink / raw)
  To: John Linville; +Cc: linux-wireless, Christian Lamparter

Hi,

Sorry for my lack of experience in all aspects - first time
submitting!!!

In trying to get p54pci driver to be stable on my platform and hardware
- here is a generic patch that seems to accomplish that.  Since the
ViewSonic V210 uses the IT8152 pci bridge - some attention was needed to
get dma related allocation in the first physical 64M.  I have verified
that the dma related allocation is in the first 64M and dmabounce is not
being used - just for those wondering if that was part of the problems.

Platform: ViewSonic V210 arm pxa255
Kernel 2.6.30.5 eabi
Wireless Drivers from compat-wireless-2009-09-30 and what I applied the below patch to.
Firmware used: FW rev 2.13.12.0 - Softmac protocol 5.9

Wireless card: GemTek WL-850FJB minipci card.

phy0: p54 detected a LM86 firmware
p54: rx_mtu reduced from 3240 to 2376
phy0: FW rev 2.13.12.0 - Softmac protocol 5.9
phy0: cryptographic accelerator WEP:YES, TKIP:YES, CCMP:YES
phy0: hwaddr 00:90:4b:c1:06:bc, MAC:isl3890 RF:Frisbee
phy0: Selected rate control algorithm 'minstrel'

device pci info (lspci -v):

00:06.0 Network controller: Intersil Corporation ISL3886 [Prism Javelin/Prism Xbow] (rev 01)
Subsystem: Intersil Corporation Device 0000
Flags: bus master, medium devsel, latency 56, IRQ 217
Memory at 11000000 (32-bit, non-prefetchable) [size=8K]
Capabilities: [dc] Power Management version 1
Kernel driver in use: p54pci
Kernel modules: prism54, p54pci

The reason for this patch is to solve the problems below.

1.  p54p_check_rx_ring - skb_over_panic: Under a ping flood, or when
just left running for a bit, the driver would panic with a
skb_over_panic.  Investigation showed that, for some odd reason, the
device/firmware, instead of writing a length into the data rx_ring
(desc->len), had written the whole dma address (host->host_addr) into
the len/flag location (host->len and host->flags) - the same dma
address that was already in the ring.  I added the following condition
to p54p_check_rx_ring to trap that case, trim the skb, and reset only
the len and flags.  By the way - I used haret to see if I could prove
it happens under wince: I located the dma memory that was being used
for the rings, and it also happens under windows ce, with the len/flag
being set to the same value as the host dma address.  I was scanning
the ring at 1000 times per second (I think) during a flood or iperf,
and would see an occasional len/flag location get set to the same host
address in that ring - it may only happen a few times every minute.
Under normal operation, maybe a few times a day.

   if(unlikely(len == (desc->host_addr & 0xffff)
   && (desc->flags == ((desc->host_addr & 0xffff0000) >> 16))) )

2.  p54p_refill_rx_ring - eventual stall: When very busy (flood), it has
the potential to overrun the last processed rx data ring index,
corrupting the next rings.  This causes some havoc, such as a difference
of some 13 indexes between priv->rx_idx_data and the ring_control
host_idx on an 8 index ring.  This appears to eventually fill up the TX
queue - returning -ENOSPC in p54_assign_address (txrx.c) - because the
ring corruption causes some TX releases to be missed.  I changed
p54p_refill_rx_ring to take an index parameter and use that as the last
processed ring index, instead of using the ring_control device_idx.

3.  p54p_check_rx_ring - eventual stall: On a ping flood, control
P54_CONTROL_TYPE_TXDONE rx packets whose skb is reused seem to cause a
problem the next time around with the same index.  Even though the
length was not the same, the packet was still being seen as a
P54_CONTROL_TYPE_TXDONE packet again.  Side effects varied - the main
end result being the same as #2 above: TX not being released and
-ENOSPC being returned in p54_assign_address (txrx.c) - stall.  The
problem went away if I did not reuse the skb but instead unmapped it
and called dev_kfree_skb when the return from p54_rx was zero.  It is
still unclear why this would be - but I had no problems with the patch
afterwards.

4.  p54p_check_rx_ring - soft lockup in p54p_refill_rx_ring.  This only
occurred during a 5 minute iperf on a fast wireless network, or after
1 to 2 days with the unit left up.  I discovered that the device had
lost its mind and set ring_control->device_idx[ring_index] to exactly
0xFF (255) less than it should be (ram issue?? I don't know).  It
happens on three of my devices the same way.  If left to continue, the
p54p_refill_rx_ring while loop goes negative and soft locks up.  The
patch traps this and returns if device_idx - (*index) is greater than
ring_limit.  The error is only tripped the one time - meaning that the
next time p54p_check_rx_ring is called, the device index is back to
what it should have been.

5.  p54p_open - 1 out of 10 boots will produce "device does not
respond!" or "Cannot boot firmware!".  Minor - but frustrating all the
same.
Doing rmmod p54pci and then modprobe p54pci always works.  It seems
that if p54p_open returns an error, trying again works.  And if
p54_read_eeprom fails, trying again works.

The below was applied to compat-wireless-2009-09-30:

Thanks,

Quintin.

Signed-off-by: Quintin Pitts <geek4linux@gmail.com>

--- 

--- a/drivers/net/wireless/p54/p54pci.c	2009-09-29 23:13:58.000000000 -0500
+++ b/drivers/net/wireless/p54/p54pci.c	2009-10-09 08:15:58.000000000 -0500
@@ -131,7 +131,7 @@ static int p54p_upload_firmware(struct i
 
 static void p54p_refill_rx_ring(struct ieee80211_hw *dev,
 	int ring_index, struct p54p_desc *ring, u32 ring_limit,
-	struct sk_buff **rx_buf)
+	struct sk_buff **rx_buf, u32 index)
 {
 	struct p54p_priv *priv = dev->priv;
 	struct p54p_ring_control *ring_control = priv->ring_control;
@@ -139,7 +139,11 @@ static void p54p_refill_rx_ring(struct i
 
 	idx = le32_to_cpu(ring_control->host_idx[ring_index]);
 	limit = idx;
-	limit -= le32_to_cpu(ring_control->device_idx[ring_index]);
+/*
+ *           Use last processed index instead of device_idx
+ *           so we don't corrupt our ring 
+ */
+	limit -= le32_to_cpu(index);
 	limit = ring_limit - limit;
 
 	i = idx % ring_limit;
@@ -181,9 +185,26 @@ static void p54p_check_rx_ring(struct ie
 	struct p54p_ring_control *ring_control = priv->ring_control;
 	struct p54p_desc *desc;
 	u32 idx, i;
+	int ret;
 
+	idx = le32_to_cpu(ring_control->device_idx[ring_index]);
 	i = (*index) % ring_limit;
-	(*index) = idx = le32_to_cpu(ring_control->device_idx[ring_index]);
+	if(unlikely((idx - (*index)) > ring_limit || 
+ (le32_to_cpu(ring_control->host_idx[ring_index]) - (*index)) > ring_limit)) { 
+  	printk(KERN_DEBUG "%s: devidx jumped *index=%d devidx=%d hostidx=%d ring_limit=%d\n",
+	__func__,(*index),idx,ring_control->host_idx[ring_index],ring_limit);
+/* 
+ * Do nothing things are really wrong - device index has jumped got corrupted
+ *  - wait for it to stabilize 
+ * So far device idx exactly 0xFF (255) bytes less than what it should be. 
+ * only seen to happen on very fast wireless and packet floods and/or iperf test
+ * In testing this error only encountered once - so next time around the 
+ * device index is correct.
+ * if to continue would soft lockup/hang in while loop in p54p_refill_rx_ring
+ */
+		return;
+		}
+	(*index) = idx;
 	idx %= ring_limit;
 	while (i != idx) {
 		u16 len;
@@ -197,25 +218,40 @@ static void p54p_check_rx_ring(struct ie
 			i %= ring_limit;
 			continue;
 		}
+		if(unlikely(len == (desc->host_addr & 0xffff) 
+	&& (desc->flags == ((desc->host_addr & 0xffff0000) >> 16))) ) {
+/* device has put device dma in desc len/flag location - will crash in skb_put
+ * desc->len and desc->flags contain the host_addr -
+ * trap before skb_put and discard
+ * ViewSonic V210 and wireless card GENTEK WL-850 , IT8152 PCI bridge 
+ * happens occasionally - no clear reason or frequency.
+ *  
+ */ 
+		printk(KERN_DEBUG "%s: rx_ring len/flags has address - skipping!\n",__func__); 
+                  skb_trim(skb,0);
+		  desc->len = cpu_to_le16(priv->common.rx_mtu + 32);
+		  desc->flags=0;
+                 
+		} else {
+
 		skb_put(skb, len);
 
-		if (p54_rx(dev, skb)) {
-			pci_unmap_single(priv->pdev,
+		ret=p54_rx(dev,skb);
+		pci_unmap_single(priv->pdev,
 					 le32_to_cpu(desc->host_addr),
 					 priv->common.rx_mtu + 32,
 					 PCI_DMA_FROMDEVICE);
-			rx_buf[i] = NULL;
-			desc->host_addr = 0;
-		} else {
-			skb_trim(skb, 0);
-			desc->len = cpu_to_le16(priv->common.rx_mtu + 32);
-		}
+		if(ret==0)
+			dev_kfree_skb(skb);
+		rx_buf[i] = NULL;
+		desc->host_addr = 0;
+		} /* end of desc->len skb corrupt crash test */
 
 		i++;
 		i %= ring_limit;
 	}
 
-	p54p_refill_rx_ring(dev, ring_index, ring, ring_limit, rx_buf);
+	p54p_refill_rx_ring(dev, ring_index, ring, ring_limit, rx_buf, (*index));
 }
 
 /* caller must hold priv->lock */
@@ -428,10 +464,10 @@ static int p54p_open(struct ieee80211_hw
 	priv->rx_idx_mgmt = priv->tx_idx_mgmt = 0;
 
 	p54p_refill_rx_ring(dev, 0, priv->ring_control->rx_data,
-		ARRAY_SIZE(priv->ring_control->rx_data), priv->rx_buf_data);
+		ARRAY_SIZE(priv->ring_control->rx_data), priv->rx_buf_data, 0);
 
 	p54p_refill_rx_ring(dev, 2, priv->ring_control->rx_mgmt,
-		ARRAY_SIZE(priv->ring_control->rx_mgmt), priv->rx_buf_mgmt);
+		ARRAY_SIZE(priv->ring_control->rx_mgmt), priv->rx_buf_mgmt, 0);
 
 	P54P_WRITE(ring_control_base, cpu_to_le32(priv->ring_control_dma));
 	P54P_READ(ring_control_base);
@@ -550,9 +586,26 @@ static int __devinit p54p_probe(struct p
 	}
 
 	err = p54p_open(dev);
-	if (err)
-		goto err_free_common;
+	if (err) {
+                
+		printk(KERN_DEBUG "%s: p54p_open failed - trying again\n",__func__);
+                msleep(10);
+		err = p54p_open(dev);
+		if (err)
+			goto err_free_common;
+        }
 	err = p54_read_eeprom(dev);
+	if (err)
+	{
+                printk(KERN_DEBUG "%s: p54_read_eeprom failed - trying again\n",__func__);
+		p54p_stop(dev);
+		err = p54p_open(dev);
+                if (err)
+			goto err_free_common;
+		msleep(10);
+		err = p54_read_eeprom(dev);
+             
+	}
 	p54p_stop(dev);
 	if (err)
 		goto err_free_common;


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox