Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 8/8 V2] rtlwifi: Remove variable 'noise' from rtl_stats struct
From: Larry Finger @ 2013-09-16 18:55 UTC (permalink / raw)
  To: linville-2XuSBdqkA4R54TAoqtyWWQ
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA, Larry Finger,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1379357722-17687-1-git-send-email-Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>

All uses of this variable have been removed. Now delete it from the struct
and from a few places that use it in commented-out lines.

Signed-off-by: Larry Finger <Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>
---
 drivers/net/wireless/rtlwifi/rtl8188ee/trx.c | 1 -
 drivers/net/wireless/rtlwifi/rtl8192ce/trx.c | 1 -
 drivers/net/wireless/rtlwifi/rtl8192se/trx.c | 1 -
 drivers/net/wireless/rtlwifi/rtl8723ae/trx.c | 1 -
 drivers/net/wireless/rtlwifi/wifi.h          | 1 -
 5 files changed, 5 deletions(-)

diff --git a/drivers/net/wireless/rtlwifi/rtl8188ee/trx.c b/drivers/net/wireless/rtlwifi/rtl8188ee/trx.c
index a8871d6..5e628ab 100644
--- a/drivers/net/wireless/rtlwifi/rtl8188ee/trx.c
+++ b/drivers/net/wireless/rtlwifi/rtl8188ee/trx.c
@@ -477,7 +477,6 @@ bool rtl88ee_rx_query_desc(struct ieee80211_hw *hw,
 
 	/*rx_status->qual = status->signal; */
 	rx_status->signal = status->recvsignalpower + 10;
-	/*rx_status->noise = -status->noise; */
 	if (status->packet_report_type == TX_REPORT2) {
 		status->macid_valid_entry[0] =
 			 GET_RX_RPT2_DESC_MACID_VALID_1(pdesc);
diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c
index 6ad23b4..52abf0a 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c
@@ -420,7 +420,6 @@ bool rtl92ce_rx_query_desc(struct ieee80211_hw *hw,
 
 	/*rx_status->qual = stats->signal; */
 	rx_status->signal = stats->recvsignalpower + 10;
-	/*rx_status->noise = -stats->noise; */
 
 	return true;
 }
diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c
index c709511..222d2e7 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c
@@ -330,7 +330,6 @@ bool rtl92se_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats,
 
 	/*rx_status->qual = stats->signal; */
 	rx_status->signal = stats->rssi + 10;
-	/*rx_status->noise = -stats->noise; */
 
 	return true;
 }
diff --git a/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c b/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c
index c72758d..e85ec21 100644
--- a/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c
+++ b/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c
@@ -359,7 +359,6 @@ bool rtl8723ae_rx_query_desc(struct ieee80211_hw *hw,
 
 	/*rx_status->qual = status->signal; */
 	rx_status->signal = status->recvsignalpower + 10;
-	/*rx_status->noise = -status->noise; */
 
 	return true;
 }
diff --git a/drivers/net/wireless/rtlwifi/wifi.h b/drivers/net/wireless/rtlwifi/wifi.h
index cc03e7c..284ee8d 100644
--- a/drivers/net/wireless/rtlwifi/wifi.h
+++ b/drivers/net/wireless/rtlwifi/wifi.h
@@ -1535,7 +1535,6 @@ struct rtl_stats {
 	u32 mac_time[2];
 	s8 rssi;
 	u8 signal;
-	u8 noise;
 	u8 rate;		/* hw desc rate */
 	u8 received_channel;
 	u8 control;
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH 6/8 V2] rtlwifi: Fix smatch warnings in usb.c
From: Larry Finger @ 2013-09-16 18:55 UTC (permalink / raw)
  To: linville-2XuSBdqkA4R54TAoqtyWWQ
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA, Larry Finger,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1379357722-17687-1-git-send-email-Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>

Smatch displays the following:
  CHECK   drivers/net/wireless/rtlwifi/usb.c
drivers/net/wireless/rtlwifi/usb.c:458 _rtl_usb_rx_process_agg() warn: assigning (-98) to unsigned variable 'stats.noise'
drivers/net/wireless/rtlwifi/usb.c:503 _rtl_usb_rx_process_noagg() warn: assigning (-98) to unsigned variable 'stats.noise'
drivers/net/wireless/rtlwifi/usb.c:596 _rtl_rx_get_padding() info: ignoring unreachable code.
drivers/net/wireless/rtlwifi/usb.c:596 _rtl_rx_get_padding() info: ignoring unreachable code.

The negative number to an unsigned quantity is fixed by removing the variable
as it is no longer used. The unreachable code info is fixed by including the
appropriate section inside #ifdef .. #endif constructions.

Signed-off-by: Larry Finger <Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>
---
 drivers/net/wireless/rtlwifi/usb.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/rtlwifi/usb.c b/drivers/net/wireless/rtlwifi/usb.c
index e56778c..60cb0b4 100644
--- a/drivers/net/wireless/rtlwifi/usb.c
+++ b/drivers/net/wireless/rtlwifi/usb.c
@@ -455,7 +455,6 @@ static void _rtl_usb_rx_process_agg(struct ieee80211_hw *hw,
 	struct ieee80211_rx_status rx_status = {0};
 	struct rtl_stats stats = {
 		.signal = 0,
-		.noise = -98,
 		.rate = 0,
 	};
 
@@ -498,7 +497,6 @@ static void _rtl_usb_rx_process_noagg(struct ieee80211_hw *hw,
 	struct ieee80211_rx_status rx_status = {0};
 	struct rtl_stats stats = {
 		.signal = 0,
-		.noise = -98,
 		.rate = 0,
 	};
 
@@ -582,12 +580,15 @@ static void _rtl_rx_work(unsigned long param)
 static unsigned int _rtl_rx_get_padding(struct ieee80211_hdr *hdr,
 					unsigned int len)
 {
+#if NET_IP_ALIGN != 0
 	unsigned int padding = 0;
+#endif
 
 	/* make function no-op when possible */
-	if (NET_IP_ALIGN == 0 || len < sizeof(*hdr))
+	if (NET_IP_ALIGN == 0 || len < sizeof(struct ieee80211_hdr))
 		return 0;
 
+#if NET_IP_ALIGN != 0
 	/* alignment calculation as in lbtf_rx() / carl9170_rx_copy_data() */
 	/* TODO: deduplicate common code, define helper function instead? */
 
@@ -608,6 +609,7 @@ static unsigned int _rtl_rx_get_padding(struct ieee80211_hdr *hdr,
 		padding ^= NET_IP_ALIGN;
 
 	return padding;
+#endif
 }
 
 #define __RADIO_TAP_SIZE_RSV	32
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH 5/8 V2] [PATCH 5/7: rtlwifi: Fix smatch warning in pci.c
From: Larry Finger @ 2013-09-16 18:55 UTC (permalink / raw)
  To: linville-2XuSBdqkA4R54TAoqtyWWQ
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA, Larry Finger,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1379357722-17687-1-git-send-email-Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>

Smatch reports the following:
  CHECK   drivers/net/wireless/rtlwifi/pci.c
drivers/net/wireless/rtlwifi/pci.c:739 _rtl_pci_rx_interrupt() warn: assigning (-98) to unsigned variable 'stats.noise'

This variable is not used and is removed.

Signed-off-by: Larry Finger <Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>
---
 drivers/net/wireless/rtlwifi/pci.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c
index 703f839..6295ed2 100644
--- a/drivers/net/wireless/rtlwifi/pci.c
+++ b/drivers/net/wireless/rtlwifi/pci.c
@@ -736,7 +736,6 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw)
 
 	struct rtl_stats stats = {
 		.signal = 0,
-		.noise = -98,
 		.rate = 0,
 	};
 	int index = rtlpci->rx_ring[rx_queue_idx].idx;
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH 4/8 V2] rtlwifi: rtl8192_common: Fix smatch errors and warnings in rtl8192c/dm_common.c
From: Larry Finger @ 2013-09-16 18:55 UTC (permalink / raw)
  To: linville-2XuSBdqkA4R54TAoqtyWWQ
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA, Larry Finger,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1379357722-17687-1-git-send-email-Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>

Smatch lists the following:
  CHECK   drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c
drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c:551 rtl92c_dm_pwdb_monitor() info: ignoring unreachable code.
drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c:551 rtl92c_dm_pwdb_monitor() info: ignoring unreachable code.
drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c:870 rtl92c_dm_txpower_tracking_callback_thermalmeter() error: buffer overflow 'txpwr_level' 2 <= 2
drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c:870 rtl92c_dm_txpower_tracking_callback_thermalmeter() error: buffer overflow 'txpwr_level' 2 <= 2
drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c:882 rtl92c_dm_txpower_tracking_callback_thermalmeter() error: buffer overflow 'txpwr_level' 2 <= 2
drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c:883 rtl92c_dm_txpower_tracking_callback_thermalmeter() error: buffer overflow 'txpwr_level' 2 <= 2
drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c:891 rtl92c_dm_txpower_tracking_callback_thermalmeter() error: buffer overflow 'txpwr_level' 2 <= 2
drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c:892 rtl92c_dm_txpower_tracking_callback_thermalmeter() error: buffer overflow 'txpwr_level' 2 <= 2

The unreachable code message is fixed by commenting out the code that follows a return.
The code is not deleted in case it is needed later.

The errors are fixed by increasing the size of txpwr_level.

Signed-off-by: Larry Finger <Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>
---
 drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c b/drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c
index d2d57a2..0721756 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192c/dm_common.c
@@ -541,6 +541,7 @@ EXPORT_SYMBOL(rtl92c_dm_write_dig);

 static void rtl92c_dm_pwdb_monitor(struct ieee80211_hw *hw)
 {
+/*
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	long tmpentry_max_pwdb = 0, tmpentry_min_pwdb = 0xff;

@@ -564,6 +565,7 @@ static void rtl92c_dm_pwdb_monitor(struct ieee80211_hw *hw)
 	h2c_parameter[0] = 0;

 	rtl92c_fill_h2c_cmd(hw, H2C_RSSI_REPORT, 3, h2c_parameter);
+ */
 }

 void rtl92c_dm_init_edca_turbo(struct ieee80211_hw *hw)
@@ -673,7 +675,7 @@ static void rtl92c_dm_txpower_tracking_callback_thermalmeter(struct ieee80211_hw
 	s8 cck_index = 0;
 	int i;
 	bool is2t = IS_92C_SERIAL(rtlhal->version);
-	s8 txpwr_level[2] = {0, 0};
+	s8 txpwr_level[3] = {0, 0, 0};
 	u8 ofdm_min_index = 6, rf;

 	rtlpriv->dm.txpower_trackinginit = true;
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH 2/8 V2] rtlwifi: rtl8192de: Fix smatch warnings in rtl8192de/hw.c
From: Larry Finger @ 2013-09-16 18:55 UTC (permalink / raw)
  To: linville-2XuSBdqkA4R54TAoqtyWWQ
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA, Larry Finger,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1379357722-17687-1-git-send-email-Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>

Smatch lists the following:
  CHECK   drivers/net/wireless/rtlwifi/rtl8192de/hw.c
drivers/net/wireless/rtlwifi/rtl8192de/hw.c:1200 rtl92de_set_qos() info: ignoring unreachable code.
drivers/net/wireless/rtlwifi/rtl8192de/hw.c:1200 rtl92de_set_qos() info: ignoring unreachable code.

Dead code is commented out. It has not been deleted in case I find a need for
it later.

Signed-off-by: Larry Finger <Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>
---
 drivers/net/wireless/rtlwifi/rtl8192de/hw.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/rtlwifi/rtl8192de/hw.c
index 7dd8f6d..c9b0894 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192de/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192de/hw.c
@@ -1194,6 +1194,7 @@ void rtl92d_linked_set_reg(struct ieee80211_hw *hw)
  * mac80211 will send pkt when scan */
 void rtl92de_set_qos(struct ieee80211_hw *hw, int aci)
 {
+/*
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	rtl92d_dm_init_edca_turbo(hw);
 	return;
@@ -1213,6 +1214,7 @@ void rtl92de_set_qos(struct ieee80211_hw *hw, int aci)
 		RT_ASSERT(false, "invalid aci: %d !\n", aci);
 		break;
 	}
+ */
 }
 
 void rtl92de_enable_interrupt(struct ieee80211_hw *hw)
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [PATCH net-next 2/2] bridge: fix NULL pointer deref of br_port_get_rcu
From: Stephen Hemminger @ 2013-09-16 18:44 UTC (permalink / raw)
  To: vyasevic; +Cc: Hong Zhiguo, netdev, davem, eric.dumazet, Hong Zhiguo
In-Reply-To: <52374FDF.2060301@redhat.com>

On Mon, 16 Sep 2013 14:37:19 -0400
Vlad Yasevich <vyasevic@redhat.com> wrote:

> On 09/16/2013 02:32 PM, Stephen Hemminger wrote:
> > On Mon, 16 Sep 2013 13:58:30 -0400
> > Vlad Yasevich <vyasevic@redhat.com> wrote:
> >
> >> On 09/14/2013 10:42 AM, Hong Zhiguo wrote:
> >>> From: Hong Zhiguo <zhiguohong@tencent.com>
> >>>
> >>> The NULL deref happens when br_handle_frame is called between these
> >>> 2 lines of del_nbp:
> >>> 	dev->priv_flags &= ~IFF_BRIDGE_PORT;
> >>> 	/* --> br_handle_frame is called at this time */
> >>> 	netdev_rx_handler_unregister(dev);
> >>>
> >>> In br_handle_frame the return of br_port_get_rcu(dev) is dereferenced
> >>> without check but br_port_get_rcu(dev) returns NULL if:
> >>> 	!(dev->priv_flags & IFF_BRIDGE_PORT)
> >>>
> >>> Eric Dumazet pointed out the testing of IFF_BRIDGE_PORT is not necessary
> >>> here since we're in rcu_read_lock and we have synchronize_net() in
> >>> netdev_rx_handler_unregister. So remove the testing of IFF_BRIDGE_PORT
> >>> and by the previous patch, make sure br_port_get_rcu is called in
> >>> bridging code.
> >>>
> >>> Signed-off-by: Hong Zhiguo <zhiguohong@tencent.com>
> >>
> >> I think would be better to also include your initial patch to move the
> >> call netdev_rx_handler_unregister(dev) up higher as it would reduce the
> >> racy nature of input processing and port removal.
> >>
> >> As it is now, up until netdev_rx_handler_unregister(dev) call, the input
> >> process may call into the bridge code only to drop the packet.  With
> >> ebtables, that can consume considerable time that is wasted.  By
> >> performing the unregister() sooner we reduce the racy nature of the two
> >> calls.
> >>
> >> -vlad
> >
> > The change to just use rcu_dereference is safe.
> > The flag ordering doesn't matter. it is only valid to check it under RTNL.
> >
> 
> Yes, I agree.  I am just saying that that there are other things that 
> will be happening at the same time as input processing like port state 
> change and fdb table change and it might be worthwhile to easily prevent 
> this racy processing.

Port state change is protected by RTNL.
FDB table is fine, it is RCU protected.

^ permalink raw reply

* Re: [PATCH net-next 2/2] bridge: fix NULL pointer deref of br_port_get_rcu
From: Vlad Yasevich @ 2013-09-16 18:37 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Hong Zhiguo, netdev, davem, eric.dumazet, Hong Zhiguo
In-Reply-To: <20130916113242.38562647@nehalam.linuxnetplumber.net>

On 09/16/2013 02:32 PM, Stephen Hemminger wrote:
> On Mon, 16 Sep 2013 13:58:30 -0400
> Vlad Yasevich <vyasevic@redhat.com> wrote:
>
>> On 09/14/2013 10:42 AM, Hong Zhiguo wrote:
>>> From: Hong Zhiguo <zhiguohong@tencent.com>
>>>
>>> The NULL deref happens when br_handle_frame is called between these
>>> 2 lines of del_nbp:
>>> 	dev->priv_flags &= ~IFF_BRIDGE_PORT;
>>> 	/* --> br_handle_frame is called at this time */
>>> 	netdev_rx_handler_unregister(dev);
>>>
>>> In br_handle_frame the return of br_port_get_rcu(dev) is dereferenced
>>> without check but br_port_get_rcu(dev) returns NULL if:
>>> 	!(dev->priv_flags & IFF_BRIDGE_PORT)
>>>
>>> Eric Dumazet pointed out the testing of IFF_BRIDGE_PORT is not necessary
>>> here since we're in rcu_read_lock and we have synchronize_net() in
>>> netdev_rx_handler_unregister. So remove the testing of IFF_BRIDGE_PORT
>>> and by the previous patch, make sure br_port_get_rcu is called in
>>> bridging code.
>>>
>>> Signed-off-by: Hong Zhiguo <zhiguohong@tencent.com>
>>
>> I think would be better to also include your initial patch to move the
>> call netdev_rx_handler_unregister(dev) up higher as it would reduce the
>> racy nature of input processing and port removal.
>>
>> As it is now, up until netdev_rx_handler_unregister(dev) call, the input
>> process may call into the bridge code only to drop the packet.  With
>> ebtables, that can consume considerable time that is wasted.  By
>> performing the unregister() sooner we reduce the racy nature of the two
>> calls.
>>
>> -vlad
>
> The change to just use rcu_dereference is safe.
> The flag ordering doesn't matter. it is only valid to check it under RTNL.
>

Yes, I agree.  I am just saying that that there are other things that 
will be happening at the same time as input processing like port state 
change and fdb table change and it might be worthwhile to easily prevent 
this racy processing.

-vlad

^ permalink raw reply

* Re: [PATCH net-next 2/2] bridge: fix NULL pointer deref of br_port_get_rcu
From: Stephen Hemminger @ 2013-09-16 18:32 UTC (permalink / raw)
  To: vyasevic; +Cc: Hong Zhiguo, netdev, davem, eric.dumazet, Hong Zhiguo
In-Reply-To: <523746C6.3010700@redhat.com>

On Mon, 16 Sep 2013 13:58:30 -0400
Vlad Yasevich <vyasevic@redhat.com> wrote:

> On 09/14/2013 10:42 AM, Hong Zhiguo wrote:
> > From: Hong Zhiguo <zhiguohong@tencent.com>
> >
> > The NULL deref happens when br_handle_frame is called between these
> > 2 lines of del_nbp:
> > 	dev->priv_flags &= ~IFF_BRIDGE_PORT;
> > 	/* --> br_handle_frame is called at this time */
> > 	netdev_rx_handler_unregister(dev);
> >
> > In br_handle_frame the return of br_port_get_rcu(dev) is dereferenced
> > without check but br_port_get_rcu(dev) returns NULL if:
> > 	!(dev->priv_flags & IFF_BRIDGE_PORT)
> >
> > Eric Dumazet pointed out the testing of IFF_BRIDGE_PORT is not necessary
> > here since we're in rcu_read_lock and we have synchronize_net() in
> > netdev_rx_handler_unregister. So remove the testing of IFF_BRIDGE_PORT
> > and by the previous patch, make sure br_port_get_rcu is called in
> > bridging code.
> >
> > Signed-off-by: Hong Zhiguo <zhiguohong@tencent.com>
> 
> I think would be better to also include your initial patch to move the
> call netdev_rx_handler_unregister(dev) up higher as it would reduce the
> racy nature of input processing and port removal.
> 
> As it is now, up until netdev_rx_handler_unregister(dev) call, the input
> process may call into the bridge code only to drop the packet.  With 
> ebtables, that can consume considerable time that is wasted.  By 
> performing the unregister() sooner we reduce the racy nature of the two
> calls.
> 
> -vlad

The change to just use rcu_dereference is safe.
The flag ordering doesn't matter. it is only valid to check it under RTNL.

^ permalink raw reply

* Re: mvneta: oops in __rcu_read_lock on mirabox
From: Russell King - ARM Linux @ 2013-09-16 18:28 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: Ethan Tuttle, Thomas Petazzoni, Andrew Lunn, Jason Cooper, netdev,
	Ezequiel Garcia, Gregory Clément, linux-arm-kernel
In-Reply-To: <20130916174708.GG3188@1wt.eu>

On Mon, Sep 16, 2013 at 07:47:08PM +0200, Willy Tarreau wrote:
> I'll have to rebuild with your config and exact 3.11 to test again.
> Can you check the packet rate of your ping flood to give an order of
> magnitude so that we're sure to be in the same conditions ?

Also, try swapping kernel binaries between yourselves, so that you can
be sure you're running the exact same kernel on different hardware.

^ permalink raw reply

* Re: mvneta: oops in __rcu_read_lock on mirabox
From: Russell King - ARM Linux @ 2013-09-16 18:25 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: Thomas Petazzoni, Andrew Lunn, Jason Cooper, netdev, Ethan Tuttle,
	Ezequiel Garcia, Gregory Clément, linux-arm-kernel
In-Reply-To: <20130916174514.GF3188@1wt.eu>

On Mon, Sep 16, 2013 at 07:45:14PM +0200, Willy Tarreau wrote:
> This board has a really clean routing and placement, chips are very close.
> That does not rule out the possibility of a lacking termination, but it
> would probably affect more users.

True - though in your photographs, we can't see the tracking for the
data bus, because that's all buried in the inner board layers.
However, there is some evidence in there of trace lengths being matched
which is a good sign. :)

> On Mon, Sep 16, 2013 at 06:14:16PM +0100, Russell King - ARM Linux wrote:
> > Marginal or noisy power supplies could also be a problem - for example,
> > if the impedance of the power supply connections is too great, it may
> > work with some patterns of use but not others.
> 
> We have some margin here, I measured less than 1 Amp to boot and something
> like 6-700 mA in idle if my memory serves me correctly. The 3A PSU and its
> thicker-than-average wires seem safe. I think that Globalscale learned a
> lot from the horrible Guruplug design that all this part needs to be done
> correctly and they did a very clean job this time.

Not quite the power supply I was referring to - I'm talking about the
on-board regulators which supply the 3.3V and other lower voltages to
the SDRAM and SoC - and the quality of their decoupling.  The on-board
regulators will have a certain degree of "line" noise immunity.

If I had to guess, I'd say C366 is probably the output bulk capacitor on
the CPU core supply (which comes via BIT7, C273, L1, U19 being the
switching regulator chip.

I'd also guess one of C370, C396 or C398 supplies the SDRAM - and of
those C370 is the most likely - the resistors in the boxes marked K and
B, and R123 I suspect may be the SDRAM data bus termination (that
covers R107 to R136), though I only count a total of 30 of those
connecting to U5 pin 4 - and that point looks _well_ decoupled with lots
of capacitors (C8-C16, C287 on one side, C7, C246, C247 on the other.)
The other two?  Maybe R105/106 which are on the underside of the CPU,
though they're a long way from that well decoupled point.  R137/138?
They're up by the NAND chip and connect to ground.  Though... if one
of those is for D8...

Anyway, that's all speculation.

^ permalink raw reply

* Re: [PATCH net-next 2/2] bridge: fix NULL pointer deref of br_port_get_rcu
From: Vlad Yasevich @ 2013-09-16 17:58 UTC (permalink / raw)
  To: Hong Zhiguo; +Cc: netdev, davem, eric.dumazet, Hong Zhiguo
In-Reply-To: <1379169748-767-3-git-send-email-zhiguohong@tencent.com>

On 09/14/2013 10:42 AM, Hong Zhiguo wrote:
> From: Hong Zhiguo <zhiguohong@tencent.com>
>
> The NULL deref happens when br_handle_frame is called between these
> 2 lines of del_nbp:
> 	dev->priv_flags &= ~IFF_BRIDGE_PORT;
> 	/* --> br_handle_frame is called at this time */
> 	netdev_rx_handler_unregister(dev);
>
> In br_handle_frame the return of br_port_get_rcu(dev) is dereferenced
> without check but br_port_get_rcu(dev) returns NULL if:
> 	!(dev->priv_flags & IFF_BRIDGE_PORT)
>
> Eric Dumazet pointed out the testing of IFF_BRIDGE_PORT is not necessary
> here since we're in rcu_read_lock and we have synchronize_net() in
> netdev_rx_handler_unregister. So remove the testing of IFF_BRIDGE_PORT
> and by the previous patch, make sure br_port_get_rcu is called in
> bridging code.
>
> Signed-off-by: Hong Zhiguo <zhiguohong@tencent.com>

I think would be better to also include your initial patch to move the
call netdev_rx_handler_unregister(dev) up higher as it would reduce the
racy nature of input processing and port removal.

As it is now, up until netdev_rx_handler_unregister(dev) call, the input
process may call into the bridge code only to drop the packet.  With 
ebtables, that can consume considerable time that is wasted.  By 
performing the unregister() sooner we reduce the racy nature of the two
calls.

-vlad

> ---
>   net/bridge/br_private.h | 5 +----
>   1 file changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index 49fb43e..1aaca0e 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -202,10 +202,7 @@ struct net_bridge_port
>
>   static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
>   {
> -	struct net_bridge_port *port =
> -			rcu_dereference_rtnl(dev->rx_handler_data);
> -
> -	return br_port_exists(dev) ? port : NULL;
> +	return rcu_dereference(dev->rx_handler_data);
>   }
>
>   static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *dev)
>

^ permalink raw reply

* Re: [PATCH net 0/4] bridge: Fix problems around the PVID
From: Vlad Yasevich @ 2013-09-16 17:49 UTC (permalink / raw)
  To: Toshiaki Makita
  Cc: David Miller, makita.toshiaki, netdev, Fernando Luis Vazquez Cao,
	Patrick McHardy
In-Reply-To: <1379074013.1678.16.camel@localhost.localdomain>

On 09/13/2013 08:06 AM, Toshiaki Makita wrote:
> On Thu, 2013-09-12 at 16:00 -0400, David Miller wrote:
>> From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>> Date: Tue, 10 Sep 2013 19:27:54 +0900
>>
>>> There seem to be some undesirable behaviors related with PVID.
>>> 1. It has no effect assigning PVID to a port. PVID cannot be applied
>>> to any frame regardless of whether we set it or not.
>>> 2. FDB entries learned via frames applied PVID are registered with
>>> VID 0 rather than VID value of PVID.
>>> 3. We can set 0 or 4095 as a PVID that are not allowed in IEEE 802.1Q.
>>> This leads interoperational problems such as sending frames with VID
>>> 4095, which is not allowed in IEEE 802.1Q, and treating frames with VID
>>> 0 as they belong to VLAN 0, which is expected to be handled as they have
>>> no VID according to IEEE 802.1Q.
>>>
>>> Note: 2nd and 3rd problems are potential and not exposed unless 1st problem
>>> is fixed, because we cannot activate PVID due to it.
>>
>> Please work out the issues in patch #2 with Vlad and resubmit this
>> series.
>>
>> Thank you.
>
> I'm hovering between whether we should fix the issue by changing vlan 0
> interface behavior in 8021q module or enabling a bridge port to sending
> priority-tagged frames, or another better way.
>
> If you could comment it, I'd appreciate it :)
>
>
> BTW, I think what is discussed in patch #2 is another problem about
> handling priority-tags, and it exists without this patch set applied.
> It looks like that we should prepare another patch set than this to fix
> that problem.
>
> Should I include patches that fix the priority-tags problem in this
> patch set and resubmit them all together?
>

I am thinking that we might need to do it in bridge and it looks like
the simplest way to do it is to have default priority regeneration table
(table 6-5 from 802.1Q doc).

That way I think we would conform to the spec.

-vlad

>
> Thanks,
>
> Toshiaki Makita
>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>
>

^ permalink raw reply

* Re: mvneta: oops in __rcu_read_lock on mirabox
From: Willy Tarreau @ 2013-09-16 17:47 UTC (permalink / raw)
  To: Ethan Tuttle
  Cc: Thomas Petazzoni, Andrew Lunn, Jason Cooper, netdev,
	Ezequiel Garcia, Gregory Clément, linux-arm-kernel
In-Reply-To: <CACzLR4ssOPsmYaEh2ohe4HL5j+67keDYnmR4YezNB=aO7Pj5kA@mail.gmail.com>

On Mon, Sep 16, 2013 at 10:24:05AM -0700, Ethan Tuttle wrote:
> I have not opened my mirabox - but sure, I'll open it up and try those
> other settings when I get a chance.

OK

> Also, you mentioned that you have SMP disabled in your kernel.  It
> looks like it's on in my .config.  Should I run a test with SMP
> disabled?

You may want to try but I wouldn't bet on this.

> I'm surprised that nobody else sees this crash given how easy it is
> for me to reproduce.  BTW, the 3.11 kernel I made with 702821f
> reverted has been humming along for days without issue.

I'll have to rebuild with your config and exact 3.11 to test again.
Can you check the packet rate of your ping flood to give an order of
magnitude so that we're sure to be in the same conditions ?

Willy

^ permalink raw reply

* Re: mvneta: oops in __rcu_read_lock on mirabox
From: Willy Tarreau @ 2013-09-16 17:45 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Thomas Petazzoni, Andrew Lunn, Jason Cooper, netdev, Ethan Tuttle,
	Ezequiel Garcia, Gregory Clément, linux-arm-kernel
In-Reply-To: <20130916171416.GM12758@n2100.arm.linux.org.uk>

On Mon, Sep 16, 2013 at 06:14:16PM +0100, Russell King - ARM Linux wrote:
> On Mon, Sep 16, 2013 at 06:24:50PM +0200, Thomas Petazzoni wrote:
> > Could this be caused by bitflips in the RAM due to bad timings, or
> > overheating or that kind of things?
> 
> Well, the SoC is an Armada 370, which uses Marvell's own Sheeva core.
> From what I understand, this is a CPU designed entirely by Marvell, so
> the interpretation of these codes may not be correct.  This is made
> harder to diagnose in that Marvell is soo secret with their
> documentation; indeed for this CPU there is no information publically
> available (there's only the product briefs).

Yes and their salesmen never respond after many attempts in more than one
year now. Looks like they want to keep their chips for themselves only :-(

> Bad timings could certainly cause bitflips, as could poor routing of
> data line D8 (eg, incorrect termination or routing causing reflections
> on the data line - remember that with modern hardware, almost every
> signal is a transmission line).

This board has a really clean routing and placement, chips are very close.
That does not rule out the possibility of a lacking termination, but it
would probably affect more users.

> Marginal or noisy power supplies could also be a problem - for example,
> if the impedance of the power supply connections is too great, it may
> work with some patterns of use but not others.

We have some margin here, I measured less than 1 Amp to boot and something
like 6-700 mA in idle if my memory serves me correctly. The 3A PSU and its
thicker-than-average wires seem safe. I think that Globalscale learned a
lot from the horrible Guruplug design that all this part needs to be done
correctly and they did a very clean job this time.

> There's soo many possibilities...

Including faulty components. I'm not aware of an equivalent of cpuburn for
ARM, it would probably help, though it's probably harder to design in a
generic way than on x86 where all systems are the same.

> However, if the fault codes above really do equate to what's in the ARMv7
> Architecture Reference Manual, I think we can rule out the routing and
> RAM chips - because a cache parity error points to bit flips in the cache,
> or if there is no cache parity checking implemented, it means something
> is corrupting the state of the SoC - which could be due to bad power
> supplies.
> 
> How do we get to the bottom of this?  That's a very good question - one
> which is going to be very difficult to solve.  Ideally, it means working
> with the manufacturer's design team to try and work out what's going on
> at the board level, probably using logic analysers to capture the bus
> activity leading up to the failure.  Also, checking the power supplies
> at the SoC too - checking that they're within correct tolerance and
> checking the amount of noise on them.
> 
> I think all we can do at the moment is to wait for further reports to roll
> in and see whether a better pattern emerges.

Especially since there are also some heavy testers who don't seem to be
impacted :-/

> If you want to try something - and you suspect it may be heat related,
> you could try putting the board inside a container, monitor the temperature
> inside the container, and put it in your freezer!  Just be careful of the
> temperature of the other devices on the board getting too cold though -
> remember, most consumer electronics is only rated for an *operating*
> temperature range of 0°C to 70°C and your freezer will be something like
> -20°C - so don't let the ambient temperature inside the container go
> below 0°C!  If the CPU is producing lots of heat though, it may keep the
> container sufficiently warm that that's not a problem.  The theory is
> that by making the ambient 15 to 20°C cooler, you will also lower the
> temperature of the hotter parts by a similar amount.

Sometimes you can also do the opposite, heat it gently with an hair dryer
while working to see if problems happen moore frequently. It's often easier
to do than working in a cold place as you don't have issues with the wires,
and it does not accumulate moist.

I've detected some early failures this way ; the NAND in my Iomega Iconnect
is extremely sensitive to heating to the point that I had to stick a heat
sink on it and take the board out of its case to avoid hangs. The hair
dryer quickly revealed the culprit in a few minutes when it took weeks to
get a failure before.

Willy

^ permalink raw reply

* Re: [PATCH 5/7: rtlwifi: Fix smatch warning in pci.c
From: Larry Finger @ 2013-09-16 17:39 UTC (permalink / raw)
  To: David Laight; +Cc: linville, linux-wireless, netdev
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B733B@saturn3.aculab.com>

On 09/16/2013 04:26 AM, David Laight wrote:
>> Smatch reports the following:
>>    CHECK   drivers/net/wireless/rtlwifi/pci.c
>> drivers/net/wireless/rtlwifi/pci.c:739 _rtl_pci_rx_interrupt() warn: assigning (-98) to unsigned
>> variable 'stats.noise'
>>
>> This problem is fixed by changing the value to 256 - 98.
>>
>> Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
>> ---
>>   drivers/net/wireless/rtlwifi/pci.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c
>> index 703f839..bf498f5 100644
>> --- a/drivers/net/wireless/rtlwifi/pci.c
>> +++ b/drivers/net/wireless/rtlwifi/pci.c
>> @@ -736,7 +736,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw)
>>
>>   	struct rtl_stats stats = {
>>   		.signal = 0,
>> -		.noise = -98,
>> +		.noise = 158,	/* -98 dBm */
>>   		.rate = 0,
>>   	};
>>   	int index = rtlpci->rx_ring[rx_queue_idx].idx;
>
> That doesn't look nice at all.
> Something like (unsigned int)-98 would be slightly better,
> but it looks as though something is actually wrong with
> the type of 'noise' itself.

The type of 'noise' is probably a legacy of wireless extensions. In fact, that 
variable is not used and will be deleted in V2 of the patches.

Thanks,

Larry

^ permalink raw reply

* Re: mvneta: oops in __rcu_read_lock on mirabox
From: Ethan Tuttle @ 2013-09-16 17:24 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: Thomas Petazzoni, Andrew Lunn, Jason Cooper, netdev,
	Ezequiel Garcia, Gregory Clément, linux-arm-kernel
In-Reply-To: <20130916164412.GD3188@1wt.eu>

On Mon, Sep 16, 2013 at 9:44 AM, Willy Tarreau <w@1wt.eu> wrote:
> On Mon, Sep 16, 2013 at 06:39:37PM +0200, Willy Tarreau wrote:
>> On Mon, Sep 16, 2013 at 09:35:16AM -0700, Ethan Tuttle wrote:
>> > On Mon, Sep 16, 2013 at 8:51 AM, Thomas Petazzoni
>> > <thomas.petazzoni@free-electrons.com> wrote:
>> > > Willy, Ethan,
>> > >
>> > > On Mon, 16 Sep 2013 08:50:47 +0200, Willy Tarreau wrote:
>> > >
>> > >> I'm currently testing on 3.11.1 (which I had here) and am not getting
>> > >> any issue after 50M packets. My kernel is running in thumb mode and
>> > >> without SMP.
>> > >>
>> > >> Ethan, we'll need your config I guess.
>> > >
>> > > Can both of you also report the U-Boot version you're using, and the
>> > > SoC revision (it's visible in the U-Boot output).
>> >
>> > Mine says:
>> >
>> > U-Boot 2009.08 (Sep 16 2012 - 22:50:06)Marvell version: 1.1.2 NQ
>> > SoC:   MV6710 A1
>>
>> I just checked on my old captures and I have the same here, with more
>> details such as the CPU's revision (Rev 1) :
>>
>>    http://1wt.eu/articles/mirabox-vs-guruplug/
>
> BTW Ethan, I don't know if you have already opened your mirabox, but on the
> link above you'll find settings for trying other frequencies for the CPU. It
> could be nice to try 1 GHz with L2/DDR @500 instead of 1200/600 to see if the
> issue remains or not. If it disappears, there's also a working setting with
> CPU@1.2G, L2@800M and DDR@400M to help find if CPU, L2 or DDR is the culprit.
>
> Willy
>

I have not opened my mirabox - but sure, I'll open it up and try those
other settings when I get a chance.

Also, you mentioned that you have SMP disabled in your kernel.  It
looks like it's on in my .config.  Should I run a test with SMP
disabled?

I'm surprised that nobody else sees this crash given how easy it is
for me to reproduce.  BTW, the 3.11 kernel I made with 702821f
reverted has been humming along for days without issue.

^ permalink raw reply

* Re: mvneta: oops in __rcu_read_lock on mirabox
From: Russell King - ARM Linux @ 2013-09-16 17:14 UTC (permalink / raw)
  To: Thomas Petazzoni
  Cc: Willy Tarreau, Andrew Lunn, Jason Cooper, netdev, Ethan Tuttle,
	Ezequiel Garcia, Gregory Clément, linux-arm-kernel
In-Reply-To: <20130916182450.639084c6@skate>

On Mon, Sep 16, 2013 at 06:24:50PM +0200, Thomas Petazzoni wrote:
> Could this be caused by bitflips in the RAM due to bad timings, or
> overheating or that kind of things?

Well, the SoC is an Armada 370, which uses Marvell's own Sheeva core.
From what I understand, this is a CPU designed entirely by Marvell, so
the interpretation of these codes may not be correct.  This is made
harder to diagnose in that Marvell is soo secret with their
documentation; indeed for this CPU there is no information publically
available (there's only the product briefs).

Bad timings could certainly cause bitflips, as could poor routing of
data line D8 (eg, incorrect termination or routing causing reflections
on the data line - remember that with modern hardware, almost every
signal is a transmission line).

Marginal or noisy power supplies could also be a problem - for example,
if the impedance of the power supply connections is too great, it may
work with some patterns of use but not others.

There's soo many possibilities...

However, if the fault codes above really do equate to what's in the ARMv7
Architecture Reference Manual, I think we can rule out the routing and
RAM chips - because a cache parity error points to bit flips in the cache,
or if there is no cache parity checking implemented, it means something
is corrupting the state of the SoC - which could be due to bad power
supplies.

How do we get to the bottom of this?  That's a very good question - one
which is going to be very difficult to solve.  Ideally, it means working
with the manufacturer's design team to try and work out what's going on
at the board level, probably using logic analysers to capture the bus
activity leading up to the failure.  Also, checking the power supplies
at the SoC too - checking that they're within correct tolerance and
checking the amount of noise on them.

I think all we can do at the moment is to wait for further reports to roll
in and see whether a better pattern emerges.

If you want to try something - and you suspect it may be heat related,
you could try putting the board inside a container, monitor the temperature
inside the container, and put it in your freezer!  Just be careful of the
temperature of the other devices on the board getting too cold though -
remember, most consumer electronics is only rated for an *operating*
temperature range of 0°C to 70°C and your freezer will be something like
-20°C - so don't let the ambient temperature inside the container go
below 0°C!  If the CPU is producing lots of heat though, it may keep the
container sufficiently warm that that's not a problem.  The theory is
that by making the ambient 15 to 20°C cooler, you will also lower the
temperature of the hotter parts by a similar amount.

^ permalink raw reply

* [PATCH RFC net] msi: free msi_desc entry only after we've released the kobject
From: Veaceslav Falico @ 2013-09-16 17:09 UTC (permalink / raw)
  To: netdev; +Cc: Veaceslav Falico, Neil Horman, Russell King, Bjorn Helgaas

Currently, we first do kobject_put(&entry->kobj) and the kfree(entry),
however kobject_put() doesn't guarantee us that it was the last reference
and that the kobj isn't used currently by someone else, so after we
kfree(entry) with the struct kobject - other users will begin using the
freed memory, instead of the actual kobject.

Fix this by using the kobject->release callback, which is called last when
the kobject is indeed not used and is cleaned up - it's msi_kobj_release(),
which can do the kfree(entry) safely (kobject_put/cleanup doesn't use the
kobj itself after ->release() was called, so we're safe).

Also, in case we've failed to create the sysfs directories - just kfree()
it - cause we don't have the kobjects attached.

CC: Neil Horman <nhorman@tuxdriver.com>
CC: Russell King <rmk+kernel@arm.linux.org.uk>
CC: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    This patch is really an RFC, and I don't know for sure how to correctly
    fix it, however it seems to work. Sorry if I've done something horribly
    wrong, it really seems to work ok :).
    
    I've hit the bug with the recent CONFIG_DEBUG_KOBJECT_RELEASE - it basically
    delays the cleanup a bit - so that the chances are a lot higher even for
    one user to hit it.
    
    Or, maybe, it will be better to just add an kobject helper
    kobject_wait_cleanup(), which will return only after it's indeed free? I'm
    really not sure.

 drivers/pci/msi.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index b35f93c..6eabf93 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -395,6 +395,7 @@ static void free_msi_irqs(struct pci_dev *dev)
 			if (list_is_last(&entry->list, &dev->msi_list))
 				iounmap(entry->mask_base);
 		}
+		list_del(&entry->list);
 
 		/*
 		 * Its possible that we get into this path
@@ -405,10 +406,9 @@ static void free_msi_irqs(struct pci_dev *dev)
 		if (entry->kobj.parent) {
 			kobject_del(&entry->kobj);
 			kobject_put(&entry->kobj);
+		} else {
+			kfree(entry);
 		}
-
-		list_del(&entry->list);
-		kfree(entry);
 	}
 }
 
@@ -531,6 +531,7 @@ static void msi_kobj_release(struct kobject *kobj)
 	struct msi_desc *entry = to_msi_desc(kobj);
 
 	pci_dev_put(entry->dev);
+	kfree(entry);
 }
 
 static struct kobj_type msi_irq_ktype = {
-- 
1.8.4

^ permalink raw reply related

* Re: mvneta: oops in __rcu_read_lock on mirabox
From: Willy Tarreau @ 2013-09-16 16:44 UTC (permalink / raw)
  To: Ethan Tuttle
  Cc: Thomas Petazzoni, Andrew Lunn, Jason Cooper, netdev,
	Ezequiel Garcia, Gregory Clément, linux-arm-kernel
In-Reply-To: <20130916163937.GC3188@1wt.eu>

On Mon, Sep 16, 2013 at 06:39:37PM +0200, Willy Tarreau wrote:
> On Mon, Sep 16, 2013 at 09:35:16AM -0700, Ethan Tuttle wrote:
> > On Mon, Sep 16, 2013 at 8:51 AM, Thomas Petazzoni
> > <thomas.petazzoni@free-electrons.com> wrote:
> > > Willy, Ethan,
> > >
> > > On Mon, 16 Sep 2013 08:50:47 +0200, Willy Tarreau wrote:
> > >
> > >> I'm currently testing on 3.11.1 (which I had here) and am not getting
> > >> any issue after 50M packets. My kernel is running in thumb mode and
> > >> without SMP.
> > >>
> > >> Ethan, we'll need your config I guess.
> > >
> > > Can both of you also report the U-Boot version you're using, and the
> > > SoC revision (it's visible in the U-Boot output).
> > 
> > Mine says:
> > 
> > U-Boot 2009.08 (Sep 16 2012 - 22:50:06)Marvell version: 1.1.2 NQ
> > SoC:   MV6710 A1
> 
> I just checked on my old captures and I have the same here, with more
> details such as the CPU's revision (Rev 1) :
> 
>    http://1wt.eu/articles/mirabox-vs-guruplug/

BTW Ethan, I don't know if you have already opened your mirabox, but on the
link above you'll find settings for trying other frequencies for the CPU. It
could be nice to try 1 GHz with L2/DDR @500 instead of 1200/600 to see if the
issue remains or not. If it disappears, there's also a working setting with
CPU@1.2G, L2@800M and DDR@400M to help find if CPU, L2 or DDR is the culprit.

Willy

^ permalink raw reply

* Re: mvneta: oops in __rcu_read_lock on mirabox
From: Willy Tarreau @ 2013-09-16 16:39 UTC (permalink / raw)
  To: Ethan Tuttle
  Cc: Thomas Petazzoni, Andrew Lunn, Jason Cooper, netdev,
	Ezequiel Garcia, Gregory Clément, linux-arm-kernel
In-Reply-To: <CACzLR4twTNjzU8hK3L1cYUXnEH1ydwY6jnYoOG+9vLB3-4uQ4w@mail.gmail.com>

On Mon, Sep 16, 2013 at 09:35:16AM -0700, Ethan Tuttle wrote:
> On Mon, Sep 16, 2013 at 8:51 AM, Thomas Petazzoni
> <thomas.petazzoni@free-electrons.com> wrote:
> > Willy, Ethan,
> >
> > On Mon, 16 Sep 2013 08:50:47 +0200, Willy Tarreau wrote:
> >
> >> I'm currently testing on 3.11.1 (which I had here) and am not getting
> >> any issue after 50M packets. My kernel is running in thumb mode and
> >> without SMP.
> >>
> >> Ethan, we'll need your config I guess.
> >
> > Can both of you also report the U-Boot version you're using, and the
> > SoC revision (it's visible in the U-Boot output).
> 
> Mine says:
> 
> U-Boot 2009.08 (Sep 16 2012 - 22:50:06)Marvell version: 1.1.2 NQ
> SoC:   MV6710 A1

I just checked on my old captures and I have the same here, with more
details such as the CPU's revision (Rev 1) :

   http://1wt.eu/articles/mirabox-vs-guruplug/

Willy

^ permalink raw reply

* Re: mvneta: oops in __rcu_read_lock on mirabox
From: Ethan Tuttle @ 2013-09-16 16:35 UTC (permalink / raw)
  To: Thomas Petazzoni
  Cc: Willy Tarreau, Andrew Lunn, Jason Cooper, netdev, Ezequiel Garcia,
	Gregory Clément, linux-arm-kernel
In-Reply-To: <20130916175152.4e013457@skate>

On Mon, Sep 16, 2013 at 8:51 AM, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> Willy, Ethan,
>
> On Mon, 16 Sep 2013 08:50:47 +0200, Willy Tarreau wrote:
>
>> I'm currently testing on 3.11.1 (which I had here) and am not getting
>> any issue after 50M packets. My kernel is running in thumb mode and
>> without SMP.
>>
>> Ethan, we'll need your config I guess.
>
> Can both of you also report the U-Boot version you're using, and the
> SoC revision (it's visible in the U-Boot output).

Mine says:

U-Boot 2009.08 (Sep 16 2012 - 22:50:06)Marvell version: 1.1.2 NQ
SoC:   MV6710 A1

> Maybe Globalscale is
> shipping Mirabox with a different version of the bootloader, or some
> hardware difference, that is causing problems? (I'm just speculating
> here, but another user already reported having issues with his Mirabox,
> and Russell King analyzed the oops as very likely being hardware
> problems).
>
> Thomas
> --
> Thomas Petazzoni, Free Electrons
> Kernel, drivers, real-time and embedded Linux
> development, consulting, training and support.
> http://free-electrons.com

^ permalink raw reply

* Re: mvneta: oops in __rcu_read_lock on mirabox
From: Thomas Petazzoni @ 2013-09-16 16:24 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Willy Tarreau, Andrew Lunn, Jason Cooper, netdev, Ethan Tuttle,
	Ezequiel Garcia, Gregory Clément, linux-arm-kernel
In-Reply-To: <20130916162209.GL12758@n2100.arm.linux.org.uk>

Russell,

On Mon, 16 Sep 2013 17:22:09 +0100, Russell King - ARM Linux wrote:

> One seemed to be a single bit error in an instruction inside the kernel
> image.  The other was what seems to be an impossible abort.
> 
> I still don't see how we could end up with a prefetch abort inside memset()
> due to the kernel domain being inaccessible, but still be able to get
> an oops out, especially when we dump out the memory for the faulting
> instruction by accessing that memory via that apparantly inaccessible
> domain while running the code which dumps that memory also under this
> apparantly inaccessible domain.  If the domain containing the kernel
> really was inaccessible, the system would be completely dead.
> 
> The only possibilities I can come up with for that is that abort was
> caused by something spurious happening at the hardware level causing
> corruption of the instruction TLB (corrupting the domain index stored
> in the I-TLB) or other CPU control hardware causing it to spuriously
> generate that fault.
> 
> As the domain field in the page table L1 entries covers bit 8, and the
> single bit error with the instruction was also bit 8, maybe there's a
> design weakness on data line bit 8 causing marginal operation.
> 
> To add to this, the abort given in this report gives an IFSR value of
> 0x409, which equates to "Synchronous parity error on memory access"
> in ARMv7.  The other value (0x400) equates to "TLB conflict abort"
> which can only happen with LPAE support enabled...  So this is just
> getting more weird!

Could this be caused by bitflips in the RAM due to bad timings, or
overheating or that kind of things?

Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com

^ permalink raw reply

* Re: mvneta: oops in __rcu_read_lock on mirabox
From: Russell King - ARM Linux @ 2013-09-16 16:22 UTC (permalink / raw)
  To: Thomas Petazzoni
  Cc: Willy Tarreau, Andrew Lunn, Jason Cooper, netdev, Ethan Tuttle,
	Ezequiel Garcia, Gregory Clément, linux-arm-kernel
In-Reply-To: <20130916175152.4e013457@skate>

On Mon, Sep 16, 2013 at 05:51:52PM +0200, Thomas Petazzoni wrote:
> Willy, Ethan,
> 
> On Mon, 16 Sep 2013 08:50:47 +0200, Willy Tarreau wrote:
> 
> > I'm currently testing on 3.11.1 (which I had here) and am not getting
> > any issue after 50M packets. My kernel is running in thumb mode and
> > without SMP.
> > 
> > Ethan, we'll need your config I guess.
> 
> Can both of you also report the U-Boot version you're using, and the
> SoC revision (it's visible in the U-Boot output). Maybe Globalscale is
> shipping Mirabox with a different version of the bootloader, or some
> hardware difference, that is causing problems? (I'm just speculating
> here, but another user already reported having issues with his Mirabox,
> and Russell King analyzed the oops as very likely being hardware
> problems).

One seemed to be a single bit error in an instruction inside the kernel
image.  The other was what seems to be an impossible abort.

I still don't see how we could end up with a prefetch abort inside memset()
due to the kernel domain being inaccessible, but still be able to get
an oops out, especially when we dump out the memory for the faulting
instruction by accessing that memory via that apparantly inaccessible
domain while running the code which dumps that memory also under this
apparantly inaccessible domain.  If the domain containing the kernel
really was inaccessible, the system would be completely dead.

The only possibilities I can come up with for that is that abort was
caused by something spurious happening at the hardware level causing
corruption of the instruction TLB (corrupting the domain index stored
in the I-TLB) or other CPU control hardware causing it to spuriously
generate that fault.

As the domain field in the page table L1 entries covers bit 8, and the
single bit error with the instruction was also bit 8, maybe there's a
design weakness on data line bit 8 causing marginal operation.

To add to this, the abort given in this report gives an IFSR value of
0x409, which equates to "Synchronous parity error on memory access"
in ARMv7.  The other value (0x400) equates to "TLB conflict abort"
which can only happen with LPAE support enabled...  So this is just
getting more weird!

^ permalink raw reply

* [PATCH 4/4] drivers: net: phy: cicada.c: clears warning Use #include <linux/io.h> instead of <asm/io.h>
From: Avinash Kumar @ 2013-09-16 16:09 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, Avinash Kumar

clears following warnings :
WARNING: Use include <linux/io.h> instead of <asm/io.h>
WARNING: Use include <linux/uaccess.h> instead of <asm/uaccess.h>

Signed-off-by: Avinash Kumar <avi.kp.137@gmail.com>
---
 drivers/net/phy/cicada.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/cicada.c b/drivers/net/phy/cicada.c
index db472ff..313a037 100644
--- a/drivers/net/phy/cicada.c
+++ b/drivers/net/phy/cicada.c
@@ -30,9 +30,9 @@
 #include <linux/ethtool.h>
 #include <linux/phy.h>
 
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Cicada Extended Control Register 1 */
 #define MII_CIS8201_EXT_CON1           0x17
-- 
1.7.10.4

^ permalink raw reply related

* Fw: [Bug 61441] New: Network stop working ( (bnx2): transmit queue 0 timed out)
From: Stephen Hemminger @ 2013-09-16 15:53 UTC (permalink / raw)
  To: Michael Chan; +Cc: netdev



Begin forwarded message:

Date: Mon, 16 Sep 2013 03:58:45 -0700
From: "bugzilla-daemon@bugzilla.kernel.org" <bugzilla-daemon@bugzilla.kernel.org>
To: "stephen@networkplumber.org" <stephen@networkplumber.org>
Subject: [Bug 61441] New: Network stop working ( (bnx2): transmit queue 0 timed out)


https://bugzilla.kernel.org/show_bug.cgi?id=61441

            Bug ID: 61441
           Summary: Network stop working ( (bnx2): transmit queue 0 timed
                    out)
           Product: Networking
           Version: 2.5
    Kernel Version: 2.6.32.61
          Hardware: x86-64
                OS: Linux
              Tree: Mainline
            Status: NEW
          Severity: normal
          Priority: P1
         Component: IPV4
          Assignee: shemminger@linux-foundation.org
          Reporter: javibarroso@gmail.com
        Regression: No

Hello,

Do you have any hint to solve this problem ?

We have a NAS server (openfiler) with the latest 2.6.32 (.61) available kernel
compiled. Every 1/2 days the server stop serving.

The next message is show in messages:

Uhhuh. NMI received for unknown reason b1 on CPU 0.
You have some hardware problem, likely on the PCI bus.
Dazed and confused, but trying to continue
------------[ cut here ]------------
WARNING: at /usr/src/linux-2.6.32.61/net/sched/sch_generic.c:261
dev_watchdog+0x247/0x260()
Hardware name: ProLiant BL460c G1
NETDEV WATCHDOG: eth0 (bnx2): transmit queue 0 timed out
Modules linked in: autofs4 nfsd lockd nfs_acl auth_rpcgss sunrpc exportfs fuse
8021q garp stp llc bonding ipv6 dm_round_robin dm_multipath ext4 jbd2 dm_mirror
dm_region_hash dm_log dm_snapshot dm_mod i5000_edac edac_core i5k_amb ipmi_si
sd_mod iTCO_wdt iTCO_vendor_support bnx2 sg tg3 serio_raw hwmon ipmi_msghandler
pcspkr hpilo crc_t10dif usb_storage qla2xxx scsi_transport_fc scsi_tgt shpchp
cciss ext3 jbd mbcache radeon ttm drm_kms_helper drm i2c_algo_bit i2c_core
[last unloaded: microcode]
Pid: 0, comm: swapper Not tainted 2.6.32.61-SAE #2
Call Trace:
 <IRQ>  [<ffffffff81067bbb>] warn_slowpath_common+0x7b/0xc0
 [<ffffffff81067c61>] warn_slowpath_fmt+0x41/0x50
 [<ffffffff813f6c37>] dev_watchdog+0x247/0x260
 [<ffffffff8101a1b3>] ? native_sched_clock+0x13/0x80
 [<ffffffff81079d45>] ? internal_add_timer+0xb5/0x110
 [<ffffffff81079e24>] ? cascade+0x84/0xb0
 [<ffffffff8107a956>] run_timer_softirq+0x196/0x340
 [<ffffffff81097447>] ? ktime_get+0x57/0xd0
 [<ffffffff81070305>] __do_softirq+0xd5/0x200
 [<ffffffff81091006>] ? hrtimer_interrupt+0x146/0x260
 [<ffffffff8101424c>] call_softirq+0x1c/0x30
 [<ffffffff81015bf5>] do_softirq+0x65/0xa0
 [<ffffffff810700e5>] irq_exit+0x85/0x90
 [<ffffffff8149c771>] smp_apic_timer_interrupt+0x71/0x9c
 [<ffffffff81013c13>] apic_timer_interrupt+0x13/0x20
 <EOI>  [<ffffffff8101b94f>] ? mwait_idle+0x6f/0xd0
 [<ffffffff8149a53a>] ? atomic_notifier_call_chain+0x1a/0x20
 [<ffffffff81011e66>] ? cpu_idle+0xb6/0x110
 [<ffffffff8148e7e7>] ? start_secondary+0x1fc/0x23f
---[ end trace 177d1288aad2a52a ]---
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: Chip reset did not complete
bnx2: eth0: BUG! Tx ring full when queue awake!
bnx2: Chip reset did not complete
bnx2: eth0: BUG! Tx ring full when queue awake!
bnx2: Chip reset did not complete
bnx2: eth0: BUG! Tx ring full when queue awake!
bnx2: Chip reset did not complete
bnx2: eth0: BUG! Tx ring full when queue awake!
bnx2: Chip reset did not complete
bnx2: eth0: BUG! Tx ring full when queue awake!
bnx2: Chip reset did not complete
bnx2: eth0: BUG! Tx ring full when queue awake!
bnx2: Chip reset did not complete
bnx2: eth0: BUG! Tx ring full when queue awake!
bnx2: Chip reset did not complete
bnx2: eth0: BUG! Tx ring full when queue awake!
bnx2: Chip reset did not complete
bnx2: eth0: BUG! Tx ring full when queue awake!
bnx2: Chip reset did not complete
bnx2: eth0: BUG! Tx ring full when queue awake!
bnx2: Chip reset did not complete
bnx2: eth0: BUG! Tx ring full when queue awake!
bnx2: Chip reset did not complete
bnx2: eth0: BUG! Tx ring full when queue awake!

No log is found at:

# ipmitool  sel
SEL Information
Version          : 1.5 (v1.5, v2 compliant)
Entries          : 0
Free Space       : 1024 bytes
Percent Used     : 0%
Last Add Time    : Not Available
Last Del Time    : 08/02/2007 14:23:47
Overflow         : false
Supported Cmds   : None
pear in ipmi:

Can we configure bnx2 module to mitigate that BUG ?

Thank you very much

-- 
You are receiving this mail because:
You are the assignee for the bug.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox