* Kernel crash when using multiple interfaces
@ 2015-05-15 12:54 Simon Vincent
2015-05-15 14:20 ` Alexander Aring
2015-05-15 15:28 ` Alexander Aring
0 siblings, 2 replies; 12+ messages in thread
From: Simon Vincent @ 2015-05-15 12:54 UTC (permalink / raw)
To: linux-wpan
I have found that the kernel crashes when multiple 802.15.4 interfaces are
used at the same time.
I have tracked it down in the kernel to net/mac802154/tx.c
The problem is the ieee802154_xmit_cb is a global variable so after it
has been assigned and added to the work queue it can be
corrupted/changed by another interface transmitting a packet.
I have fixed it by allocating the structure on the heap. If this is a
satisfactory fix I can submit it as a patch.
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index c62e956..168d377 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -39,8 +39,6 @@ struct ieee802154_xmit_cb {
struct ieee802154_local *local;
};
-static struct ieee802154_xmit_cb ieee802154_xmit_cb;
-
static void ieee802154_xmit_worker(struct work_struct *work)
{
struct ieee802154_xmit_cb *cb =
@@ -66,6 +64,7 @@ static void ieee802154_xmit_worker(struct work_struct
*work)
dev->stats.tx_bytes += skb->len;
rtnl_unlock();
+ kfree(cb);
return;
@@ -74,6 +73,7 @@ err_tx:
ieee802154_wake_queue(&local->hw);
rtnl_unlock();
kfree_skb(skb);
+ kfree(cb);
netdev_dbg(dev, "transmission failed\n");
}
@@ -81,8 +81,8 @@ static netdev_tx_t
ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
+ struct ieee802154_xmit_cb *ieee802154_xmit_cb_ptr;
int ret;
-
if (!(local->hw.flags & IEEE802154_HW_TX_OMIT_CKSUM)) {
u16 crc = crc_ccitt(0, skb->data, skb->len);
@@ -106,11 +106,11 @@ ieee802154_tx(struct ieee802154_local *local,
struct sk_buff *skb)
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
} else {
- INIT_WORK(&ieee802154_xmit_cb.work, ieee802154_xmit_worker);
- ieee802154_xmit_cb.skb = skb;
- ieee802154_xmit_cb.local = local;
-
- queue_work(local->workqueue, &ieee802154_xmit_cb.work);
+ ieee802154_xmit_cb_ptr = kmalloc(sizeof(struct
ieee802154_xmit_cb), GFP_ATOMIC);
+ INIT_WORK(&ieee802154_xmit_cb_ptr->work, ieee802154_xmit_worker);
+ ieee802154_xmit_cb_ptr->skb = skb;
+ ieee802154_xmit_cb_ptr->local = local;
+ queue_work(local->workqueue, &ieee802154_xmit_cb_ptr->work);
}
return NETDEV_TX_OK;
- Simon
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: Kernel crash when using multiple interfaces
2015-05-15 12:54 Kernel crash when using multiple interfaces Simon Vincent
@ 2015-05-15 14:20 ` Alexander Aring
2015-05-15 15:02 ` Simon Vincent
2015-05-15 15:28 ` Alexander Aring
1 sibling, 1 reply; 12+ messages in thread
From: Alexander Aring @ 2015-05-15 14:20 UTC (permalink / raw)
To: Simon Vincent; +Cc: linux-wpan
Hi Simon.
On Fri, May 15, 2015 at 01:54:10PM +0100, Simon Vincent wrote:
> I have found the Kernel crashes when multiple 802.15.4 interfaces are used
> at the same time.
> I have tracked it down in the kernel to net/mac802154/tx.c
> The problem is the ieee802154_xmit_cb is a global variable so after it has
> been assigned and added to the work queue it can be corrupted/changed by
> another interface transmitting a packet.
>
> I have fixed it by allocating the structure on the heap. If this is a
> satisfactory fix I can submit it as a patch.
>
first, thank you for finding this issue.
Yes, with multiple interfaces this code has issues. If we declare
ieee802154_xmit_cb per phy instead of sharing one across all phys, this
should be fixed.
Can you try the following? This should be a solution without calling kmalloc
in this callback:
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index eec668f..c9b38c0 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -28,11 +28,21 @@
#include "llsec.h"
+
/* mac802154 device private data */
struct ieee802154_local {
struct ieee802154_hw hw;
const struct ieee802154_ops *ops;
+ /* xmit worker handler
+ * TODO remove this xmit_sync is debprecated
+ */
+ struct ieee802154_xmit_cb {
+ struct sk_buff *skb;
+ struct work_struct work;
+ struct ieee802154_local *local;
+ } xmit_cb;
+
/* ieee802154 phy */
struct wpan_phy *phy;
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index c62e956..afbddc0 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -30,17 +30,6 @@
#include "ieee802154_i.h"
#include "driver-ops.h"
-/* IEEE 802.15.4 transceivers can sleep during the xmit session, so process
- * packets through the workqueue.
- */
-struct ieee802154_xmit_cb {
- struct sk_buff *skb;
- struct work_struct work;
- struct ieee802154_local *local;
-};
-
-static struct ieee802154_xmit_cb ieee802154_xmit_cb;
-
static void ieee802154_xmit_worker(struct work_struct *work)
{
struct ieee802154_xmit_cb *cb =
@@ -106,11 +95,11 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
} else {
- INIT_WORK(&ieee802154_xmit_cb.work, ieee802154_xmit_worker);
- ieee802154_xmit_cb.skb = skb;
- ieee802154_xmit_cb.local = local;
+ INIT_WORK(&local->xmit_cb.work, ieee802154_xmit_worker);
+ local->xmit_cb.skb = skb;
+ local->xmit_cb.local = local;
- queue_work(local->workqueue, &ieee802154_xmit_cb.work);
+ queue_work(local->workqueue, &local->xmit_cb.work);
}
return NETDEV_TX_OK;
I have another question: what is your use case for multiple
interfaces? I am still searching for one... Most multiple-interface setups
can't be running, because once the phy sets registers (address
filters, promiscuous mode, etc.) we can't run another interface with
different mac values (we have only one set of registers in the phy).
This means we end up with multiple wpan interfaces with the same mac
address, which effectively represent one interface. See [0], which checks
for differing settings.
The only driver which doesn't do any mac functionality is the fakelb
driver. This driver is comparable to a phy without any mac
functionality.
I was thinking about removing the multiple interface support and having only
one interface which can have different types, so you could morph a
node into a monitor and back, for example. But maybe there exists a
real use case for having multiple interfaces on one phy...
- Alex
[0] http://lxr.free-electrons.com/source/net/mac802154/iface.c#L165
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: Kernel crash when using multiple interfaces
2015-05-15 14:20 ` Alexander Aring
@ 2015-05-15 15:02 ` Simon Vincent
2015-05-15 15:23 ` Alexander Aring
2015-05-16 15:33 ` Alexander Aring
0 siblings, 2 replies; 12+ messages in thread
From: Simon Vincent @ 2015-05-15 15:02 UTC (permalink / raw)
To: Alexander Aring; +Cc: linux-wpan
I tried the solution you proposed but it did not work, it resulted in a
kernel crash (bad paging request).
We require multiple 802.15.4 interfaces for routing between different
802.15.4 networks. We have a 802.15.4 powerline phy (Hanadu [1]) and a
802.15.4 radio phy (MRF24J40). On some boxes we have both phys and route
using RPL between the powerline and radio networks.
[1] http://www.hanadu.org/
- Simon
On 15/05/15 15:20, Alexander Aring wrote:
> Hi Simon.
>
> On Fri, May 15, 2015 at 01:54:10PM +0100, Simon Vincent wrote:
>> I have found the Kernel crashes when multiple 802.15.4 interfaces are used
>> at the same time.
>> I have tracked it down in the kernel to net/mac802154/tx.c
>> The problem is the ieee802154_xmit_cb is a global variable so after it has
>> been assigned and added to the work queue it can be corrupted/changed by
>> another interface transmitting a packet.
>>
>> I have fixed it by allocating the structure on the heap. If this is a
>> satisfactory fix I can submit it as a patch.
>>
> first, thank you for finding this issue.
>
> Yes, with multiple interfaces this code have issues. When we declare
> ieee802154_xmit_cb per phy instead of one for every phy this should be
> fixed.
>
> Can you try the following? This should be a solution without calling kmalloc
> in this callback:
>
> diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
> index eec668f..c9b38c0 100644
> --- a/net/mac802154/ieee802154_i.h
> +++ b/net/mac802154/ieee802154_i.h
> @@ -28,11 +28,21 @@
>
> #include "llsec.h"
>
> +
> /* mac802154 device private data */
> struct ieee802154_local {
> struct ieee802154_hw hw;
> const struct ieee802154_ops *ops;
>
> + /* xmit worker handler
> + * TODO remove this xmit_sync is debprecated
> + */
> + struct ieee802154_xmit_cb {
> + struct sk_buff *skb;
> + struct work_struct work;
> + struct ieee802154_local *local;
> + } xmit_cb;
> +
> /* ieee802154 phy */
> struct wpan_phy *phy;
>
> diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
> index c62e956..afbddc0 100644
> --- a/net/mac802154/tx.c
> +++ b/net/mac802154/tx.c
> @@ -30,17 +30,6 @@
> #include "ieee802154_i.h"
> #include "driver-ops.h"
>
> -/* IEEE 802.15.4 transceivers can sleep during the xmit session, so process
> - * packets through the workqueue.
> - */
> -struct ieee802154_xmit_cb {
> - struct sk_buff *skb;
> - struct work_struct work;
> - struct ieee802154_local *local;
> -};
> -
> -static struct ieee802154_xmit_cb ieee802154_xmit_cb;
> -
> static void ieee802154_xmit_worker(struct work_struct *work)
> {
> struct ieee802154_xmit_cb *cb =
> @@ -106,11 +95,11 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
> dev->stats.tx_packets++;
> dev->stats.tx_bytes += skb->len;
> } else {
> - INIT_WORK(&ieee802154_xmit_cb.work, ieee802154_xmit_worker);
> - ieee802154_xmit_cb.skb = skb;
> - ieee802154_xmit_cb.local = local;
> + INIT_WORK(&local->xmit_cb.work, ieee802154_xmit_worker);
> + local->xmit_cb.skb = skb;
> + local->xmit_cb.local = local;
>
> - queue_work(local->workqueue, &ieee802154_xmit_cb.work);
> + queue_work(local->workqueue, &local->xmit_cb.work);
> }
>
> return NETDEV_TX_OK;
>
>
>
>
> I have anoother question? Which is your use case to use multiple
> interfaces? I am still searching for one... The most multiple interfaces
> can't be running because if the phy sets registers like (address
> filters, promiscuous mode, etc) we can't running another interface with
> different mac values (because we have only one registers in the phy).
>
> This means we have a lot of multiple wpan interfaces with the same mac
> address which represents one. See [0] which checks on different
> settings.
>
> The only one driver (which don't do any mac functionality) is the fakelb
> driver. This driver is compareable with a phy without any mac
> functionality.
>
> I was thinking about to remove the multiple interface support and only
> have one interface which can have different types, so you could morph
> node to monitor and backwards for example. But maybe there exists a
> real use-case for have multiple interfaces on one phy...
>
> - Alex
>
> [0] http://lxr.free-electrons.com/source/net/mac802154/iface.c#L165
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: Kernel crash when using multiple interfaces
2015-05-15 15:02 ` Simon Vincent
@ 2015-05-15 15:23 ` Alexander Aring
2015-05-16 15:33 ` Alexander Aring
1 sibling, 0 replies; 12+ messages in thread
From: Alexander Aring @ 2015-05-15 15:23 UTC (permalink / raw)
To: Simon Vincent; +Cc: linux-wpan
On Fri, May 15, 2015 at 04:02:32PM +0100, Simon Vincent wrote:
> I tried the solution you proposed but it did not work, it resulted in a
> kernel crash (bad paging request).
>
Okay, then simply send a patch with your solution. This should only
affect drivers which use the xmit_sync callback anyway.
I will comment on a point in your previous patch.
> We require multiple 802.15.4 interfaces for routing between different
> 802.15.4 networks. We have a 802.15.4 powerline phy (Hanadu [1]) and a
> 802.15.4 radio phy (MRF24J40). On some boxes we have both phys and route
> using RPL between the powerline and radio networks.
>
Okay, then we aren't talking about multiple interfaces. We are talking about
multiple phys, and that's when the issue occurs, because we have
multiple ieee802154_hw but one global ieee802154_xmit_cb.
My patch should define ieee802154_xmit_cb per ieee802154_hw instead.
I don't know what your current issue is.
The ieee802154_xmit_cb is then protected by stop/wake queue, which
guarantees that this struct isn't used twice (in the case of multiple
interfaces); for multiple phys this is currently a bug.
- Alex
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: Kernel crash when using multiple interfaces
2015-05-15 15:02 ` Simon Vincent
2015-05-15 15:23 ` Alexander Aring
@ 2015-05-16 15:33 ` Alexander Aring
2015-05-18 10:57 ` Simon Vincent
1 sibling, 1 reply; 12+ messages in thread
From: Alexander Aring @ 2015-05-16 15:33 UTC (permalink / raw)
To: Simon Vincent; +Cc: linux-wpan
Hi Simon,
On Fri, May 15, 2015 at 04:02:32PM +0100, Simon Vincent wrote:
> I tried the solution you proposed but it did not work, it resulted in a
> kernel crash (bad paging request).
>
I can now reproduce your issue when I am using the fakelb driver _with_
the xmit_sync callback and two virtual phys.
I tried my patch and it's working fine afterwards; can you please check
whether the "kernel crash (bad paging request)" is another issue, or whether
something failed while applying the diff?
Can you check that again? This issue should be fixed in the bluetooth tree;
it is a very ugly bug.
- Alex
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: Kernel crash when using multiple interfaces
2015-05-16 15:33 ` Alexander Aring
@ 2015-05-18 10:57 ` Simon Vincent
2015-05-18 14:00 ` Alexander Aring
0 siblings, 1 reply; 12+ messages in thread
From: Simon Vincent @ 2015-05-18 10:57 UTC (permalink / raw)
To: Alexander Aring; +Cc: linux-wpan
Hi Alex,
I can confirm your patch still results in the bad paging request.
Maybe you are not seeing it as you have a slightly different setup.
If I understand correctly you have two virtual phys going to a single
wpan interface
e.g.
phy0 -> wpan0
phy1 -> wpan0
My setup is two separate transceivers. Each with its own phy and wpan
interface.
e.g.
phy0 -> wpan0
phy1 -> wpan1
- Simon
On 16/05/15 16:33, Alexander Aring wrote:
> Hi Simon,
>
> On Fri, May 15, 2015 at 04:02:32PM +0100, Simon Vincent wrote:
>> I tried the solution you proposed but it did not work, it resulted in a
>> kernel crash (bad paging request).
>>
> I can now reproduce your issue when I am using the fakelb driver _with_
> xmit_sync callback and two vitual phys.
>
> I tried my patch and it's working fine afterwards, can you please check
> if the "kernel crash (bad paging request)" isn't another issue or
> something failed while applying the diff?
>
> Can you check that again? This issue should be fixed into bluetooth, very
> ugly bug.
>
> - Alex
> --
> To unsubscribe from this list: send the line "unsubscribe linux-wpan" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: Kernel crash when using multiple interfaces
2015-05-18 10:57 ` Simon Vincent
@ 2015-05-18 14:00 ` Alexander Aring
2015-05-18 15:05 ` Simon Vincent
0 siblings, 1 reply; 12+ messages in thread
From: Alexander Aring @ 2015-05-18 14:00 UTC (permalink / raw)
To: Simon Vincent; +Cc: linux-wpan
Hi Simon,
On Mon, May 18, 2015 at 11:57:19AM +0100, Simon Vincent wrote:
> Hi Alex,
>
> I can confirm your patch still results in the bad paging request.
> Maybe you are not seeing it as you have a slightly different setup.
>
> If I understand correctly you have two virtual phys going to a single wpan
> interface
> e.g.
> phy0 -> wpan0
> phy1 -> wpan0
>
> My setup is two separate transceivers. Each with its own phy and wpan
> interface.
> e.g.
> phy0 -> wpan0
> phy1 -> wpan1
>
From the view of the mac802154 layer this setup should be the same. Both run
alloc_hw and register_hw twice to alloc/register a wpan phy. The
driver layer is different.
With your solution of putting the worker resource on the heap, is the
problem solved?
Can you provide more information about the "bad paging request" issue? A
stacktrace or something else?
I currently have no idea why you have this issue.
- Alex
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: Kernel crash when using multiple interfaces
2015-05-18 14:00 ` Alexander Aring
@ 2015-05-18 15:05 ` Simon Vincent
2015-05-18 15:37 ` Alexander Aring
0 siblings, 1 reply; 12+ messages in thread
From: Simon Vincent @ 2015-05-18 15:05 UTC (permalink / raw)
To: Alexander Aring; +Cc: linux-wpan
With your patch I get either a "bad paging request" or a NULL pointer
dereference crash at startup. I have not had any problems with my patch.
Here are two stack traces I get.
[ 12.223057] [<c04da1b8>] (ieee802154_stop_queue) from [<c04d6d64>]
(ieee802154_tx+0x6c/0x170)
[ 12.231560] [<c04d6d64>] (ieee802154_tx) from [<c03d30e4>]
(dev_hard_start_xmit+0x224/0x304)
[ 12.239979] [<c03d30e4>] (dev_hard_start_xmit) from [<c03ec118>]
(sch_direct_xmit+0xc4/0x1f4)
[ 12.248474] [<c03ec118>] (sch_direct_xmit) from [<c03d3368>]
(__dev_queue_xmit+0x1a4/0x4c4)
[ 12.256803] [<c03d3368>] (__dev_queue_xmit) from [<c04d6484>]
(lowpan_xmit+0x2a8/0x33c)
[ 12.264787] [<c04d6484>] (lowpan_xmit) from [<c03d30e4>]
(dev_hard_start_xmit+0x224/0x304)
[ 12.273035] [<c03d30e4>] (dev_hard_start_xmit) from [<c03d35e8>]
(__dev_queue_xmit+0x424/0x4c4)
[ 12.281717] [<c03d35e8>] (__dev_queue_xmit) from [<c04684d8>]
(ip6_finish_output2+0x1a0/0x5b4)
[ 12.290308] [<c04684d8>] (ip6_finish_output2) from [<c046c344>]
(ip6_output+0xb0/0x184)
[ 12.298304] [<c046c344>] (ip6_output) from [<c048aa90>]
(mld_sendpack+0x3c8/0x4b8)
[ 12.305851] [<c048aa90>] (mld_sendpack) from [<c048b3d4>]
(mld_ifc_timer_expire+0x1d4/0x300)
[ 12.314284] [<c048b3d4>] (mld_ifc_timer_expire) from [<c006ba44>]
(call_timer_fn.isra.25+0x24/0x98)
[ 12.323301] [<c006ba44>] (call_timer_fn.isra.25) from [<c006bc6c>]
(run_timer_softirq+0x1b4/0x260)
[ 12.332239] [<c006bc6c>] (run_timer_softirq) from [<c0026590>]
(__do_softirq+0x120/0x238)
[ 12.340391] [<c0026590>] (__do_softirq) from [<c0026948>]
(irq_exit+0xc0/0xfc)
[ 12.347599] [<c0026948>] (irq_exit) from [<c005e6bc>]
(__handle_domain_irq+0x80/0xec)
or
[ 12.548824] [<c04da1b8>] (ieee802154_stop_queue) from [<c04d6d64>]
(ieee802154_tx+0x6c/0x170)
[ 12.557326] [<c04d6d64>] (ieee802154_tx) from [<c03d30e4>]
(dev_hard_start_xmit+0x224/0x304)
[ 12.565745] [<c03d30e4>] (dev_hard_start_xmit) from [<c03ec118>]
(sch_direct_xmit+0xc4/0x1f4)
[ 12.574241] [<c03ec118>] (sch_direct_xmit) from [<c03d3368>]
(__dev_queue_xmit+0x1a4/0x4c4)
[ 12.582569] [<c03d3368>] (__dev_queue_xmit) from [<c04d6484>]
(lowpan_xmit+0x2a8/0x33c)
[ 12.590556] [<c04d6484>] (lowpan_xmit) from [<c03d30e4>]
(dev_hard_start_xmit+0x224/0x304)
[ 12.598803] [<c03d30e4>] (dev_hard_start_xmit) from [<c03d35e8>]
(__dev_queue_xmit+0x424/0x4c4)
[ 12.607482] [<c03d35e8>] (__dev_queue_xmit) from [<c04684d8>]
(ip6_finish_output2+0x1a0/0x5b4)
[ 12.616074] [<c04684d8>] (ip6_finish_output2) from [<c046c344>]
(ip6_output+0xb0/0x184)
[ 12.624071] [<c046c344>] (ip6_output) from [<c048aa90>]
(mld_sendpack+0x3c8/0x4b8)
[ 12.631619] [<c048aa90>] (mld_sendpack) from [<c048d484>]
(ipv6_mc_dad_complete+0x2c/0x54)
[ 12.639864] [<c048d484>] (ipv6_mc_dad_complete) from [<c04736f8>]
(addrconf_dad_completed+0x100/0x1ac)
[ 12.649147] [<c04736f8>] (addrconf_dad_completed) from [<c047398c>]
(addrconf_dad_work+0x1e8/0x338)
[ 12.658190] [<c047398c>] (addrconf_dad_work) from [<c0036fa0>]
(process_one_work+0x120/0x330)
[ 12.666687] [<c0036fa0>] (process_one_work) from [<c00371fc>]
(worker_thread+0x4c/0x480)
[ 12.674756] [<c00371fc>] (worker_thread) from [<c003beb0>]
(kthread+0xdc/0xf4)
[ 12.681967] [<c003beb0>] (kthread) from [<c000f4a0>]
(ret_from_fork+0x14/0x34)
[ 12.689157] Code: e58d3004 e59d4004 e1540005 0a00000d (e594303c)
[ 12.695350] ---[ end trace 598762c22717a96a ]---
[ 12.699952] Kernel panic - not syncing: Fatal exception in interrupt
- Simon
On 18/05/15 15:00, Alexander Aring wrote:
> Hi Simon,
>
> On Mon, May 18, 2015 at 11:57:19AM +0100, Simon Vincent wrote:
>> Hi Alex,
>>
>> I can confirm your patch still results in the bad paging request.
>> Maybe you are not seeing it as you have a slightly different setup.
>>
>> If I understand correctly you have two virtual phys going to a single wpan
>> interface
>> e.g.
>> phy0 -> wpan0
>> phy1 -> wpan0
>>
>> My setup is two separate transceivers. Each with its own phy and wpan
>> interface.
>> e.g.
>> phy0 -> wpan0
>> phy1 -> wpan1
>>
> From view of mac802154 layer this setup should be the same. Both running
> two times alloc_hw and register_hw for alloc/register a wpan phy. The
> driver layer is different.
>
> With your solution to putting the worker resource on the heap then the
> problem is solved?
>
> Can you provide more information about the "bad paging request" issue? A
> stacktrace or something else?
>
> I currently have no idea why you have this issue.
>
> - Alex
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: Kernel crash when using multiple interfaces
2015-05-18 15:05 ` Simon Vincent
@ 2015-05-18 15:37 ` Alexander Aring
2015-05-18 16:27 ` Alexander Aring
0 siblings, 1 reply; 12+ messages in thread
From: Alexander Aring @ 2015-05-18 15:37 UTC (permalink / raw)
To: Simon Vincent; +Cc: linux-wpan
Hi,
On Mon, May 18, 2015 at 04:05:38PM +0100, Simon Vincent wrote:
> With your patch I get either a "bad paging request" or a NULL pointer
> dereference crash at startup. I have not had any problems with my patch.
>
> Here are two stack traces I get.
>
> [ 12.223057] [<c04da1b8>] (ieee802154_stop_queue) from [<c04d6d64>]
>
> or
>
> [ 12.548824] [<c04da1b8>] (ieee802154_stop_queue) from [<c04d6d64>]
Both crash in ieee802154_stop_queue, but we didn't change anything
which should affect ieee802154_stop_queue in my fix or yours.
I don't know what happens here, or why it now crashes in
ieee802154_stop_queue.
I can reproduce the issue (with no patches applied and two lowpan
interfaces with the reworked fakelb driver). I now get:
BUG: unable to handle kernel NULL pointer dereference at 00000004
IP: [<c013ae6a>] process_one_work+0x29/0x2a5
*pde = 00000000
Oops: 0000 [#1] SMP
Modules linked in:
CPU: 0 PID: 436 Comm: kworker/u2:4 Not tainted 4.1.0-rc3-00545-gd0f8937 #1078
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
task: f73cf4d0 ti: f7184000 task.ti: f7184000
EIP: 0060:[<c013ae6a>] EFLAGS: 00010046 CPU: 0
EIP is at process_one_work+0x29/0x2a5
EAX: 00000000 EBX: f724bac0 ECX: 00000004 EDX: c0e74aec
ESI: f701d400 EDI: f7185ef0 EBP: f7185f0c ESP: f7185edc
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 8005003b CR2: 000000a4 CR3: 3699b000 CR4: 00000690
Stack:
f734f800 00000000 00000000 c0e74aec f701d400 c0e74ae0 c0b284c0 00000000
c05e743a f724bac0 f701d400 f724bad8 f7185f30 c013b4de f73cf4d0 f701d430
f724bac0 c013b330 f72d0100 f724bac0 c013b330 f7185fac c013e8fa f7185f74
Call Trace:
[<c013b4de>] worker_thread+0x1ae/0x241
[<c013b330>] ? rescuer_thread+0x229/0x229
[<c013b330>] ? rescuer_thread+0x229/0x229
[<c013e8fa>] kthread+0x8f/0x94
[<c0140000>] ? SYSC_reboot+0x141/0x141
[<c0487401>] ret_from_kernel_thread+0x21/0x30
[<c013e86b>] ? __kthread_parkme+0x54/0x54
Code: 5d c3 55 89 e5 57 56 53 89 c3 89 d0 8d 7d e4 83 ec 24 89 55 dc e8 3a dd ff ff 89 45 d8 8b 43 24 b9 04 00 00 00 89 45 e0 8b 45 d8 <8b> 40 04 8b 80 00 01 00 00 c1 e8 05 83 e0 01 88 45 d7 8b 45 dc
EIP: [<c013ae6a>] process_one_work+0x29/0x2a5 SS:ESP 0068:f7185edc
CR2: 0000000000000004
---[ end trace f75bf0513b11ceb0 ]---
BUG: unable to handle kernel paging request at ffffffd0
IP: [<c013ea2f>] kthread_data+0x9/0xe
*pde = 006c7067 *pte = 00000000
Oops: 0000 [#2] SMP
Modules linked in:
CPU: 0 PID: 436 Comm: kworker/u2:4 Tainted: G D 4.1.0-rc3-00545-gd0f8937 #1078
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
task: f73cf4d0 ti: f7184000 task.ti: f7184000
EIP: 0060:[<c013ea2f>] EFLAGS: 00010002 CPU: 0
EIP is at kthread_data+0x9/0xe
EAX: 00000000 EBX: f7800340 ECX: 00000000 EDX: 00000000
ESI: 00000000 EDI: f73cf758 EBP: f7185d74 ESP: f7185d74
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 8005003b CR2: 00000014 CR3: 3699b000 CR4: 00000690
Stack:
f7185d84 c013b5cc f7800340 00000000 f7185da4 c0483b71 00000000 00000000
f73cf4d0 f7186000 f7185bb4 f7185dd4 f7185db0 c0483f7e f73cf4d0 f7185de8
c012c9cd f73cf8d0 00000001 f73cf6d4 f70413b0 f7185ea0 f7185de0 f6a839ec
Call Trace:
[<c013b5cc>] wq_worker_sleeping+0xc/0x76
[<c0483b71>] __schedule+0x178/0x528
[<c0483f7e>] schedule+0x5d/0x6a
[<c012c9cd>] do_exit+0x749/0x75f
[<c0103e84>] oops_end+0x7b/0x82
[<c0125637>] no_context+0x1b4/0x1be
[<c0152a6f>] ? mark_lock+0x1e/0x1c4
[<c0125767>] __bad_area_nosemaphore+0x126/0x130
[<c04860c7>] ? __mutex_unlock_slowpath+0x10f/0x119
[<c0125dc4>] ? vmalloc_sync_all+0x9c/0x9c
[<c012577e>] bad_area_nosemaphore+0xd/0x10
[<c0125b4e>] __do_page_fault+0x124/0x2fe
[<c0151477>] ? trace_hardirqs_off_caller+0x39/0xa1
[<c0125dc4>] ? vmalloc_sync_all+0x9c/0x9c
[<c0125dcf>] do_page_fault+0xb/0xd
[<c04881bf>] error_code+0x5f/0x70
[<c013007b>] ? bin_intvec+0x6/0x163
[<c0125dc4>] ? vmalloc_sync_all+0x9c/0x9c
[<c013ae6a>] ? process_one_work+0x29/0x2a5
[<c013b4de>] worker_thread+0x1ae/0x241
[<c013b330>] ? rescuer_thread+0x229/0x229
[<c013b330>] ? rescuer_thread+0x229/0x229
[<c013e8fa>] kthread+0x8f/0x94
[<c0140101>] ? async_synchronize_cookie_domain+0x4/0xa2
[<c0487401>] ret_from_kernel_thread+0x21/0x30
[<c013e86b>] ? __kthread_parkme+0x54/0x54
Code: 31 c0 59 5b 5e 5f 5d c3 55 64 a1 0c 67 6b c0 8b 80 5c 02 00 00 89 e5 5d 8b 40 c8 c1 e8 02 83 e0 01 c3 55 8b 80 5c 02 00 00 89 e5 <8b> 40 d0 5d c3 55 b9 04 00 00 00 89 e5 52 8b 90 5c 02 00 00 8d
EIP: [<c013ea2f>] kthread_data+0x9/0xe SS:ESP 0068:f7185d74
CR2: 00000000ffffffd0
---[ end trace f75bf0513b11ceb1 ]---
This is the issue which you should currently see on mainline. I created
a github branch so you can try it yourself [0]. I simply loaded the
fakelb driver and created lowpan interfaces on each registered phy.
I also created a branch [1] which contains the suggested fix without
calling kmalloc. In my case the above error doesn't occur anymore and I
don't get a "bad paging request".
I don't know what's going on such that your fix works and mine doesn't
on your side; I just want to be sure that I understand what's going on there.
If we don't find out more, then just send your patch (based on
bluetooth, but it should be the same as bluetooth-next). I will test it
then on my side and if it works, then everything is fine.
- Alex
[0] https://github.com/linux-wpan/linux-wpan-next/tree/for_simon_multiple_phy_fail
[1] https://github.com/linux-wpan/linux-wpan-next/tree/for_simon_multiple_phy_works
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: Kernel crash when using multiple interfaces
2015-05-18 15:37 ` Alexander Aring
@ 2015-05-18 16:27 ` Alexander Aring
2015-05-19 11:18 ` Simon Vincent
0 siblings, 1 reply; 12+ messages in thread
From: Alexander Aring @ 2015-05-18 16:27 UTC (permalink / raw)
To: Simon Vincent; +Cc: linux-wpan
On Mon, May 18, 2015 at 05:37:12PM +0200, Alexander Aring wrote:
> Hi,
>
> On Mon, May 18, 2015 at 04:05:38PM +0100, Simon Vincent wrote:
> > With your patch I get either a "bad paging request" or a NULL pointer
> > dereference crash at startup. I have not had any problems with my patch.
> >
> > Here are two stack traces I get.
> >
> > [ 12.223057] [<c04da1b8>] (ieee802154_stop_queue) from [<c04d6d64>]
> >
> > or
> >
> > [ 12.548824] [<c04da1b8>] (ieee802154_stop_queue) from [<c04d6d64>]
>
> Both crashes in ieee802154_stop_queue, but we don't changed anything
> which should affect the ieee802154_stop_queue in my or your fix.
>
Ah, it could be that it failed in ieee802154_tx after calling
ieee802154_stop_queue. Are you sure that you aren't missing an & somewhere
when handling pointers, leaving us with an invalid address?
- Alex
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: Kernel crash when using multiple interfaces
2015-05-18 16:27 ` Alexander Aring
@ 2015-05-19 11:18 ` Simon Vincent
0 siblings, 0 replies; 12+ messages in thread
From: Simon Vincent @ 2015-05-19 11:18 UTC (permalink / raw)
To: Alexander Aring; +Cc: linux-wpan
Hi Alex,
I am not sure why it was crashing yesterday. I have done a clean build
today and it looks fine.
I would proceed with your patch and I will let you know if I see any
problems in the future.
- Simon
On 18/05/15 17:27, Alexander Aring wrote:
> On Mon, May 18, 2015 at 05:37:12PM +0200, Alexander Aring wrote:
>> Hi,
>>
>> On Mon, May 18, 2015 at 04:05:38PM +0100, Simon Vincent wrote:
>>> With your patch I get either a "bad paging request" or a NULL pointer
>>> dereference crash at startup. I have not had any problems with my patch.
>>>
>>> Here are two stack traces I get.
>>>
>>> [ 12.223057] [<c04da1b8>] (ieee802154_stop_queue) from [<c04d6d64>]
>>>
>>> or
>>>
>>> [ 12.548824] [<c04da1b8>] (ieee802154_stop_queue) from [<c04d6d64>]
>> Both crashes in ieee802154_stop_queue, but we don't changed anything
>> which should affect the ieee802154_stop_queue in my or your fix.
>>
> Ah, it could be that it failed in ieee802154_tx after calling
> ieee802154_stop_queue. Are you sure that you don't miss some & for
> handling with pointers and then we have some invalid address?
>
> - Alex
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: Kernel crash when using multiple interfaces
2015-05-15 12:54 Kernel crash when using multiple interfaces Simon Vincent
2015-05-15 14:20 ` Alexander Aring
@ 2015-05-15 15:28 ` Alexander Aring
1 sibling, 0 replies; 12+ messages in thread
From: Alexander Aring @ 2015-05-15 15:28 UTC (permalink / raw)
To: Simon Vincent; +Cc: linux-wpan
On Fri, May 15, 2015 at 01:54:10PM +0100, Simon Vincent wrote:
> I have found the Kernel crashes when multiple 802.15.4 interfaces are used
> at the same time.
> I have tracked it down in the kernel to net/mac802154/tx.c
> The problem is the ieee802154_xmit_cb is a global variable so after it has
> been assigned and added to the work queue it can be corrupted/changed by
> another interface transmitting a packet.
>
> I have fixed it by allocating the structure on the heap. If this is a
> satisfactory fix I can submit it as a patch.
>
> diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
> index c62e956..168d377 100644
> --- a/net/mac802154/tx.c
> +++ b/net/mac802154/tx.c
> @@ -39,8 +39,6 @@ struct ieee802154_xmit_cb {
> struct ieee802154_local *local;
> };
>
> -static struct ieee802154_xmit_cb ieee802154_xmit_cb;
> -
> static void ieee802154_xmit_worker(struct work_struct *work)
> {
> struct ieee802154_xmit_cb *cb =
> @@ -66,6 +64,7 @@ static void ieee802154_xmit_worker(struct work_struct
> *work)
> dev->stats.tx_bytes += skb->len;
>
> rtnl_unlock();
> + kfree(cb);
>
> return;
>
> @@ -74,6 +73,7 @@ err_tx:
> ieee802154_wake_queue(&local->hw);
> rtnl_unlock();
> kfree_skb(skb);
> + kfree(cb);
> netdev_dbg(dev, "transmission failed\n");
> }
>
> @@ -81,8 +81,8 @@ static netdev_tx_t
> ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
> {
> struct net_device *dev = skb->dev;
> + struct ieee802154_xmit_cb *ieee802154_xmit_cb_ptr;
Put this at the beginning of the else branch.
> int ret;
> -
> if (!(local->hw.flags & IEEE802154_HW_TX_OMIT_CKSUM)) {
> u16 crc = crc_ccitt(0, skb->data, skb->len);
>
> @@ -106,11 +106,11 @@ ieee802154_tx(struct ieee802154_local *local, struct
> sk_buff *skb)
> dev->stats.tx_packets++;
> dev->stats.tx_bytes += skb->len;
> } else {
struct ieee802154_xmit_cb *ieee802154_xmit_cb_ptr;
> - INIT_WORK(&ieee802154_xmit_cb.work, ieee802154_xmit_worker);
> - ieee802154_xmit_cb.skb = skb;
> - ieee802154_xmit_cb.local = local;
> -
> - queue_work(local->workqueue, &ieee802154_xmit_cb.work);
> + ieee802154_xmit_cb_ptr = kmalloc(sizeof(struct ieee802154_xmit_cb),
> GFP_ATOMIC);
The GFP_ATOMIC should be aligned with the opening bracket of kmalloc.
Example:
ieee802154_xmit_cb_ptr = kmalloc(sizeof(struct ieee802154_xmit_cb),
GFP_ATOMIC);
and add an error handling here.
if (!ieee802154_xmit_cb_ptr) {
ieee802154_wake_queue(&local->hw);
goto err_tx;
}
- Alex
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2015-05-19 11:18 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-05-15 12:54 Kernel crash when using multiple interfaces Simon Vincent
2015-05-15 14:20 ` Alexander Aring
2015-05-15 15:02 ` Simon Vincent
2015-05-15 15:23 ` Alexander Aring
2015-05-16 15:33 ` Alexander Aring
2015-05-18 10:57 ` Simon Vincent
2015-05-18 14:00 ` Alexander Aring
2015-05-18 15:05 ` Simon Vincent
2015-05-18 15:37 ` Alexander Aring
2015-05-18 16:27 ` Alexander Aring
2015-05-19 11:18 ` Simon Vincent
2015-05-15 15:28 ` Alexander Aring
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox