netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* System blocks (hangs) on ifconfig up
@ 2010-12-12 15:00 Shmulik Hen
  2010-12-12 23:03 ` Ben Hutchings
  2010-12-12 23:29 ` Stephen Hemminger
  0 siblings, 2 replies; 9+ messages in thread
From: Shmulik Hen @ 2010-12-12 15:00 UTC (permalink / raw)
  To: netdev

Hello,

My system is Ubuntu 10.04, running kernel 2.6.32-26-generic.

Whenever I try to bring up a specific ethernet interface for the second
time, my system becomes unresponsive for 60 seconds - i.e. no mouse, no
keyboard, no screen refresh, etc.

Looking at the driver's code, I could see that its dev->open() method calls
wait_event_interruptible_timeout() with a timeout of 60 seconds - exactly
the delay I'm seeing.

I have narrowed the code to a bare minimum (see below - loosely based on
dummy.c), which only calls mdelay(10000) in its dev->open() method, and
still, my system blocks for exactly 10 seconds when I run the following
sequence:

 > sudo ifconfig shmulik0 up
 > sudo ifconfig shmulik0 down
 > sudo ifconfig shmulik0 up

At this point - the system is stuck for 10 seconds.


Thanks,
Shmulik.

------------------------------------------------------------------------------

shmulik.c:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/skbuff.h>

/* Module metadata. */
MODULE_AUTHOR("Shmulik Hen");
MODULE_DESCRIPTION("Shmulik's sample network driver");
MODULE_LICENSE("GPL");

/*
 * The single net_device managed by this module: set by shmulik_drv_init()
 * once registration succeeds and cleared again by shmulik_drv_exit().
 * Objects with static storage start out zeroed, so this is NULL until then.
 */
static struct net_device *g_dev;

/*
 * Set the interface's MAC address from the struct sockaddr passed in p.
 *
 * Returns 0 after copying ETH_ALEN bytes into dev->dev_addr, or
 * -EADDRNOTAVAIL when the requested address fails is_valid_ether_addr().
 */
static int shmulik_set_mac_address(struct net_device *dev, void *p)
{
     struct sockaddr *addr = p;

     if (is_valid_ether_addr(addr->sa_data)) {
          memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
          return 0;
     }

     return -EADDRNOTAVAIL;
}

/*
 * Transmit hook: the packet is freed without being sent anywhere and
 * NETDEV_TX_OK is returned to the caller.
 */
static netdev_tx_t shmulik_start_xmit(struct sk_buff *skb,
                                      struct net_device *dev)
{
     dev_kfree_skb(skb);

     return NETDEV_TX_OK;
}

/*
 * Open handler (installed as .ndo_open), run when the interface is
 * brought up.
 *
 * The 10000 ms mdelay() stands in for the long wait done by the real
 * driver's open(). Note that mdelay() is a busy-wait (spin) primitive and
 * does not schedule; msleep() is the delay call that sleeps.
 *
 * Always returns 0.
 */
static int shmulik_open(struct net_device *dev)
{
     /* Spin for 10 seconds before reporting the link as up. */
     mdelay(10000);
     netif_carrier_on(dev);
     netif_start_queue(dev);

     return 0;
}

/*
 * Stop handler (installed as .ndo_stop): stops the transmit queue and
 * marks the carrier as off. Always returns 0.
 */
static int shmulik_close(struct net_device *dev)
{
     netif_stop_queue(dev);
     netif_carrier_off(dev);

     return 0;
}

/* net_device callbacks implemented by this sample driver. */
static const struct net_device_ops shmulik_drv_ops = {
     .ndo_open = shmulik_open,
     .ndo_stop = shmulik_close,
     .ndo_start_xmit = shmulik_start_xmit,
     .ndo_set_mac_address = shmulik_set_mac_address,
};

/*
 * Module entry point: allocate, configure and register the single
 * "shmulik0" Ethernet device, then remember it in g_dev.
 *
 * Returns 0 on success, -EEXIST if g_dev is already set, -ENOMEM if the
 * device cannot be allocated, or the error code from register_netdev()
 * (in which case the freshly allocated device is freed again).
 */
static int __init shmulik_drv_init(void)
{
     struct net_device *dev;
     int rc;

     if (g_dev)
          return -EEXIST;

     dev = alloc_etherdev(0);
     if (!dev)
          return -ENOMEM;

     /* Bounded formatting: never write past the fixed-size name buffer. */
     snprintf(dev->name, sizeof(dev->name), "%s%d", "shmulik", 0);
     dev->tx_queue_len = 0;
     dev->flags |= IFF_NOARP;
     dev->flags &= ~IFF_MULTICAST;
     random_ether_addr(dev->dev_addr);

     dev->netdev_ops = &shmulik_drv_ops;

     rc = register_netdev(dev);
     if (rc) {
          /* Registration failed: release the device we allocated. */
          free_netdev(dev);
          return rc;
     }

     g_dev = dev;
     return 0;
}

/*
 * Module exit point: unregister and free the device recorded in g_dev,
 * if there is one, and reset g_dev to NULL.
 */
static void __exit shmulik_drv_exit(void)
{
     struct net_device *dev = g_dev;

     if (!dev)
          return;

     unregister_netdev(dev);
     free_netdev(dev);
     g_dev = NULL;
}

/* Register the module's load and unload entry points. */
module_init(shmulik_drv_init);
module_exit(shmulik_drv_exit);


^ permalink raw reply	[flat|nested] 9+ messages in thread

* System blocks (hangs) on ifconfig up
@ 2010-12-12 15:08 Shmulik Hen
  2010-12-12 20:29 ` Eric Dumazet
  0 siblings, 1 reply; 9+ messages in thread
From: Shmulik Hen @ 2010-12-12 15:08 UTC (permalink / raw)
  To: netdev

Hello,

My system is Ubuntu 10.04, running kernel 2.6.32-26-generic.

Whenever I try to bring up a specific ethernet interface for the second
time, my system becomes unresponsive for 60 seconds - i.e. no mouse, no
keyboard, no screen refresh, etc.

Looking at the driver's code, I could see that its dev->open() method calls
wait_event_interruptible_timeout() with a timeout of 60 seconds - exactly
the delay I'm seeing.

I have narrowed the code to a bare minimum (see below - loosely based on
dummy.c), which only calls mdelay(10000) in its dev->open() method, and
still, my system blocks for exactly 10 seconds when I run the following
sequence:

 > sudo ifconfig shmulik0 up
 > sudo ifconfig shmulik0 down
 > sudo ifconfig shmulik0 up

At this point - the system is stuck for 10 seconds.


Thanks,
Shmulik.

------------------------------------------------------------------------------ 


shmulik.c:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/skbuff.h>

/* Module metadata. */
MODULE_AUTHOR("Shmulik Hen");
MODULE_DESCRIPTION("Shmulik's sample network driver");
MODULE_LICENSE("GPL");

/*
 * The single net_device managed by this module: set by shmulik_drv_init()
 * once registration succeeds and cleared again by shmulik_drv_exit().
 * Objects with static storage start out zeroed, so this is NULL until then.
 */
static struct net_device *g_dev;

/*
 * Set the interface's MAC address from the struct sockaddr passed in p.
 *
 * Returns 0 after copying ETH_ALEN bytes into dev->dev_addr, or
 * -EADDRNOTAVAIL when the requested address fails is_valid_ether_addr().
 */
static int shmulik_set_mac_address(struct net_device *dev, void *p)
{
     struct sockaddr *addr = p;

     if (is_valid_ether_addr(addr->sa_data)) {
          memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
          return 0;
     }

     return -EADDRNOTAVAIL;
}

/*
 * Transmit hook: the packet is freed without being sent anywhere and
 * NETDEV_TX_OK is returned to the caller.
 */
static netdev_tx_t shmulik_start_xmit(struct sk_buff *skb,
                                      struct net_device *dev)
{
     dev_kfree_skb(skb);

     return NETDEV_TX_OK;
}

/*
 * Open handler (installed as .ndo_open), run when the interface is
 * brought up.
 *
 * The 10000 ms mdelay() stands in for the long wait done by the real
 * driver's open(). Note that mdelay() is a busy-wait (spin) primitive and
 * does not schedule; msleep() is the delay call that sleeps.
 *
 * Always returns 0.
 */
static int shmulik_open(struct net_device *dev)
{
     /* Spin for 10 seconds before reporting the link as up. */
     mdelay(10000);
     netif_carrier_on(dev);
     netif_start_queue(dev);

     return 0;
}

/*
 * Stop handler (installed as .ndo_stop): stops the transmit queue and
 * marks the carrier as off. Always returns 0.
 */
static int shmulik_close(struct net_device *dev)
{
     netif_stop_queue(dev);
     netif_carrier_off(dev);

     return 0;
}

/* net_device callbacks implemented by this sample driver. */
static const struct net_device_ops shmulik_drv_ops = {
     .ndo_open = shmulik_open,
     .ndo_stop = shmulik_close,
     .ndo_start_xmit = shmulik_start_xmit,
     .ndo_set_mac_address = shmulik_set_mac_address,
};

/*
 * Module entry point: allocate, configure and register the single
 * "shmulik0" Ethernet device, then remember it in g_dev.
 *
 * Returns 0 on success, -EEXIST if g_dev is already set, -ENOMEM if the
 * device cannot be allocated, or the error code from register_netdev()
 * (in which case the freshly allocated device is freed again).
 */
static int __init shmulik_drv_init(void)
{
     struct net_device *dev;
     int rc;

     if (g_dev)
          return -EEXIST;

     dev = alloc_etherdev(0);
     if (!dev)
          return -ENOMEM;

     /* Bounded formatting: never write past the fixed-size name buffer. */
     snprintf(dev->name, sizeof(dev->name), "%s%d", "shmulik", 0);
     dev->tx_queue_len = 0;
     dev->flags |= IFF_NOARP;
     dev->flags &= ~IFF_MULTICAST;
     random_ether_addr(dev->dev_addr);

     dev->netdev_ops = &shmulik_drv_ops;

     rc = register_netdev(dev);
     if (rc) {
          /* Registration failed: release the device we allocated. */
          free_netdev(dev);
          return rc;
     }

     g_dev = dev;
     return 0;
}

/*
 * Module exit point: unregister and free the device recorded in g_dev,
 * if there is one, and reset g_dev to NULL.
 */
static void __exit shmulik_drv_exit(void)
{
     struct net_device *dev = g_dev;

     if (!dev)
          return;

     unregister_netdev(dev);
     free_netdev(dev);
     g_dev = NULL;
}

/* Register the module's load and unload entry points. */
module_init(shmulik_drv_init);
module_exit(shmulik_drv_exit);


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: System blocks (hangs) on ifconfig up
  2010-12-12 15:08 Shmulik Hen
@ 2010-12-12 20:29 ` Eric Dumazet
  2010-12-12 20:53   ` Eric Dumazet
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2010-12-12 20:29 UTC (permalink / raw)
  To: Shmulik Hen; +Cc: netdev

Le dimanche 12 décembre 2010 à 17:08 +0200, Shmulik Hen a écrit :
> Hello,
> 
> My system is Ubuntu 10.04, running kernel 2.6.32-26-generic.
> 
> Whenever I try to bring up a specific ethernet interface for the second 
> time, my
> system becomes unresponsive for 60 seconds - i.e. no mouse, no keyboard, no
> screen refresh. etc.
> 
> Looking at the driver's code, I could see that it's dev->open() method 
> calls
> wait_event_interruptible_timeout() with a timeout of 60 seconds - exactly
> the delay I'm seeing.
> 

What is this driver ?

> I have narrowed the code to a bare minimum (see below - loosely based on
> dummy.c), which only calls mdelay(10000) in it's dev->open() method, and
> still, my system blocks for exactly 10 seconds when I run the following
> sequence:
> 
>  > sudo ifconfig shmulik0 up
>  > sudo ifconfig shmulik0 down
>  > sudo ifconfig shmulik0 up
> 
> At this point - the system is stuck for 10 seconds.
> 

Certainly not "stuck for 10 seconds"

mdelay(10000) suspends this task for 10 seconds, but other tasks can
certainly run.

However, your shell waits until "ifconfig ... up" finishes.




^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: System blocks (hangs) on ifconfig up
  2010-12-12 20:29 ` Eric Dumazet
@ 2010-12-12 20:53   ` Eric Dumazet
  0 siblings, 0 replies; 9+ messages in thread
From: Eric Dumazet @ 2010-12-12 20:53 UTC (permalink / raw)
  To: Shmulik Hen; +Cc: netdev

Le dimanche 12 décembre 2010 à 21:29 +0100, Eric Dumazet a écrit :
> Le dimanche 12 décembre 2010 à 17:08 +0200, Shmulik Hen a écrit :
> > Hello,
> > 
> > My system is Ubuntu 10.04, running kernel 2.6.32-26-generic.
> > 
> > Whenever I try to bring up a specific ethernet interface for the second 
> > time, my
> > system becomes unresponsive for 60 seconds - i.e. no mouse, no keyboard, no
> > screen refresh. etc.
> > 
> > Looking at the driver's code, I could see that it's dev->open() method 
> > calls
> > wait_event_interruptible_timeout() with a timeout of 60 seconds - exactly
> > the delay I'm seeing.
> > 
> 
> What is this driver ?
> 
> > I have narrowed the code to a bare minimum (see below - loosely based on
> > dummy.c), which only calls mdelay(10000) in it's dev->open() method, and
> > still, my system blocks for exactly 10 seconds when I run the following
> > sequence:
> > 
> >  > sudo ifconfig shmulik0 up
> >  > sudo ifconfig shmulik0 down
> >  > sudo ifconfig shmulik0 up
> > 
> > At this point - the system is stuck for 10 seconds.
> > 
> 
> Certainly not "stuck for 10 seconds"
> 
> mdelay(10000) suspends this task for 10 seconds, but other tasks can
> certainly run.
> 
> However, your shell waits that "ifconfig ... up" finishes.
> 
> 

Oops, I forgot that mdelay() is a spin primitive; I was misled by your
earlier reference to wait_event_interruptible_timeout().

Doing mdelay(10000) is certainly very lazy, I hope no driver does
that ...




^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: System blocks (hangs) on ifconfig up
  2010-12-12 15:00 System blocks (hangs) on ifconfig up Shmulik Hen
@ 2010-12-12 23:03 ` Ben Hutchings
  2010-12-13  9:14   ` Shmulik Hen
  2010-12-12 23:29 ` Stephen Hemminger
  1 sibling, 1 reply; 9+ messages in thread
From: Ben Hutchings @ 2010-12-12 23:03 UTC (permalink / raw)
  To: Shmulik Hen; +Cc: netdev

On Sun, 2010-12-12 at 17:00 +0200, Shmulik Hen wrote:
> Hello,
> 
> My system is Ubuntu 10.04, running kernel 2.6.32-26-generic.
> 
> Whenever I try to bring up a specific ethernet interface for the second 
> time, my
> system becomes unresponsive for 60 seconds - i.e. no mouse, no keyboard, no
> screen refresh. etc.
> 
> Looking at the driver's code, I could see that it's dev->open() method calls
> wait_event_interruptible_timeout() with a timeout of 60 seconds - exactly
> the delay I'm seeing.

That seems like a stupid thing for it to do.

> I have narrowed the code to a bare minimum (see below - loosely based on
> dummy.c), which only calls mdelay(10000) in it's dev->open() method, and
> still, my system blocks for exactly 10 seconds when I run the following
> sequence:
> 
>  > sudo ifconfig shmulik0 up
>  > sudo ifconfig shmulik0 down
>  > sudo ifconfig shmulik0 up
> 
> At this point - the system is stuck for 10 seconds.

Bringing an interface up or down is a synchronous operation and is
serialised with most other network configuration operations.  So this is
the expected behaviour.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: System blocks (hangs) on ifconfig up
  2010-12-12 15:00 System blocks (hangs) on ifconfig up Shmulik Hen
  2010-12-12 23:03 ` Ben Hutchings
@ 2010-12-12 23:29 ` Stephen Hemminger
  1 sibling, 0 replies; 9+ messages in thread
From: Stephen Hemminger @ 2010-12-12 23:29 UTC (permalink / raw)
  To: Shmulik Hen; +Cc: netdev

On Sun, 12 Dec 2010 17:00:26 +0200
Shmulik Hen <shmulik.hen@gmail.com> wrote:

> Hello,
> 
> My system is Ubuntu 10.04, running kernel 2.6.32-26-generic.
> 
> Whenever I try to bring up a specific ethernet interface for the second 
> time, my
> system becomes unresponsive for 60 seconds - i.e. no mouse, no keyboard, no
> screen refresh. etc.
> 
> Looking at the driver's code, I could see that it's dev->open() method calls
> wait_event_interruptible_timeout() with a timeout of 60 seconds - exactly
> the delay I'm seeing.
> 
> I have narrowed the code to a bare minimum (see below - loosely based on
> dummy.c), which only calls mdelay(10000) in it's dev->open() method, and
> still, my system blocks for exactly 10 seconds when I run the following
> sequence:
> 
>  > sudo ifconfig shmulik0 up
>  > sudo ifconfig shmulik0 down
>  > sudo ifconfig shmulik0 up
> 
> At this point - the system is stuck for 10 seconds.
> 
> 
> Thanks,
> Shmulik.

What driver is this? Where is the source? It sounds awful

-- 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: System blocks (hangs) on ifconfig up
  2010-12-12 23:03 ` Ben Hutchings
@ 2010-12-13  9:14   ` Shmulik Hen
  2010-12-13 12:37     ` Eric Dumazet
  0 siblings, 1 reply; 9+ messages in thread
From: Shmulik Hen @ 2010-12-13  9:14 UTC (permalink / raw)
  To: netdev; +Cc: Shmulik Hen, Ben Hutchings, eric.dumazet, shemminger

On 12/13/2010 01:03 AM, Ben Hutchings wrote:
> On Sun, 2010-12-12 at 17:00 +0200, Shmulik Hen wrote:
>> Hello,
>>
>> My system is Ubuntu 10.04, running kernel 2.6.32-26-generic.
>>
>> Whenever I try to bring up a specific ethernet interface for the second
>> time, my
>> system becomes unresponsive for 60 seconds - i.e. no mouse, no keyboard, no
>> screen refresh. etc.
>>
>> Looking at the driver's code, I could see that it's dev->open() method calls
>> wait_event_interruptible_timeout() with a timeout of 60 seconds - exactly
>> the delay I'm seeing.
> That seems like a stupid thing for it to do.
I agree...
>> I have narrowed the code to a bare minimum (see below - loosely based on
>> dummy.c), which only calls mdelay(10000) in it's dev->open() method, and
>> still, my system blocks for exactly 10 seconds when I run the following
>> sequence:
>>
>>   >  sudo ifconfig shmulik0 up
>>   >  sudo ifconfig shmulik0 down
>>   >  sudo ifconfig shmulik0 up
>>
>> At this point - the system is stuck for 10 seconds.
> Bringing an interface up or down is a synchronous operation and is
> serialised with most other network configuration operations.  So this is
> the expected behaviour.
>
> Ben.
But why does this happen only the second time I run ifconfig up?
How come the entire system is totally frozen?
I can't even switch to other applications running. If I run 'top' in another
console, it stops refreshing for the entire period.

I'll try to explain better:
The driver I'm referring to is part of an embedded system development kit.
It runs on the controlling side, which may be a PC or some Linux embedded
system. It exposes a virtual interface that allows communicating over an
ethernet connection with a remote board, and performs the firmware download
to that board.
Unfortunately, the firmware download stage is done during dev->open() of
this virtual interface. The call to wait_event_interruptible_timeout()
is there to make sure, via a message, that the boot process of the remote
board is complete. If all goes well the first time, there is no delay, but
if the operation fails for any reason the first time, and a second attempt
is made (another ifconfig up), we see the freezing.

Since this driver is (mostly) closed source, I had to try and reproduce
the situation in an all open-source driver - this is the sample code I
attached to my original message. The call to mdelay() there is meant to
simulate the delay of the original driver - it schedules.

Obviously, the correct way to fix this is to separate the firmware
download part from the dev->open() method, but this is not as simple as it
may sound - I'm currently working on this. In the meantime, I'm looking for
a simpler solution (or answer) to our problem.

I'll appreciate any insight on this matter.

     Thanks in advance,
     Shmulik Hen.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: System blocks (hangs) on ifconfig up
  2010-12-13  9:14   ` Shmulik Hen
@ 2010-12-13 12:37     ` Eric Dumazet
  2010-12-13 13:11       ` Shmulik Hen
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2010-12-13 12:37 UTC (permalink / raw)
  To: Shmulik Hen; +Cc: netdev, Shmulik Hen, Ben Hutchings, shemminger

Le lundi 13 décembre 2010 à 11:14 +0200, Shmulik Hen a écrit :
> On 12/13/2010 01:03 AM, Ben Hutchings wrote:
> > On Sun, 2010-12-12 at 17:00 +0200, Shmulik Hen wrote:
> >> Hello,
> >>
> >> My system is Ubuntu 10.04, running kernel 2.6.32-26-generic.
> >>
> >> Whenever I try to bring up a specific ethernet interface for the second
> >> time, my
> >> system becomes unresponsive for 60 seconds - i.e. no mouse, no keyboard, no
> >> screen refresh. etc.
> >>
> >> Looking at the driver's code, I could see that it's dev->open() method calls
> >> wait_event_interruptible_timeout() with a timeout of 60 seconds - exactly
> >> the delay I'm seeing.
> > That seems like a stupid thing for it to do.
> I agree...
> >> I have narrowed the code to a bare minimum (see below - loosely based on
> >> dummy.c), which only calls mdelay(10000) in it's dev->open() method, and
> >> still, my system blocks for exactly 10 seconds when I run the following
> >> sequence:
> >>
> >>   >  sudo ifconfig shmulik0 up
> >>   >  sudo ifconfig shmulik0 down
> >>   >  sudo ifconfig shmulik0 up
> >>
> >> At this point - the system is stuck for 10 seconds.
> > Bringing an interface up or down is a synchronous operation and is
> > serialised with most other network configuration operations.  So this is
> > the expected behaviour.
> >
> > Ben.
> But why does this happen only the second time I run ifconfig up?
> How come the entire system is totally frozen?
> I can't even switch to other applications running. If I run 'top' in another
> console, it stops refreshing for the entire period.
> 
> I'll try to explain better;
> The driver I'm referring to is part of an embedded system development kit.
> It runs on the controlling side, which may be a PC or some Linux embedded
> system. It exposes a virtual interface that allows to communicate via
> ethernet connection to a remote board, and performs the firmware download
> to that board.
> Unfortunately, the firmware download stage is  done during dev->open() of
> this virtual interface. The call to wait_event_interruptible_timeout()
> is there to make sure the boot process of the remote board is complete via a
> message. If all goes well the first time, there is no delay, but if the 
> operation
> fails for any reason the first time, and a second attempt is made (another
> ifconfig up), we see the freezing.
> 
> Since this driver is (mostly) closed source, I had to try and reproduce 
> the situation
> in an all open-source driver - this is the sample code I attached to my 
> original
> message. The call to mdelay() there is meant to simulate the delay of 
> the original
> driver - it schedules.
> 

mdelay() does a busy wait. If your system is not SMP, this means a 'freeze'.

If you want to schedule, you should use msleep()

> Obviously, the correct way to fix this is to separate the firmware 
> download part
> from the dev->open() method, but this is not as simple as it may sound - I'm
> currently working on this. In the mean time I'm looking for a simpler 
> solution
> (or answer) to our problem.
> 
> I'll appreciate any insight on this matter.
> 
>      Thanks in advance,
>      Shmulik Hen.



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: System blocks (hangs) on ifconfig up
  2010-12-13 12:37     ` Eric Dumazet
@ 2010-12-13 13:11       ` Shmulik Hen
  0 siblings, 0 replies; 9+ messages in thread
From: Shmulik Hen @ 2010-12-13 13:11 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, Shmulik Hen, Ben Hutchings, shemminger

On 12/13/2010 02:37 PM, Eric Dumazet wrote:
>>
>> But why does this happen only the second time I run ifconfig up?
>> How come the entire system is totally frozen?
>> I can't even switch to other applications running. If I run 'top' in another
>> console, it stops refreshing for the entire period.
>>
>> I'll try to explain better;
>> The driver I'm referring to is part of an embedded system development kit.
>> It runs on the controlling side, which may be a PC or some Linux embedded
>> system. It exposes a virtual interface that allows to communicate via
>> ethernet connection to a remote board, and performs the firmware download
>> to that board.
>> Unfortunately, the firmware download stage is  done during dev->open() of
>> this virtual interface. The call to wait_event_interruptible_timeout()
>> is there to make sure the boot process of the remote board is complete via a
>> message. If all goes well the first time, there is no delay, but if the
>> operation
>> fails for any reason the first time, and a second attempt is made (another
>> ifconfig up), we see the freezing.
>>
>> Since this driver is (mostly) closed source, I had to try and reproduce
>> the situation
>> in an all open-source driver - this is the sample code I attached to my
>> original
>> message. The call to mdelay() there is meant to simulate the delay of
>> the original
>> driver - it schedules.
>>
> mdelay() does a busy wait. If you are not SMP, this means a 'freeze'
>
> If you want to schedule, you should use msleep()

Correct - my bad. When I use msleep() in the sample code there is no freeze.

But I still don't get it - when using mdelay(), why does the system freeze
only the second time but not the first time?
And what about wait_event_interruptible_timeout()? Surely it doesn't
do a busy loop - I can see the source calling schedule().


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2010-12-13 13:11 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-12-12 15:00 System blocks (hangs) on ifconfig up Shmulik Hen
2010-12-12 23:03 ` Ben Hutchings
2010-12-13  9:14   ` Shmulik Hen
2010-12-13 12:37     ` Eric Dumazet
2010-12-13 13:11       ` Shmulik Hen
2010-12-12 23:29 ` Stephen Hemminger
  -- strict thread matches above, loose matches on Subject: below --
2010-12-12 15:08 Shmulik Hen
2010-12-12 20:29 ` Eric Dumazet
2010-12-12 20:53   ` Eric Dumazet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).