Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next v2 7/8] tcp: switch tcp_fastopen key generation to net_get_random_once
From: Hannes Frederic Sowa @ 2013-10-05 23:20 UTC (permalink / raw)
  To: netdev
  Cc: linux-kernel, Hannes Frederic Sowa, Yuchung Cheng, Eric Dumazet,
	David S. Miller
In-Reply-To: <1381015258-7667-1-git-send-email-hannes@stressinduktion.org>

Changed key initialization of tcp_fastopen cookies to net_get_random_once.

If the user sets a custom key net_get_random_once must be called at
least once to ensure we don't overwrite the user provided key when the
first cookie is generated later on.

Cc: Yuchung Cheng <ycheng@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 include/net/tcp.h          |  2 +-
 net/ipv4/sysctl_net_ipv4.c |  5 +++++
 net/ipv4/tcp_fastopen.c    | 27 ++++++++++++++++-----------
 3 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9299560..2a26100 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1322,7 +1322,7 @@ extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
 int tcp_fastopen_reset_cipher(void *key, unsigned int len);
 void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
 			     struct tcp_fastopen_cookie *foc);
-
+void tcp_fastopen_init_key_once(bool publish);
 #define TCP_FASTOPEN_KEY_LENGTH 16
 
 /* Fastopen key context */
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c08f096..4b161d5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -274,6 +274,11 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
 			ret = -EINVAL;
 			goto bad_key;
 		}
+		/* Generate a dummy secret but don't publish it. This
+		 * is needed so we don't regenerate a new key on the
+		 * first invocation of tcp_fastopen_cookie_gen
+		 */
+		tcp_fastopen_init_key_once(false);
 		tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
 	}
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index ab7bd35..766032b 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -14,6 +14,20 @@ struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
 
 static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
 
+void tcp_fastopen_init_key_once(bool publish)
+{
+	static u8 key[TCP_FASTOPEN_KEY_LENGTH];
+
+	/* tcp_fastopen_reset_cipher publishes the new context
+	 * atomically, so we allow this race happening here.
+	 *
+	 * All call sites of tcp_fastopen_cookie_gen also check
+	 * for a valid cookie, so this is an acceptable risk.
+	 */
+	if (net_get_random_once(key, sizeof(key)) && publish)
+		tcp_fastopen_reset_cipher(key, sizeof(key));
+}
+
 static void tcp_fastopen_ctx_free(struct rcu_head *head)
 {
 	struct tcp_fastopen_context *ctx =
@@ -70,6 +84,8 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
 	__be32 path[4] = { src, dst, 0, 0 };
 	struct tcp_fastopen_context *ctx;
 
+	tcp_fastopen_init_key_once(true);
+
 	rcu_read_lock();
 	ctx = rcu_dereference(tcp_fastopen_ctx);
 	if (ctx) {
@@ -78,14 +94,3 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
 	}
 	rcu_read_unlock();
 }
-
-static int __init tcp_fastopen_init(void)
-{
-	__u8 key[TCP_FASTOPEN_KEY_LENGTH];
-
-	get_random_bytes(key, sizeof(key));
-	tcp_fastopen_reset_cipher(key, sizeof(key));
-	return 0;
-}
-
-late_initcall(tcp_fastopen_init);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 8/8] net: switch net_secret key generation to net_get_random_once
From: Hannes Frederic Sowa @ 2013-10-05 23:20 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, Hannes Frederic Sowa, Eric Dumazet, David S. Miller
In-Reply-To: <1381015258-7667-1-git-send-email-hannes@stressinduktion.org>

Cc: Eric Dumazet <edumazet@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 net/core/secure_seq.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 3f1ec15..b02fd16 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -7,6 +7,7 @@
 #include <linux/hrtimer.h>
 #include <linux/ktime.h>
 #include <linux/string.h>
+#include <linux/net.h>
 
 #include <net/secure_seq.h>
 
@@ -16,18 +17,7 @@ static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
 
 static void net_secret_init(void)
 {
-	u32 tmp;
-	int i;
-
-	if (likely(net_secret[0]))
-		return;
-
-	for (i = NET_SECRET_SIZE; i > 0;) {
-		do {
-			get_random_bytes(&tmp, sizeof(tmp));
-		} while (!tmp);
-		cmpxchg(&net_secret[--i], 0, tmp);
-	}
+	net_get_random_once(net_secret, sizeof(net_secret));
 }
 
 #ifdef CONFIG_INET
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH net-next v2 3/8] x86/jump_label: expect default_nop if static_key gets enabled on boot-up
From: Steven Rostedt @ 2013-10-06  0:05 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: netdev, linux-kernel, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, Jason Baron, Peter Zijlstra, Eric Dumazet,
	David S. Miller, x86
In-Reply-To: <1381015258-7667-4-git-send-email-hannes@stressinduktion.org>

On Sun,  6 Oct 2013 01:20:53 +0200
Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:

> net_get_random_once(intrduced in the next patch) uses static_keys in
> a way that they get enabled on boot-up instead of replaced with an
> ideal_nop. So check for default_nop on initial enabling.
> 
> Other architectures don't check for this.

But they should.

> 
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Jason Baron <jbaron@redhat.com>
> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: x86@kernel.org
> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> ---
>  arch/x86/kernel/jump_label.c | 25 ++++++++++++++++++-------
>  1 file changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
> index ee11b7d..26d5a55 100644
> --- a/arch/x86/kernel/jump_label.c
> +++ b/arch/x86/kernel/jump_label.c
> @@ -42,15 +42,27 @@ static void __jump_label_transform(struct jump_entry *entry,
>  				   int init)
>  {
>  	union jump_code_union code;
> +	const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
>  	const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
>  
>  	if (type == JUMP_LABEL_ENABLE) {
> -		/*
> -		 * We are enabling this jump label. If it is not a nop
> -		 * then something must have gone wrong.
> -		 */
> -		if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
> -			bug_at((void *)entry->code, __LINE__);
> +		if (init) {
> +			/*
> +			 * Jump label is enabled for the first time.
> +			 * So we expect a default_nop...
> +			 */
> +			if (unlikely(memcmp((void *)entry->code, default_nop, 5)
> +				     != 0))
> +				bug_at((void *)entry->code, __LINE__);
> +		} else {
> +			/*
> +			 * ...otherwise expect an ideal_nop. Otherwise
> +			 * something went horribly wrong.
> +			 */
> +			if (unlikely(memcmp((void *)entry->code, ideal_nop, 5)
> +				     != 0))
> +				bug_at((void *)entry->code, __LINE__);
> +		}

I don't know if I like this change. This is similar to a bug we had
with the Xen folks, where they didn't realize that jump labels are not
suppose to be used (or set) before jump_label_init() is called.

I'll have to take a deeper look at this on Monday.

-- Steve

>  
>  		code.jump = 0xe9;
>  		code.offset = entry->target -
> @@ -63,7 +75,6 @@ static void __jump_label_transform(struct jump_entry *entry,
>  		 * are converting the default nop to the ideal nop.
>  		 */
>  		if (init) {
> -			const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
>  			if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
>  				bug_at((void *)entry->code, __LINE__);
>  		} else {

^ permalink raw reply

* Re: [PATCH net-next v2 3/8] x86/jump_label: expect default_nop if static_key gets enabled on boot-up
From: Hannes Frederic Sowa @ 2013-10-06  0:12 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: netdev, linux-kernel, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, Jason Baron, Peter Zijlstra, Eric Dumazet,
	David S. Miller, x86
In-Reply-To: <20131005200558.3be2f841@gandalf.local.home>

On Sat, Oct 05, 2013 at 08:05:58PM -0400, Steven Rostedt wrote:
> >  	if (type == JUMP_LABEL_ENABLE) {
> > -		/*
> > -		 * We are enabling this jump label. If it is not a nop
> > -		 * then something must have gone wrong.
> > -		 */
> > -		if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
> > -			bug_at((void *)entry->code, __LINE__);
> > +		if (init) {
> > +			/*
> > +			 * Jump label is enabled for the first time.
> > +			 * So we expect a default_nop...
> > +			 */
> > +			if (unlikely(memcmp((void *)entry->code, default_nop, 5)
> > +				     != 0))
> > +				bug_at((void *)entry->code, __LINE__);
> > +		} else {
> > +			/*
> > +			 * ...otherwise expect an ideal_nop. Otherwise
> > +			 * something went horribly wrong.
> > +			 */
> > +			if (unlikely(memcmp((void *)entry->code, ideal_nop, 5)
> > +				     != 0))
> > +				bug_at((void *)entry->code, __LINE__);
> > +		}
> 
> I don't know if I like this change. This is similar to a bug we had
> with the Xen folks, where they didn't realize that jump labels are not
> suppose to be used (or set) before jump_label_init() is called.
> 
> I'll have to take a deeper look at this on Monday.

Yes, I understand and saw the commit to call jump_label_init
earlier. Maybe the default could be to insert illegal instructions by
default if we try to replace them with nops or branches afterwards anyway.

insn_sanity programs would have to be tought about that, then.

Greetings,

  Hannes

^ permalink raw reply

* Re: [PATCH] 6lowpan: Sync default hardware address of lowpan links to their wpan
From: Alexander Aring @ 2013-10-06  0:24 UTC (permalink / raw)
  To: Alan Ott
  Cc: Alexander Smirnov, Dmitry Eremin-Solenikov, David S. Miller,
	linux-zigbee-devel, netdev, linux-kernel
In-Reply-To: <1381009080-15945-1-git-send-email-alan@signal11.us>

Hi Alan,

On Sat, Oct 05, 2013 at 05:38:00PM -0400, Alan Ott wrote:
> When a lowpan link to a wpan device is created, set the hardware address
> of the lowpan link to that of the wpan device.
> 
> Signed-off-by: Alan Ott <alan@signal11.us>
> ---
>  net/ieee802154/6lowpan.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
> index c85e71e..fb89133 100644
> --- a/net/ieee802154/6lowpan.c
> +++ b/net/ieee802154/6lowpan.c
> @@ -1386,6 +1386,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
>  
>  	entry->ldev = dev;
>  
> +	BUG_ON(IEEE802154_ADDR_LEN != dev->addr_len);
So if somebody make a:
"ip link add link $NOT_8BYTE_HWADDR_DEV name $NAME type lowpan"

the kernel creates a BUG_ON? Okay it seems that case is very unusual but
better is to return a errno so "maybe(I don't know it)" the userspace
software will generate a error and the whole kernel doesn't crash.

> +	memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN);
Is there one case where we read the dev->dev_addr? I saw a case when we
set this [1], but I don't know why we need this. Did you detect some
problems because this isn't the "real" hw addr?

Other case is I am feeling uneasy when we have two netdev devices with
the same hw addr.

- Alex

[1] https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/net/ieee802154/6lowpan.c?id=refs/tags/v3.12-rc3#n1102

^ permalink raw reply

* Re: [PATCH] 6lowpan: Sync default hardware address of lowpan links to their wpan
From: Alan Ott @ 2013-10-06  1:01 UTC (permalink / raw)
  To: Alexander Aring
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-zigbee-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	David S. Miller
In-Reply-To: <20131006002451.GA22623@omega>

On 10/05/2013 08:24 PM, Alexander Aring wrote:
> Hi Alan,
>
> On Sat, Oct 05, 2013 at 05:38:00PM -0400, Alan Ott wrote:
>> When a lowpan link to a wpan device is created, set the hardware address
>> of the lowpan link to that of the wpan device.
>>
>> Signed-off-by: Alan Ott<alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>
>> ---
>>   net/ieee802154/6lowpan.c | 3 +++
>>   1 file changed, 3 insertions(+)
>>
>> diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
>> index c85e71e..fb89133 100644
>> --- a/net/ieee802154/6lowpan.c
>> +++ b/net/ieee802154/6lowpan.c
>> @@ -1386,6 +1386,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
>>   
>>   	entry->ldev = dev;
>>   
>> +	BUG_ON(IEEE802154_ADDR_LEN != dev->addr_len);
> So if somebody make a:
> "ip link add link $NOT_8BYTE_HWADDR_DEV name $NAME type lowpan"
>
> the kernel creates a BUG_ON? Okay it seems that case is very unusual but
> better is to return a errno so "maybe(I don't know it)" the userspace
> software will generate a error and the whole kernel doesn't crash.

That line checks the length of the address of the lowpan device, not the 
real device the lowpan device is attached to (and on second look, I 
don't like the Yoda code; didn't notice that before).

Further, running:
     ip link add link eth0 name lowpan0 type lowpan

like you suggest, crashes the kernel hard, with or without my patch. 
More stuff to fix....


>> +	memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN);
> Is there one case where we read the dev->dev_addr? I saw a case when we
> set this [1], but I don't know why we need this. Did you detect some
> problems because this isn't the "real" hw addr?
>
> Other case is I am feeling uneasy when we have two netdev devices with
> the same hw addr.
> [1]https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/net/ieee802154/6lowpan.c?id=refs/tags/v3.12-rc3#n1102

In my testing, lowpan devices need a hardware address which is the same 
as the wpan, or else stuff like ping doesn't work at all. In all the 
examples I've ever seen, the lowpan's hardware address has been set 
manually.

What does your test setup look like that you don't have a hardware 
address for your lowpan device? Are you able to ping other Linux hosts?

Alan.


------------------------------------------------------------------------------
October Webinars: Code for Performance
Free Intel webinars can help you accelerate application performance.
Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from 
the latest Intel processors and coprocessors. See abstracts and register >
http://pubads.g.doubleclick.net/gampad/clk?id=60134791&iu=/4140/ostg.clktrk

^ permalink raw reply

* Re: [PATCH] 6lowpan: Sync default hardware address of lowpan links to their wpan
From: Alexander Aring @ 2013-10-06  1:29 UTC (permalink / raw)
  To: Alan Ott
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-zigbee-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	David S. Miller
In-Reply-To: <5250B680.2080301-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>

On Sat, Oct 05, 2013 at 09:01:52PM -0400, Alan Ott wrote:
> On 10/05/2013 08:24 PM, Alexander Aring wrote:
> >Hi Alan,
> >
> >On Sat, Oct 05, 2013 at 05:38:00PM -0400, Alan Ott wrote:
> >>When a lowpan link to a wpan device is created, set the hardware address
> >>of the lowpan link to that of the wpan device.
> >>
> >>Signed-off-by: Alan Ott<alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>
> >>---
> >>  net/ieee802154/6lowpan.c | 3 +++
> >>  1 file changed, 3 insertions(+)
> >>
> >>diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
> >>index c85e71e..fb89133 100644
> >>--- a/net/ieee802154/6lowpan.c
> >>+++ b/net/ieee802154/6lowpan.c
> >>@@ -1386,6 +1386,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
> >>  	entry->ldev = dev;
> >>+	BUG_ON(IEEE802154_ADDR_LEN != dev->addr_len);
> >So if somebody make a:
> >"ip link add link $NOT_8BYTE_HWADDR_DEV name $NAME type lowpan"
> >
> >the kernel creates a BUG_ON? Okay it seems that case is very unusual but
> >better is to return a errno so "maybe(I don't know it)" the userspace
> >software will generate a error and the whole kernel doesn't crash.
> 
> That line checks the length of the address of the lowpan device, not
> the real device the lowpan device is attached to (and on second
> look, I don't like the Yoda code; didn't notice that before).
> 
> Further, running:
>     ip link add link eth0 name lowpan0 type lowpan
> 
> like you suggest, crashes the kernel hard, with or without my patch.
> More stuff to fix....
>

Okay isn't the dev->addr_len some static value?

Maybe we should check than for:

if (real_dev->type != ARPHRD_IEEE802154)
	return -EINVAL;

after we set real_dev. It's all ARPHRD_IEEE802154 specific code.

> 
> >>+	memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN);
> >Is there one case where we read the dev->dev_addr? I saw a case when we
> >set this [1], but I don't know why we need this. Did you detect some
> >problems because this isn't the "real" hw addr?
> >
> >Other case is I am feeling uneasy when we have two netdev devices with
> >the same hw addr.
> >[1]https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/net/ieee802154/6lowpan.c?id=refs/tags/v3.12-rc3#n1102
> 
> In my testing, lowpan devices need a hardware address which is the
> same as the wpan, or else stuff like ping doesn't work at all. In
> all the examples I've ever seen, the lowpan's hardware address has
> been set manually.
> 
Ah ok.

> What does your test setup look like that you don't have a hardware
> address for your lowpan device? Are you able to ping other Linux
> hosts?
> 

I have a little script for setting up a lowpan device.
I set the hardware address with:

ip link set $LOWPAN_DEV address $EUI64_ADDR

after I add the lowpan link. I am able to ping linux and contiki(without
fragmentation).

- Alex

------------------------------------------------------------------------------
October Webinars: Code for Performance
Free Intel webinars can help you accelerate application performance.
Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from 
the latest Intel processors and coprocessors. See abstracts and register >
http://pubads.g.doubleclick.net/gampad/clk?id=60134791&iu=/4140/ostg.clktrk

^ permalink raw reply

* Re: [PATCH net-next] net: Separate the close_list and the unreg_list
From: Francesco Ruggeri @ 2013-10-05 13:18 UTC (permalink / raw)
  To: fruggeri, netdev, ebiederm

Hi Eric,
I am running some more extensive tests with this patch and I ran into the crash below.

It may be caused by

@@ -1301,7 +1301,7 @@ int dev_close(struct net_device *dev)
 	if (dev->flags & IFF_UP) {
 		LIST_HEAD(single);
 
-		list_add(&dev->unreg_list, &single);
+		list_add(&dev->close_list, &single);
 		dev_close_many(&single);
 		list_del(&single);
 	}

dev_close_many expects net_devices to be linked by unreg_list.
Let me know what you think.

Thanks,
Francesco

<1>BUG: unable to handle kernel NULL pointer dereference at 0000000000000190
<1>IP: [<ffffffff8142c709>] packet_notifier+0x1e/0x163
<4>PGD 94f815067 PUD 0 
<4>Oops: 0000 [#1] SMP 
<4>CPU 1 
<4>Modules linked in: dummy loop tulip veth xt_tcpudp iptable_filter nfsd lockd nfs_acl auth_rpcgss exportfs sunrpc cpufreq_ondemand acpi_cpufreq freq_table mperf macvlan dm_mirror dm_region_hash dm_log uinput ip6table_filter ip6_tables bonding kvm_intel kvm fuse xt_multiport iptable_nat ip_tables nf_nat x_tables nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 tun 8021q coretemp hwmon crc32c_intel ghash_clmulni_intel sg joydev iTCO_wdt iTCO_vendor_support pcspkr i2c_i801 microcode i2c_core ioatdma dca raid1 aesni_intel cryptd aes_x86_64 aes_generic isci libsas scsi_transport_sas ehci_hcd igb wmi dm_mod [last unloaded: scsi_wait_scan]
<4>
<4>Pid: 14572, comm: Ira Not tainted 3.4.43-1479218.2011fruggeriArora.1.fc14.x86_64 #1 Supermicro X9DRT/X9DRT
<4>RIP: 0010:[<ffffffff8142c709>]  [<ffffffff8142c709>] packet_notifier+0x1e/0x163
<4>RSP: 0018:ffff881006c1f668  EFLAGS: 00210292
<4>RAX: 0000000000000000 RBX: ffff88108dec1810 RCX: ffffffff81a6df10
<4>RDX: ffff88108dec1810 RSI: 0000000000000009 RDI: ffffffff81a6df10
<4>RBP: ffff881006c1f698 R08: 0000000000000000 R09: 0000000000000003
<4>R10: 0000000000000003 R11: 0000000000000000 R12: ffff88108dec1810
<4>R13: 00000000fffffff3 R14: ffffffffa0207050 R15: 0000000000000009
<4>FS:  0000000000000000(0000) GS:ffff88103fc20000(0063) knlGS:00000000f5630b00
<4>CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
<4>CR2: 0000000000000190 CR3: 0000000b405bb000 CR4: 00000000000007e0
<4>DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
<4>DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
<4>Process Ira (pid: 14572, threadinfo ffff881006c1e000, task ffff88094f8d8da0)
<4>Stack:
<4> ffff881006c1f698 ffff88108dec1810 0000000000000009 00000000fffffff3
<4> ffffffffa0207050 ffffffffa02f2060 ffff881006c1f6d8 ffffffff814513a5
<4> ffffea000a276500 0000000000000000 0000000000000009 ffff88108dec1810
<4>Call Trace:
<4> [<ffffffff814513a5>] notifier_call_chain+0x32/0x5e
<4> [<ffffffff8104ec06>] raw_notifier_call_chain+0xf/0x11
<4> [<ffffffff813848f6>] call_netdevice_notifiers+0x45/0x4a
<4> [<ffffffff8138493d>] __dev_close_many+0x42/0xbc
<4> [<ffffffff81384a67>] dev_close_many+0x6e/0xf8
<4> [<ffffffff81384b2b>] dev_close+0x3a/0x4d
<4> [<ffffffff81386b54>] dev_change_net_namespace+0xa4/0x1a0
<4> [<ffffffff8139249e>] do_setlink+0x77/0x78b
<4> [<ffffffff810cb555>] ? pmd_offset+0x14/0x3b
<4> [<ffffffff8144e0b9>] ? _raw_spin_lock+0x9/0xb
<4> [<ffffffff810f8317>] ? full_name_hash+0x19/0x5c
<4> [<ffffffff8138661c>] ? dev_name_hash.clone.50+0x24/0x3c
<4> [<ffffffff81393a9b>] rtnl_newlink+0x24d/0x49a
<4> [<ffffffff8139390d>] ? rtnl_newlink+0xbf/0x49a
<4> [<ffffffff8144e0e9>] ? _raw_spin_unlock_irqrestore+0x12/0x14
<4> [<ffffffff810e7c3b>] ? virt_to_head_page+0x9/0x2c
<4> [<ffffffff8139364f>] rtnetlink_rcv_msg+0x24c/0x262
<4> [<ffffffff81393403>] ? __rtnl_unlock+0x12/0x12
<4> [<ffffffff813a7e44>] netlink_rcv_skb+0x40/0x8c
<4> [<ffffffff81392d80>] rtnetlink_rcv+0x21/0x28
<4> [<ffffffff813a795b>] netlink_unicast+0xec/0x155
<4> [<ffffffff813a7c4c>] netlink_sendmsg+0x288/0x2a6
<4> [<ffffffff8137414a>] __sock_sendmsg+0x6e/0x7a
<4> [<ffffffff81374a29>] sock_sendmsg+0xa3/0xbc
<4> [<ffffffff810b195a>] ? generic_file_aio_write+0xa8/0xb8
<4> [<ffffffff811665b9>] ? ext4_file_write+0x1eb/0x240
<4> [<ffffffff810f1247>] ? fget_light+0x33/0x83
<4> [<ffffffff81374aaa>] ? sockfd_lookup_light+0x1b/0x53
<4> [<ffffffff81375f7b>] sys_sendto+0x12a/0x16c
<4> [<ffffffff810ef8a3>] ? fsnotify_modify+0x5d/0x65
<4> [<ffffffff81375fcc>] sys_send+0xf/0x11
<4> [<ffffffff8139cc9c>] compat_sys_socketcall+0xd7/0x19b
<4> [<ffffffff81455bd6>] sysenter_dispatch+0x7/0x21
<4>Code: ff ff 80 8b 48 04 00 00 01 5b 5b c9 c3 55 48 89 e5 41 57 49 89 f7 41 56 41 55 41 54 49 89 d4 53 48 83 ec 08 48 8b 82 48 04 00 00 <4c> 8b b0 90 01 00 00 e9 06 01 00 00 4c 8b ab 58 04 00 00 4d 85 
<1>RIP  [<ffffffff8142c709>] packet_notifier+0x1e/0x163
<4> RSP <ffff881006c1f668>
<4>CR2: 0000000000000190

^ permalink raw reply

* Re: [PATCH net-next] net: Separate the close_list and the unreg_list
From: Eric W. Biederman @ 2013-10-06  2:13 UTC (permalink / raw)
  To: Francesco Ruggeri; +Cc: netdev
In-Reply-To: <5250c0b6.45dc420a.738b.6a58@mx.google.com>

Francesco Ruggeri <fruggeri@aristanetworks.com> writes:

> Hi Eric,
> I am running some more extensive tests with this patch and I ran into
> the crash below.
>
> It may be caused by
>
> @@ -1301,7 +1301,7 @@ int dev_close(struct net_device *dev)
>  	if (dev->flags & IFF_UP) {
>  		LIST_HEAD(single);
>  
> -		list_add(&dev->unreg_list, &single);
> +		list_add(&dev->close_list, &single);
>  		dev_close_many(&single);
>  		list_del(&single);
>  	}
>
> dev_close_many expects net_devices to be linked by unreg_list.
> Let me know what you think.

Yes.  There does appear to be a logic problem there.
Grr.

It looks like the least error prone approach is to simply have
dev_close_many consume it's list.

Can you verify this incremental change fixes it for you?

This changes all of the callers to build the close_list before
calling dev_close_many.  Which makes it safe to muck with the close
list however we want.

---

diff --git a/net/core/dev.c b/net/core/dev.c
index c8db0bfc36d6..fa0b2b06c1a6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1362,16 +1362,15 @@ static int __dev_close(struct net_device *dev)
 static int dev_close_many(struct list_head *head)
 {
        struct net_device *dev, *tmp;
-       LIST_HEAD(many);
 
-       /* rollback_registered_many needs the original unmodified list */
-       list_for_each_entry(dev, head, unreg_list)
-               if (dev->flags & IFF_UP)
-                       list_add_tail(&dev->close_list, &many);
+       /* Remove the devices that don't need to be closed */
+       list_for_each_entry_safe(dev, tmp, head, close_list)
+               if (!(dev->flags & IFF_UP))
+                       list_del_init(&dev->close_list);
 
-       __dev_close_many(&many);
+       __dev_close_many(head);
 
-       list_for_each_entry_safe(dev, tmp, &many, close_list) {
+       list_for_each_entry_safe(dev, tmp, head, close_list) {
                rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
                call_netdevice_notifiers(NETDEV_DOWN, dev);
                list_del_init(&dev->close_list);
@@ -5439,6 +5438,7 @@ static void net_set_todo(struct net_device *dev)
 static void rollback_registered_many(struct list_head *head)
 {
        struct net_device *dev, *tmp;
+       LIST_HEAD(close_head);
 
        BUG_ON(dev_boot_phase);
        ASSERT_RTNL();
@@ -5461,7 +5461,9 @@ static void rollback_registered_many(struct list_head *head)
        }
 
        /* If device is running, close it first. */
-       dev_close_many(head);
+       list_for_each_entry(dev, head, unreg_list)
+               list_add_tail(&dev->close_list, &close_head);
+       dev_close_many(&close_head);
 
        list_for_each_entry(dev, head, unreg_list) {
                /* And unlink it from device chain. */

^ permalink raw reply related

* [PATCH] net: Separate the close_list and the unreg_list v2
From: Eric W. Biederman @ 2013-10-06  2:26 UTC (permalink / raw)
  To: Francesco Ruggeri; +Cc: netdev
In-Reply-To: <5250c0b6.45dc420a.738b.6a58@mx.google.com>


Separate the unreg_list and the close_list in dev_close_many preventing
dev_close_many from permuting the unreg_list.  The permutations of the
unreg_list have resulted in cases where the loopback device is accessed
it has been freed in code such as dst_ifdown.  Resulting in subtle memory
corruption.

This is the second bug from sharing the storage between the close_list
and the unreg_list.  The issues that crop up with sharing are
apparently too subtle to show up in normal testing or usage, so let's
forget about being clever and use two separate lists.

v2: Make all callers pass in a close_list to dev_close_many

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---

Sending the complete diff because this version is actually more
readable and more obviously correct.

 include/linux/netdevice.h |    1 +
 net/core/dev.c            |   25 ++++++++++++++-----------
 net/sched/sch_generic.c   |    6 +++---
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f5cd464271bf..6d77e0f3cc10 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1143,6 +1143,7 @@ struct net_device {
 	struct list_head	dev_list;
 	struct list_head	napi_list;
 	struct list_head	unreg_list;
+	struct list_head	close_list;
 
 	/* directly linked devices, like slaves for bonding */
 	struct {
diff --git a/net/core/dev.c b/net/core/dev.c
index c25db20a4246..fa0b2b06c1a6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1307,7 +1307,7 @@ static int __dev_close_many(struct list_head *head)
 	ASSERT_RTNL();
 	might_sleep();
 
-	list_for_each_entry(dev, head, unreg_list) {
+	list_for_each_entry(dev, head, close_list) {
 		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
 
 		clear_bit(__LINK_STATE_START, &dev->state);
@@ -1323,7 +1323,7 @@ static int __dev_close_many(struct list_head *head)
 
 	dev_deactivate_many(head);
 
-	list_for_each_entry(dev, head, unreg_list) {
+	list_for_each_entry(dev, head, close_list) {
 		const struct net_device_ops *ops = dev->netdev_ops;
 
 		/*
@@ -1351,7 +1351,7 @@ static int __dev_close(struct net_device *dev)
 	/* Temporarily disable netpoll until the interface is down */
 	netpoll_rx_disable(dev);
 
-	list_add(&dev->unreg_list, &single);
+	list_add(&dev->close_list, &single);
 	retval = __dev_close_many(&single);
 	list_del(&single);
 
@@ -1362,21 +1362,20 @@ static int __dev_close(struct net_device *dev)
 static int dev_close_many(struct list_head *head)
 {
 	struct net_device *dev, *tmp;
-	LIST_HEAD(tmp_list);
 
-	list_for_each_entry_safe(dev, tmp, head, unreg_list)
+	/* Remove the devices that don't need to be closed */
+	list_for_each_entry_safe(dev, tmp, head, close_list)
 		if (!(dev->flags & IFF_UP))
-			list_move(&dev->unreg_list, &tmp_list);
+			list_del_init(&dev->close_list);
 
 	__dev_close_many(head);
 
-	list_for_each_entry(dev, head, unreg_list) {
+	list_for_each_entry_safe(dev, tmp, head, close_list) {
 		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
 		call_netdevice_notifiers(NETDEV_DOWN, dev);
+		list_del_init(&dev->close_list);
 	}
 
-	/* rollback_registered_many needs the complete original list */
-	list_splice(&tmp_list, head);
 	return 0;
 }
 
@@ -1397,7 +1396,7 @@ int dev_close(struct net_device *dev)
 		/* Block netpoll rx while the interface is going down */
 		netpoll_rx_disable(dev);
 
-		list_add(&dev->unreg_list, &single);
+		list_add(&dev->close_list, &single);
 		dev_close_many(&single);
 		list_del(&single);
 
@@ -5439,6 +5438,7 @@ static void net_set_todo(struct net_device *dev)
 static void rollback_registered_many(struct list_head *head)
 {
 	struct net_device *dev, *tmp;
+	LIST_HEAD(close_head);
 
 	BUG_ON(dev_boot_phase);
 	ASSERT_RTNL();
@@ -5461,7 +5461,9 @@ static void rollback_registered_many(struct list_head *head)
 	}
 
 	/* If device is running, close it first. */
-	dev_close_many(head);
+	list_for_each_entry(dev, head, unreg_list)
+		list_add_tail(&dev->close_list, &close_head);
+	dev_close_many(&close_head);
 
 	list_for_each_entry(dev, head, unreg_list) {
 		/* And unlink it from device chain. */
@@ -6257,6 +6259,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
+	INIT_LIST_HEAD(&dev->close_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
 	INIT_LIST_HEAD(&dev->adj_list.upper);
 	INIT_LIST_HEAD(&dev->adj_list.lower);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e7121d29c4bd..7fc899a943a8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -829,7 +829,7 @@ void dev_deactivate_many(struct list_head *head)
 	struct net_device *dev;
 	bool sync_needed = false;
 
-	list_for_each_entry(dev, head, unreg_list) {
+	list_for_each_entry(dev, head, close_list) {
 		netdev_for_each_tx_queue(dev, dev_deactivate_queue,
 					 &noop_qdisc);
 		if (dev_ingress_queue(dev))
@@ -848,7 +848,7 @@ void dev_deactivate_many(struct list_head *head)
 		synchronize_net();
 
 	/* Wait for outstanding qdisc_run calls. */
-	list_for_each_entry(dev, head, unreg_list)
+	list_for_each_entry(dev, head, close_list)
 		while (some_qdisc_is_busy(dev))
 			yield();
 }
@@ -857,7 +857,7 @@ void dev_deactivate(struct net_device *dev)
 {
 	LIST_HEAD(single);
 
-	list_add(&dev->unreg_list, &single);
+	list_add(&dev->close_list, &single);
 	dev_deactivate_many(&single);
 	list_del(&single);
 }
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH] eisa: standardize on eisa_register_driver like similar bus registrations
From: Matthew Whitehead @ 2013-10-06  2:35 UTC (permalink / raw)
  To: netdev; +Cc: linux-scsi, Matthew Whitehead

The other buses (isa, pci, pnp, parport, usb, tty, etc) all use the convention
of ${BUSNAME}_register_driver. Rewrite the little remaining code that uses EISA
to follow this convention for easier readability.

This affects the EISA bus, SCSI, and networking subsystems so only one should
ultimately merge the patch if it is accepted.

Signed-off-by: Matthew Whitehead <tedheadster@gmail.com>
---
 drivers/eisa/eisa-bus.c                |    8 ++++----
 drivers/net/ethernet/3com/3c509.c      |    4 ++--
 drivers/net/ethernet/3com/3c59x.c      |    6 +++---
 drivers/net/ethernet/dec/tulip/de4x5.c |    4 ++--
 drivers/net/ethernet/hp/hp100.c        |    6 +++---
 drivers/net/fddi/defxx.c               |    4 ++--
 drivers/scsi/advansys.c                |    6 +++---
 drivers/scsi/aha1740.c                 |    4 ++--
 drivers/scsi/aic7xxx/aic7770_osm.c     |    4 ++--
 drivers/scsi/sim710.c                  |    4 ++--
 include/linux/eisa.h                   |    8 ++++----
 11 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/drivers/eisa/eisa-bus.c b/drivers/eisa/eisa-bus.c
index 272a3ec..9fe589b 100644
--- a/drivers/eisa/eisa-bus.c
+++ b/drivers/eisa/eisa-bus.c
@@ -143,18 +143,18 @@ struct bus_type eisa_bus_type = {
 };
 EXPORT_SYMBOL(eisa_bus_type);
 
-int eisa_driver_register(struct eisa_driver *edrv)
+int eisa_register_driver(struct eisa_driver *edrv)
 {
 	edrv->driver.bus = &eisa_bus_type;
 	return driver_register(&edrv->driver);
 }
-EXPORT_SYMBOL(eisa_driver_register);
+EXPORT_SYMBOL(eisa_register_driver);
 
-void eisa_driver_unregister(struct eisa_driver *edrv)
+void eisa_unregister_driver(struct eisa_driver *edrv)
 {
 	driver_unregister(&edrv->driver);
 }
-EXPORT_SYMBOL(eisa_driver_unregister);
+EXPORT_SYMBOL(eisa_unregister_driver);
 
 static ssize_t eisa_show_sig(struct device *dev, struct device_attribute *attr,
 			     char *buf)
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index ede8daa..ddfc2f0 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -1417,7 +1417,7 @@ static int __init el3_init_module(void)
 			isa_registered = 1;
 	}
 #ifdef CONFIG_EISA
-	ret = eisa_driver_register(&el3_eisa_driver);
+	ret = eisa_register_driver(&el3_eisa_driver);
 	if (!ret)
 		eisa_registered = 1;
 #endif
@@ -1447,7 +1447,7 @@ static void __exit el3_cleanup_module(void)
 		release_region(id_port, 1);
 #ifdef CONFIG_EISA
 	if (eisa_registered)
-		eisa_driver_unregister(&el3_eisa_driver);
+		eisa_unregister_driver(&el3_eisa_driver);
 #endif
 }
 
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index ad5272b..df22872 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -976,12 +976,12 @@ static int __init vortex_eisa_init(void)
 #ifdef CONFIG_EISA
 	int err;
 
-	err = eisa_driver_register (&vortex_eisa_driver);
+	err = eisa_register_driver (&vortex_eisa_driver);
 	if (!err) {
 		/*
 		 * Because of the way EISA bus is probed, we cannot assume
 		 * any device have been found when we exit from
-		 * eisa_driver_register (the bus root driver may not be
+		 * eisa_register_driver (the bus root driver may not be
 		 * initialized yet). So we blindly assume something was
 		 * found, and let the sysfs magic happened...
 		 */
@@ -3295,7 +3295,7 @@ static void __exit vortex_eisa_cleanup(void)
 
 #ifdef CONFIG_EISA
 	/* Take care of the EISA devices */
-	eisa_driver_unregister(&vortex_eisa_driver);
+	eisa_unregister_driver(&vortex_eisa_driver);
 #endif
 
 	if (compaq_net_device) {
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index 263b92c..1df85a1 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -5570,7 +5570,7 @@ static int __init de4x5_module_init (void)
 	err = pci_register_driver(&de4x5_pci_driver);
 #endif
 #ifdef CONFIG_EISA
-	err |= eisa_driver_register (&de4x5_eisa_driver);
+	err |= eisa_register_driver (&de4x5_eisa_driver);
 #endif
 
 	return err;
@@ -5582,7 +5582,7 @@ static void __exit de4x5_module_exit (void)
 	pci_unregister_driver (&de4x5_pci_driver);
 #endif
 #ifdef CONFIG_EISA
-	eisa_driver_unregister (&de4x5_eisa_driver);
+	eisa_unregister_driver (&de4x5_eisa_driver);
 #endif
 }
 
diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
index 91227d0..bf817ff 100644
--- a/drivers/net/ethernet/hp/hp100.c
+++ b/drivers/net/ethernet/hp/hp100.c
@@ -3030,7 +3030,7 @@ static int __init hp100_module_init(void)
 	if (err && err != -ENODEV)
 		goto out;
 #ifdef CONFIG_EISA
-	err = eisa_driver_register(&hp100_eisa_driver);
+	err = eisa_register_driver(&hp100_eisa_driver);
 	if (err && err != -ENODEV)
 		goto out2;
 #endif
@@ -3043,7 +3043,7 @@ static int __init hp100_module_init(void)
 	return err;
  out3:
 #ifdef CONFIG_EISA
-	eisa_driver_unregister (&hp100_eisa_driver);
+	eisa_unregister_driver (&hp100_eisa_driver);
  out2:
 #endif
 	hp100_isa_cleanup();
@@ -3055,7 +3055,7 @@ static void __exit hp100_module_exit(void)
 {
 	hp100_isa_cleanup();
 #ifdef CONFIG_EISA
-	eisa_driver_unregister (&hp100_eisa_driver);
+	eisa_unregister_driver (&hp100_eisa_driver);
 #endif
 #ifdef CONFIG_PCI
 	pci_unregister_driver (&hp100_pci_driver);
diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c
index 0b40e1c..01ce473 100644
--- a/drivers/net/fddi/defxx.c
+++ b/drivers/net/fddi/defxx.c
@@ -3713,7 +3713,7 @@ static int dfx_init(void)
 
 	status = pci_register_driver(&dfx_pci_driver);
 	if (!status)
-		status = eisa_driver_register(&dfx_eisa_driver);
+		status = eisa_register_driver(&dfx_eisa_driver);
 	if (!status)
 		status = tc_register_driver(&dfx_tc_driver);
 	return status;
@@ -3722,7 +3722,7 @@ static int dfx_init(void)
 static void dfx_cleanup(void)
 {
 	tc_unregister_driver(&dfx_tc_driver);
-	eisa_driver_unregister(&dfx_eisa_driver);
+	eisa_unregister_driver(&dfx_eisa_driver);
 	pci_unregister_driver(&dfx_pci_driver);
 }
 
diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index c67e401..17451e8 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -12307,7 +12307,7 @@ static int __init advansys_init(void)
 	if (error)
 		goto unregister_isa;
 
-	error = eisa_driver_register(&advansys_eisa_driver);
+	error = eisa_register_driver(&advansys_eisa_driver);
 	if (error)
 		goto unregister_vlb;
 
@@ -12318,7 +12318,7 @@ static int __init advansys_init(void)
 	return 0;
 
  unregister_eisa:
-	eisa_driver_unregister(&advansys_eisa_driver);
+	eisa_unregister_driver(&advansys_eisa_driver);
  unregister_vlb:
 	isa_unregister_driver(&advansys_vlb_driver);
  unregister_isa:
@@ -12330,7 +12330,7 @@ static int __init advansys_init(void)
 static void __exit advansys_exit(void)
 {
 	pci_unregister_driver(&advansys_pci_driver);
-	eisa_driver_unregister(&advansys_eisa_driver);
+	eisa_unregister_driver(&advansys_eisa_driver);
 	isa_unregister_driver(&advansys_vlb_driver);
 	isa_unregister_driver(&advansys_isa_driver);
 }
diff --git a/drivers/scsi/aha1740.c b/drivers/scsi/aha1740.c
index 5f31017..5f6bfed 100644
--- a/drivers/scsi/aha1740.c
+++ b/drivers/scsi/aha1740.c
@@ -664,12 +664,12 @@ static struct eisa_driver aha1740_driver = {
 
 static __init int aha1740_init (void)
 {
-	return eisa_driver_register (&aha1740_driver);
+	return eisa_register_driver (&aha1740_driver);
 }
 
 static __exit void aha1740_exit (void)
 {
-	eisa_driver_unregister (&aha1740_driver);
+	eisa_unregister_driver (&aha1740_driver);
 }
 
 module_init (aha1740_init);
diff --git a/drivers/scsi/aic7xxx/aic7770_osm.c b/drivers/scsi/aic7xxx/aic7770_osm.c
index 0cb8ef6..cbe99e4 100644
--- a/drivers/scsi/aic7xxx/aic7770_osm.c
+++ b/drivers/scsi/aic7xxx/aic7770_osm.c
@@ -146,11 +146,11 @@ static struct eisa_driver aic7770_driver = {
 int
 ahc_linux_eisa_init(void)
 {
-	return eisa_driver_register(&aic7770_driver);
+	return eisa_register_driver(&aic7770_driver);
 }
   
 void
 ahc_linux_eisa_exit(void)
 {
-	eisa_driver_unregister(&aic7770_driver);
+	eisa_unregister_driver(&aic7770_driver);
 }
diff --git a/drivers/scsi/sim710.c b/drivers/scsi/sim710.c
index 3b3b56f..d5c20cf 100644
--- a/drivers/scsi/sim710.c
+++ b/drivers/scsi/sim710.c
@@ -235,7 +235,7 @@ static int __init sim710_init(void)
 #endif
 
 #ifdef CONFIG_EISA
-	err = eisa_driver_register(&sim710_eisa_driver);
+	err = eisa_register_driver(&sim710_eisa_driver);
 #endif
 	/* FIXME: what we'd really like to return here is -ENODEV if
 	 * no devices have actually been found.  Instead, the err
@@ -248,7 +248,7 @@ static int __init sim710_init(void)
 static void __exit sim710_exit(void)
 {
 #ifdef CONFIG_EISA
-	eisa_driver_unregister(&sim710_eisa_driver);
+	eisa_unregister_driver(&sim710_eisa_driver);
 #endif
 }
 
diff --git a/include/linux/eisa.h b/include/linux/eisa.h
index 6925249..ab8bdb8 100644
--- a/include/linux/eisa.h
+++ b/include/linux/eisa.h
@@ -65,13 +65,13 @@ struct eisa_driver {
 #ifdef CONFIG_EISA
 
 extern struct bus_type eisa_bus_type;
-int eisa_driver_register (struct eisa_driver *edrv);
-void eisa_driver_unregister (struct eisa_driver *edrv);
+int eisa_register_driver (struct eisa_driver *edrv);
+void eisa_unregister_driver (struct eisa_driver *edrv);
 
 #else /* !CONFIG_EISA */
 
-static inline int eisa_driver_register (struct eisa_driver *edrv) { return 0; }
-static inline void eisa_driver_unregister (struct eisa_driver *edrv) { }
+static inline int eisa_register_driver (struct eisa_driver *edrv) { return 0; }
+static inline void eisa_unregister_driver (struct eisa_driver *edrv) { }
 
 #endif /* !CONFIG_EISA */
 
-- 
1.7.2.5


^ permalink raw reply related

* Re: Introduce support to lazy initialize mostly static keys v2
From: David Miller @ 2013-10-06  2:55 UTC (permalink / raw)
  To: hannes; +Cc: netdev, linux-kernel
In-Reply-To: <1381015258-7667-1-git-send-email-hannes@stressinduktion.org>

Please in the future use "[PATCH 0/8] " as a subject prefix for
postings such as this one which introduces a patch set.

Thanks.

^ permalink raw reply

* [PATCH v2 0/2] 6lowpan default hardware address
From: Alan Ott @ 2013-10-06  3:15 UTC (permalink / raw)
  To: Alexander Smirnov, Dmitry Eremin-Solenikov, David S. Miller
  Cc: Alan Ott, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-zigbee-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Alexander Aring suggested that devices desired to be linked to 6lowpan
be checked for actually being of type IEEE802154, since IEEE802154 devices
are all that are supported by 6lowpan at present.

Alan Ott (2):
  6lowpan: Only make 6lowpan links to IEEE802154 devices
  6lowpan: Sync default hardware address of lowpan links to their wpan

 net/ieee802154/6lowpan.c | 5 +++++
 1 file changed, 5 insertions(+)

-- 
1.8.1.2


------------------------------------------------------------------------------
October Webinars: Code for Performance
Free Intel webinars can help you accelerate application performance.
Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from 
the latest Intel processors and coprocessors. See abstracts and register >
http://pubads.g.doubleclick.net/gampad/clk?id=60134791&iu=/4140/ostg.clktrk

^ permalink raw reply

* [PATCH v2 1/2] 6lowpan: Only make 6lowpan links to IEEE802154 devices
From: Alan Ott @ 2013-10-06  3:15 UTC (permalink / raw)
  To: Alexander Smirnov, Dmitry Eremin-Solenikov, David S. Miller
  Cc: Alan Ott, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-zigbee-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
In-Reply-To: <1381029319-6835-1-git-send-email-alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>

Refuse to create 6lowpan links if the actual hardware interface is
of any type other than ARPHRD_IEEE802154.

Signed-off-by: Alan Ott <alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>
Suggested-by: Alexander Aring <alex.aring-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
---
 net/ieee802154/6lowpan.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index c85e71e..8f56b2b 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -1372,6 +1372,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
 	real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
 	if (!real_dev)
 		return -ENODEV;
+	if (real_dev->type != ARPHRD_IEEE802154)
+		return -EINVAL;
 
 	lowpan_dev_info(dev)->real_dev = real_dev;
 	lowpan_dev_info(dev)->fragment_tag = 0;
-- 
1.8.1.2


------------------------------------------------------------------------------
October Webinars: Code for Performance
Free Intel webinars can help you accelerate application performance.
Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from 
the latest Intel processors and coprocessors. See abstracts and register >
http://pubads.g.doubleclick.net/gampad/clk?id=60134791&iu=/4140/ostg.clktrk

^ permalink raw reply related

* [PATCH v2 2/2] 6lowpan: Sync default hardware address of lowpan links to their wpan
From: Alan Ott @ 2013-10-06  3:15 UTC (permalink / raw)
  To: Alexander Smirnov, Dmitry Eremin-Solenikov, David S. Miller
  Cc: Alan Ott, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-zigbee-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
In-Reply-To: <1381029319-6835-1-git-send-email-alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>

When a lowpan link to a wpan device is created, set the hardware address
of the lowpan link to that of the wpan device.

Signed-off-by: Alan Ott <alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>
---
 net/ieee802154/6lowpan.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 8f56b2b..ff41b4d 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -1388,6 +1388,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
 
 	entry->ldev = dev;
 
+	/* Set the lowpan harware address to the wpan hardware address. */
+	memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN);
+
 	mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx);
 	INIT_LIST_HEAD(&entry->list);
 	list_add_tail(&entry->list, &lowpan_devices);
-- 
1.8.1.2


------------------------------------------------------------------------------
October Webinars: Code for Performance
Free Intel webinars can help you accelerate application performance.
Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from 
the latest Intel processors and coprocessors. See abstracts and register >
http://pubads.g.doubleclick.net/gampad/clk?id=60134791&iu=/4140/ostg.clktrk

^ permalink raw reply related

* [PATCH v1 0/3] Fix race conditions in mrf24j40 interrupts
From: Alan Ott @ 2013-10-06  3:52 UTC (permalink / raw)
  To: Alexander Smirnov, Dmitry Eremin-Solenikov, David S. Miller,
	david-1EggE+PRa6vk1uMJSBkQmQ
  Cc: Alan Ott, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-zigbee-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
In-Reply-To: <1369188080-8904-1-git-send-email-alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>

After testing with the betas of this patchset, it's been rebased and is
ready for inclusion.

David Hauweele noticed that the mrf24j40 would hang arbitrarily after some
period of heavy traffic.  Two race conditions were discovered, and the
driver was changed to use threaded interrupts, since the enable/disable of
interrupts in the driver has recently been a lighning rod whenever issues
arise related to interrupts (costing engineering time), and since threaded
interrupts are the right way to do it.

Alan Ott (3):
  mrf24j40: Move INIT_COMPLETION() to before packet transmission
  mrf24j40: Use threaded IRQ handler
  mrf24j40: Use level-triggered interrupts

 drivers/net/ieee802154/mrf24j40.c | 31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

-- 
1.8.1.2


------------------------------------------------------------------------------
October Webinars: Code for Performance
Free Intel webinars can help you accelerate application performance.
Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from 
the latest Intel processors and coprocessors. See abstracts and register >
http://pubads.g.doubleclick.net/gampad/clk?id=60134791&iu=/4140/ostg.clktrk

^ permalink raw reply

* [PATCH v1 1/3] mrf24j40: Move INIT_COMPLETION() to before packet transmission
From: Alan Ott @ 2013-10-06  3:52 UTC (permalink / raw)
  To: Alexander Smirnov, Dmitry Eremin-Solenikov, David S. Miller,
	david-1EggE+PRa6vk1uMJSBkQmQ
  Cc: Alan Ott, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-zigbee-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
In-Reply-To: <1381031544-2960-1-git-send-email-alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>

This avoids a race condition where complete(tx_complete) could be called
before tx_complete is initialized.

Signed-off-by: Alan Ott <alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>
---
 drivers/net/ieee802154/mrf24j40.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c
index 42e6dee..66bb4ce 100644
--- a/drivers/net/ieee802154/mrf24j40.c
+++ b/drivers/net/ieee802154/mrf24j40.c
@@ -344,6 +344,8 @@ static int mrf24j40_tx(struct ieee802154_dev *dev, struct sk_buff *skb)
 	if (ret)
 		goto err;
 
+	INIT_COMPLETION(devrec->tx_complete);
+
 	/* Set TXNTRIG bit of TXNCON to send packet */
 	ret = read_short_reg(devrec, REG_TXNCON, &val);
 	if (ret)
@@ -354,8 +356,6 @@ static int mrf24j40_tx(struct ieee802154_dev *dev, struct sk_buff *skb)
 		val |= 0x4;
 	write_short_reg(devrec, REG_TXNCON, val);
 
-	INIT_COMPLETION(devrec->tx_complete);
-
 	/* Wait for the device to send the TX complete interrupt. */
 	ret = wait_for_completion_interruptible_timeout(
 						&devrec->tx_complete,
-- 
1.8.1.2


------------------------------------------------------------------------------
October Webinars: Code for Performance
Free Intel webinars can help you accelerate application performance.
Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from 
the latest Intel processors and coprocessors. See abstracts and register >
http://pubads.g.doubleclick.net/gampad/clk?id=60134791&iu=/4140/ostg.clktrk

^ permalink raw reply related

* [PATCH v1 2/3] mrf24j40: Use threaded IRQ handler
From: Alan Ott @ 2013-10-06  3:52 UTC (permalink / raw)
  To: Alexander Smirnov, Dmitry Eremin-Solenikov, David S. Miller,
	david-1EggE+PRa6vk1uMJSBkQmQ
  Cc: Alan Ott, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-zigbee-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
In-Reply-To: <1381031544-2960-1-git-send-email-alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>

Eliminate all the workqueue and interrupt enable/disable.

Signed-off-by: Alan Ott <alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>
---
 drivers/net/ieee802154/mrf24j40.c | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c
index 66bb4ce..c1bc688 100644
--- a/drivers/net/ieee802154/mrf24j40.c
+++ b/drivers/net/ieee802154/mrf24j40.c
@@ -82,7 +82,6 @@ struct mrf24j40 {
 
 	struct mutex buffer_mutex; /* only used to protect buf */
 	struct completion tx_complete;
-	struct work_struct irqwork;
 	u8 *buf; /* 3 bytes. Used for SPI single-register transfers. */
 };
 
@@ -590,17 +589,6 @@ static struct ieee802154_ops mrf24j40_ops = {
 static irqreturn_t mrf24j40_isr(int irq, void *data)
 {
 	struct mrf24j40 *devrec = data;
-
-	disable_irq_nosync(irq);
-
-	schedule_work(&devrec->irqwork);
-
-	return IRQ_HANDLED;
-}
-
-static void mrf24j40_isrwork(struct work_struct *work)
-{
-	struct mrf24j40 *devrec = container_of(work, struct mrf24j40, irqwork);
 	u8 intstat;
 	int ret;
 
@@ -618,7 +606,7 @@ static void mrf24j40_isrwork(struct work_struct *work)
 		mrf24j40_handle_rx(devrec);
 
 out:
-	enable_irq(devrec->spi->irq);
+	return IRQ_HANDLED;
 }
 
 static int mrf24j40_probe(struct spi_device *spi)
@@ -642,7 +630,6 @@ static int mrf24j40_probe(struct spi_device *spi)
 
 	mutex_init(&devrec->buffer_mutex);
 	init_completion(&devrec->tx_complete);
-	INIT_WORK(&devrec->irqwork, mrf24j40_isrwork);
 	devrec->spi = spi;
 	spi_set_drvdata(spi, devrec);
 
@@ -688,11 +675,12 @@ static int mrf24j40_probe(struct spi_device *spi)
 	val &= ~0x3; /* Clear RX mode (normal) */
 	write_short_reg(devrec, REG_RXMCR, val);
 
-	ret = request_irq(spi->irq,
-			  mrf24j40_isr,
-			  IRQF_TRIGGER_FALLING,
-			  dev_name(&spi->dev),
-			  devrec);
+	ret = request_threaded_irq(spi->irq,
+				   NULL,
+				   mrf24j40_isr,
+				   IRQF_TRIGGER_FALLING|IRQF_ONESHOT,
+				   dev_name(&spi->dev),
+				   devrec);
 
 	if (ret) {
 		dev_err(printdev(devrec), "Unable to get IRQ");
@@ -721,7 +709,6 @@ static int mrf24j40_remove(struct spi_device *spi)
 	dev_dbg(printdev(devrec), "remove\n");
 
 	free_irq(spi->irq, devrec);
-	flush_work(&devrec->irqwork); /* TODO: Is this the right call? */
 	ieee802154_unregister_device(devrec->dev);
 	ieee802154_free_device(devrec->dev);
 	/* TODO: Will ieee802154_free_device() wait until ->xmit() is
-- 
1.8.1.2


------------------------------------------------------------------------------
October Webinars: Code for Performance
Free Intel webinars can help you accelerate application performance.
Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from 
the latest Intel processors and coprocessors. See abstracts and register >
http://pubads.g.doubleclick.net/gampad/clk?id=60134791&iu=/4140/ostg.clktrk

^ permalink raw reply related

* [PATCH v1 3/3] mrf24j40: Use level-triggered interrupts
From: Alan Ott @ 2013-10-06  3:52 UTC (permalink / raw)
  To: Alexander Smirnov, Dmitry Eremin-Solenikov, David S. Miller,
	david-1EggE+PRa6vk1uMJSBkQmQ
  Cc: Alan Ott, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-zigbee-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
In-Reply-To: <1381031544-2960-1-git-send-email-alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>

The mrf24j40 generates level interrupts. There are rare cases where it
appears that the interrupt line never gets de-asserted between interrupts,
causing interrupts to be lost, and causing a hung device from the driver's
perspective.  Switching the driver to interpret these interrupts as
level-triggered fixes this issue.

Signed-off-by: Alan Ott <alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>
---
 drivers/net/ieee802154/mrf24j40.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c
index c1bc688..0632d34 100644
--- a/drivers/net/ieee802154/mrf24j40.c
+++ b/drivers/net/ieee802154/mrf24j40.c
@@ -678,7 +678,7 @@ static int mrf24j40_probe(struct spi_device *spi)
 	ret = request_threaded_irq(spi->irq,
 				   NULL,
 				   mrf24j40_isr,
-				   IRQF_TRIGGER_FALLING|IRQF_ONESHOT,
+				   IRQF_TRIGGER_LOW|IRQF_ONESHOT,
 				   dev_name(&spi->dev),
 				   devrec);
 
-- 
1.8.1.2


------------------------------------------------------------------------------
October Webinars: Code for Performance
Free Intel webinars can help you accelerate application performance.
Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from 
the latest Intel processors and coprocessors. See abstracts and register >
http://pubads.g.doubleclick.net/gampad/clk?id=60134791&iu=/4140/ostg.clktrk

^ permalink raw reply related

* [PATCH] net: p54spi: remove deprecated IRQF_DISABLED
From: Michael Opdenacker @ 2013-10-06  5:04 UTC (permalink / raw)
  To: chunkeey, linville
  Cc: linux-wireless, netdev, linux-kernel, Michael Opdenacker

This patch proposes to remove the use of the IRQF_DISABLED flag

It's a NOOP since 2.6.35 and it will be removed one day.

Signed-off-by: Michael Opdenacker <michael.opdenacker@free-electrons.com>
---
 drivers/net/wireless/p54/p54spi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/p54/p54spi.c b/drivers/net/wireless/p54/p54spi.c
index 7fc46f2..de15171 100644
--- a/drivers/net/wireless/p54/p54spi.c
+++ b/drivers/net/wireless/p54/p54spi.c
@@ -636,7 +636,7 @@ static int p54spi_probe(struct spi_device *spi)
 	gpio_direction_input(p54spi_gpio_irq);
 
 	ret = request_irq(gpio_to_irq(p54spi_gpio_irq),
-			  p54spi_interrupt, IRQF_DISABLED, "p54spi",
+			  p54spi_interrupt, 0, "p54spi",
 			  priv->spi);
 	if (ret < 0) {
 		dev_err(&priv->spi->dev, "request_irq() failed");
-- 
1.8.1.2

^ permalink raw reply related

* Re: [PATCH] net: secure_seq: Move net_secret_init() definition into CONFIG_IPV6 if block
From: Olof Johansson @ 2013-10-06  5:25 UTC (permalink / raw)
  To: David Miller
  Cc: Fabio Estevam, edumazet, hannes, Network Development,
	Fabio Estevam
In-Reply-To: <20131005.162752.155685316245760253.davem@davemloft.net>

On Sat, Oct 5, 2013 at 1:27 PM, David Miller <davem@davemloft.net> wrote:
> From: Fabio Estevam <festevam@gmail.com>
> Date: Sat,  5 Oct 2013 17:09:50 -0300
>
>> From: Fabio Estevam <fabio.estevam@freescale.com>
>>
>> Commit 9a3bab6b05 (net: net_secret should not depend on TCP) introduced
>> the following build warning when CONFIG_IPV6 is not selected:
>>
>> net/core/secure_seq.c:17:13: warning: 'net_secret_init' defined but not used [-Wunused-function]
>>
>> Fix it by moving net_secret_init(void) inside the '#if IS_ENABLED(CONFIG_IPV6)'
>> block.
>>
>> Reported-by: Olof Johansson <olof@lixom.net>
>> Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
>
> seq_scale is used by secure_tcp_sequence_number, which is only protected
> by CONFIG_INET.  I have no idea how you can get this build problem.
>
> And I cannot reproduce it here:

You get it if you have CONFIG_NET enabled, but CONFIG_INET off. We
seem to have a few defconfigs on arm that has that setting, likely
because they lack native network interfaces but need local unix
sockets. Or whatever. But that's how you hit it.

Steps to reproduce, even with ARCH=sparc:

make allnoconfig
edit .config, set CONFIG_NET=y
yes "" | make oldconfig
make


-Olof

^ permalink raw reply

* Re: [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Alexander Gordeev @ 2013-10-06  6:02 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Ben Hutchings, linux-kernel, Bjorn Helgaas, Ralf Baechle,
	Michael Ellerman, Martin Schwidefsky, Ingo Molnar, Tejun Heo,
	Dan Williams, Andy King, Jon Mason, Matt Porter, linux-pci,
	linux-mips, linuxppc-dev, linux390, linux-s390, x86, linux-ide,
	iss_storagedev, linux-nvme, linux-rdma, netdev, e1000-devel,
	linux-driver
In-Reply-To: <1381009586.645.141.camel@pasglop>

On Sun, Oct 06, 2013 at 08:46:26AM +1100, Benjamin Herrenschmidt wrote:
> On Sat, 2013-10-05 at 16:20 +0200, Alexander Gordeev wrote:
> > So my point is - drivers should first obtain a number of MSIs they *can*
> > get, then *derive* a number of MSIs the device is fine with and only then
> > request that number. Not terribly different from memory or any other type
> > of resource allocation ;)
> 
> What if the limit is for a group of devices ? Your interface is racy in
> that case, another driver could have eaten into the limit in between the
> calls.

Well, the another driver has had a better karma ;) But seriously, the
current scheme with a loop is not race-safe wrt to any other type of
resource which might exhaust. What makes the quota so special so we
should care about it and should not care i.e. about lack of msi_desc's?

Yeah, I know the quota might hit more likely. But why it is not addressed
right now then? Not a single function in chains...
  rtas_msi_check_device() -> msi_quota_for_device() -> traverse_pci_devices()
  rtas_setup_msi_irqs() -> msi_quota_for_device() -> traverse_pci_devices()
...is race-safe. So if it has not been bothering anyone until now then
no reason to start worrying now :)

In fact, in the current design to address the quota race decently the
drivers would have to protect the *loop* to prevent the quota change
between a pci_enable_msix() returned a positive number and the the next
call to pci_enable_msix() with that number. Is it doable?

> Ben.
> 
> 

-- 
Regards,
Alexander Gordeev
agordeev@redhat.com

^ permalink raw reply

* Re: [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Benjamin Herrenschmidt @ 2013-10-06  6:19 UTC (permalink / raw)
  To: Alexander Gordeev
  Cc: Ben Hutchings, linux-kernel, Bjorn Helgaas, Ralf Baechle,
	Michael Ellerman, Martin Schwidefsky, Ingo Molnar, Tejun Heo,
	Dan Williams, Andy King, Jon Mason, Matt Porter, linux-pci,
	linux-mips, linuxppc-dev, linux390, linux-s390, x86, linux-ide,
	iss_storagedev, linux-nvme, linux-rdma, netdev, e1000-devel,
	linux-driver
In-Reply-To: <20131006060243.GB28142@dhcp-26-207.brq.redhat.com>

On Sun, 2013-10-06 at 08:02 +0200, Alexander Gordeev wrote:
> On Sun, Oct 06, 2013 at 08:46:26AM +1100, Benjamin Herrenschmidt wrote:
> > On Sat, 2013-10-05 at 16:20 +0200, Alexander Gordeev wrote:
> > > So my point is - drivers should first obtain a number of MSIs they *can*
> > > get, then *derive* a number of MSIs the device is fine with and only then
> > > request that number. Not terribly different from memory or any other type
> > > of resource allocation ;)
> > 
> > What if the limit is for a group of devices ? Your interface is racy in
> > that case, another driver could have eaten into the limit in between the
> > calls.
> 
> Well, the another driver has had a better karma ;) But seriously, the
> current scheme with a loop is not race-safe wrt to any other type of
> resource which might exhaust. What makes the quota so special so we
> should care about it and should not care i.e. about lack of msi_desc's?

I'm not saying the current scheme is better but I prefer the option of
passing a min,max to the request function.

> Yeah, I know the quota might hit more likely. But why it is not addressed
> right now then? Not a single function in chains...
>   rtas_msi_check_device() -> msi_quota_for_device() -> traverse_pci_devices()
>   rtas_setup_msi_irqs() -> msi_quota_for_device() -> traverse_pci_devices()
> ...is race-safe. So if it has not been bothering anyone until now then 
> no reason to start worrying now :)
>
> In fact, in the current design to address the quota race decently the
> drivers would have to protect the *loop* to prevent the quota change
> between a pci_enable_msix() returned a positive number and the the next
> call to pci_enable_msix() with that number. Is it doable?

I am not advocating for the current design, simply saying that your
proposal doesn't address this issue while Ben's does.

Cheers,
Ben.

> > Ben.
> > 
> > 
> 

^ permalink raw reply

* Re: [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Alexander Gordeev @ 2013-10-06  7:10 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Ben Hutchings, linux-kernel, Bjorn Helgaas, Ralf Baechle,
	Michael Ellerman, Martin Schwidefsky, Ingo Molnar, Tejun Heo,
	Dan Williams, Andy King, Jon Mason, Matt Porter, linux-pci,
	linux-mips, linuxppc-dev, linux390, linux-s390, x86, linux-ide,
	iss_storagedev, linux-nvme, linux-rdma, netdev, e1000-devel,
	linux-driver
In-Reply-To: <1381040386.645.143.camel@pasglop>

On Sun, Oct 06, 2013 at 05:19:46PM +1100, Benjamin Herrenschmidt wrote:
> On Sun, 2013-10-06 at 08:02 +0200, Alexander Gordeev wrote:
> > In fact, in the current design to address the quota race decently the
> > drivers would have to protect the *loop* to prevent the quota change
> > between a pci_enable_msix() returned a positive number and the the next
> > call to pci_enable_msix() with that number. Is it doable?
> 
> I am not advocating for the current design, simply saying that your
> proposal doesn't address this issue while Ben's does.

There is one major flaw in min-max approach - the generic MSI layer
will have to take decisions on exact number of MSIs to request, not
device drivers.

This will never work for all devices, because there might be specific
requirements which are not covered by the min-max. That is what Ben
described "...say, any even number within a certain range". Ben suggests
to leave the existing loop scheme to cover such devices, which I think is
not right.

What about introducing pci_lock_msi() and pci_unlock_msi() and let device
drivers care about their ranges and specifics in race-safe manner?
I do not call to introduce it right now (since it appears pSeries has not
been hitting the race for years) just as a possible alternative to Ben's
proposal.

-- 
Regards,
Alexander Gordeev
agordeev@redhat.com

^ permalink raw reply

* Re: Bug - regression - Via velocity interface coming up freezes kernel
From: Jamie Heilman @ 2013-10-06  8:19 UTC (permalink / raw)
  To: netdev; +Cc: Francois Romieu
In-Reply-To: <CAFES+i+9Wk6GMo8jMPH4uAz0AFi16RCWaRjG2YOyjRiBnx7OVQ@mail.gmail.com>

FWIW, I encountered this same issue on my EPIA-SN with its onboard
VT6130, I bisected back to the same commit, and Francois's patch
against 3.11.4 also eliminated the warning for me.  Hopefully a fix
is queued up for stable soon.


For the record the full trace was:

WARNING: CPU: 0 PID: 0 at kernel/softirq.c:159 _local_bh_enable_ip.isra.14+0x32/0x6e()
Modules linked in: quota_v2 quota_tree nfsd auth_rpcgss oid_registry exportfs nfs lockd fscache sunrpc xt_mark cls_fw sch_htb xt_nat iptable_nat nf_nat_ipv4 nf_nat ipt_REJECT xt_tcpudp xt_multiport iptable_mangle nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack xt_LOG xt_limit iptable_filter ip_tables x_tables dm_crypt dm_mod snd_hda_codec_via snd_hda_intel snd_hda_codec snd_hwdep snd_pcm snd_page_alloc snd_timer snd soundcore psmouse via_rhine mii via_velocity evdev via_agp ioapic agpgart
CPU: 0 PID: 0 Comm: swapper Not tainted 3.11.4 #1
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080014  06/01/2009
 00000000 00000000 c1355d04 c125c0f9 c1355d34 c1024360 c12ecca6 00000000
 00000000 c12ece5c 0000009f c1026769 c1026769 f5041cc0 c138cb2c f5ae3000
 c1355d44 c10243f3 00000009 00000000 c1355d4c c1026769 c1355d54 c10267ad
Call Trace:
 [<c125c0f9>] dump_stack+0x16/0x18
 [<c1024360>] warn_slowpath_common+0x70/0x87
 [<c1026769>] ? _local_bh_enable_ip.isra.14+0x32/0x6e
 [<c1026769>] ? _local_bh_enable_ip.isra.14+0x32/0x6e
 [<c10243f3>] warn_slowpath_null+0x1d/0x1f
 [<c1026769>] _local_bh_enable_ip.isra.14+0x32/0x6e
 [<c10267ad>] local_bh_enable+0x8/0xa
 [<c120e15f>] neigh_lookup+0x95/0x9e
 [<c124239a>] __neigh_lookup.constprop.16+0x1b/0x4e
 [<c1242b3f>] arp_process+0x3ac/0x461
 [<c1216860>] ? sch_direct_xmit+0x37/0xd9
 [<c10267ad>] ? local_bh_enable+0x8/0xa
 [<c1208ccd>] ? dev_queue_xmit+0x2aa/0x2b7
 [<c1242cc2>] arp_rcv+0xc4/0x107
 [<c1242714>] ? arp_xmit+0x18/0x4f
 [<c1206c67>] __netif_receive_skb_core+0x366/0x39f
 [<c1004f09>] ? text_poke_smp_batch+0x2a/0x2a
 [<c1206cb1>] __netif_receive_skb+0x11/0x4d
 [<c1004f09>] ? text_poke_smp_batch+0x2a/0x2a
 [<c1207747>] netif_receive_skb+0x30/0x33
 [<f84465aa>] velocity_rx_srv+0x229/0x304 [via_velocity]
 [<f84466b6>] velocity_poll+0x31/0x7a [via_velocity]
 [<c12078bb>] net_rx_action+0x69/0xe9
 [<c1026a0b>] __do_softirq+0x87/0x12d
 [<c1026b57>] irq_exit+0x36/0x6d
 [<c1002c4e>] do_IRQ+0x74/0x87
 [<c125ef73>] common_interrupt+0x33/0x38
 [<c1006739>] ? default_idle+0x5/0x7
 [<c1006a9d>] arch_cpu_idle+0x12/0x17
 [<c1040a04>] cpu_startup_entry+0xbe/0x109
 [<c1257758>] rest_init+0x57/0x59
 [<c1394920>] start_kernel+0x2be/0x2c4
 [<c1394494>] ? repair_env_string+0x51/0x51
 [<c13942c3>] i386_start_kernel+0x79/0x7d
---[ end trace 96eca78d04cc6ac3 ]---

-- 
Jamie Heilman                     http://audible.transient.net/~jamie/

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox