Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next v2 1/2] net: make net_get_random_once irq safe
From: Hannes Frederic Sowa @ 2013-10-23 18:05 UTC (permalink / raw)
  To: netdev, davem, edumazet
In-Reply-To: <20131023111200.GB26236@order.stressinduktion.org>

I initially built a non-irq-safe version of net_get_random_once because I
wanted the freedom to defer even the extraction process of
get_random_bytes until the nonblocking pool is fully seeded.

I don't think this is a good idea anymore and thus this patch makes
net_get_random_once irq safe. Now someone using net_get_random_once does
not need to care from where it is called.

Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
v2: Reword the commit message only. It looked horrible.

 include/linux/net.h | 1 -
 net/core/utils.c    | 7 ++++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index aca446b..b292a04 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -250,7 +250,6 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done,
 #define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE
 #endif /* HAVE_JUMP_LABEL */
 
-/* BE CAREFUL: this function is not interrupt safe */
 #define net_get_random_once(buf, nbytes)				\
 	({								\
 		bool ___ret = false;					\
diff --git a/net/core/utils.c b/net/core/utils.c
index bf09371..2f737bf 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -370,16 +370,17 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done,
 			   struct static_key *done_key)
 {
 	static DEFINE_SPINLOCK(lock);
+	unsigned long flags;
 
-	spin_lock_bh(&lock);
+	spin_lock_irqsave(&lock, flags);
 	if (*done) {
-		spin_unlock_bh(&lock);
+		spin_unlock_irqrestore(&lock, flags);
 		return false;
 	}
 
 	get_random_bytes(buf, nbytes);
 	*done = true;
-	spin_unlock_bh(&lock);
+	spin_unlock_irqrestore(&lock, flags);
 
 	__net_random_once_disable_jump(done_key);
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 2/2] net: initialize hashrnd in flow_dissector with net_get_random_once
From: Hannes Frederic Sowa @ 2013-10-23 18:06 UTC (permalink / raw)
  To: netdev, davem, edumazet
In-Reply-To: <20131023111219.GA31531@order.stressinduktion.org>

We can also defer the initialization of hashrnd in flow_dissector
to its first use. Since net_get_random_once is irq safe now, we don't
have to audit the call paths to check whether one of these functions
gets called by an interrupt handler.

Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
v2: Reword the commit message only. It looked horrible.

 net/core/flow_dissector.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index f8e25ac..5cac36e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -184,6 +184,22 @@ ipv6:
 EXPORT_SYMBOL(skb_flow_dissect);
 
 static u32 hashrnd __read_mostly;
+static __always_inline void __flow_hash_secret_init(void)
+{
+	net_get_random_once(&hashrnd, sizeof(hashrnd));
+}
+
+static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
+{
+	__flow_hash_secret_init();
+	return jhash_3words(a, b, c, hashrnd);
+}
+
+static __always_inline u32 __flow_hash_1word(u32 a)
+{
+	__flow_hash_secret_init();
+	return jhash_1word(a, hashrnd);
+}
 
 /*
  * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
@@ -210,9 +226,9 @@ void __skb_get_rxhash(struct sk_buff *skb)
 		swap(keys.port16[0], keys.port16[1]);
 	}
 
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src,
-			    (__force u32)keys.ports, hashrnd);
+	hash = __flow_hash_3words((__force u32)keys.dst,
+				  (__force u32)keys.src,
+				  (__force u32)keys.ports);
 	if (!hash)
 		hash = 1;
 
@@ -248,7 +264,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		hash = skb->sk->sk_hash;
 	else
 		hash = (__force u16) skb->protocol;
-	hash = jhash_1word(hash, hashrnd);
+	hash = __flow_hash_1word(hash);
 
 	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
 }
@@ -340,7 +356,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 				else
 					hash = (__force u16) skb->protocol ^
 					    skb->rxhash;
-				hash = jhash_1word(hash, hashrnd);
+				hash = __flow_hash_1word(hash);
 				queue_index = map->queues[
 				    ((u64)hash * map->len) >> 32];
 			}
@@ -395,11 +411,3 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 	skb_set_queue_mapping(skb, queue_index);
 	return netdev_get_tx_queue(dev, queue_index);
 }
-
-static int __init initialize_hashrnd(void)
-{
-	get_random_bytes(&hashrnd, sizeof(hashrnd));
-	return 0;
-}
-
-late_initcall_sync(initialize_hashrnd);
-- 
1.8.3.1

^ permalink raw reply related

* Re: Neterion and UFO handling [was: Re: [PATCH] ipv6: udp packets following an UFO enqueued packet need also be handled by UFO]
From: Hannes Frederic Sowa @ 2013-10-23 18:15 UTC (permalink / raw)
  To: Jon Mason
  Cc: Jiri Pirko, netdev, yoshfuji, David Miller, Alexey Kuznetsov,
	jmorris, Patrick McHardy, Herbert Xu, Eric Dumazet
In-Reply-To: <CAPoiz9xnvmQ18D0iGdFVKL8bD-1nC=8zQVD9o1oxfT4Qc942xw@mail.gmail.com>

On Wed, Oct 23, 2013 at 09:35:43AM -0700, Jon Mason wrote:
> On Wed, Oct 16, 2013 at 9:45 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
> > Hi Jon and Jiri!
> >
> > Just wanted to remind you if you could have a look at this?
> >
> > If you don't have time to test this may I know your assessment of the
> > situation? I could send a compile-time tested patch to disable UFO or if you
> > say so we could leave this as is.
> 
> So, bad news.  My Xframe 2 adapter (the only variety that does UFO
> offload) won't fit in a standard PCI(32) slot.  Since my PCI-X system
> at home is faulty (I'm trying to fix it , but it won't be in the time
> frame you want), there is no way for me to test it on the hardware.
> Terribly sorry.
> 
> I am fine with this patch going out, since UFO is off by default.
> I'll handle any issues once they are discovered.  Alternatively, we
> could just kill UFO and make everyone's lives easier.

Oh, I missed that UFO is off by default.

If it turns out that UFO is causing broken frames it should be either
killed or (if you have the time for that) fixed. Because there shouldn't
be regressions in stable kernels I am fine with this resolution. Maybe
you can have a look at this problem once your hardware is fixed.

Thank you,

  Hannes

^ permalink raw reply

* Re: [PATCH 1/4] [RFC] net: Explicitly initialize u64_stats_sync structures for lockdep
From: John Stultz @ 2013-10-23 18:23 UTC (permalink / raw)
  To: John Stultz, LKML
  Cc: Eric Dumazet, Thomas Petazzoni, Mirko Lindner, Stephen Hemminger,
	Roger Luethi, Patrick McHardy, Rusty Russell, Michael S. Tsirkin,
	Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI, Wensong Zhang,
	Simon Horman, Julian Anastasov, Jesse Gross, Mathieu Desnoyers,
	Steven Rostedt, Peter Zijlstra, Ingo Molnar, Thomas Gleixner,
	David S. Miller, netdev, netfilter-devel
In-Reply-To: <1381186321-4906-2-git-send-email-john.stultz@linaro.org>

On 10/07/2013 03:51 PM, John Stultz wrote:
> In order to enable lockdep on seqcount/seqlock structures, we
> must explicitly initialize any locks.
>
> The u64_stats_sync structure, uses a seqcount, and thus we need
> to introduce a u64_stats_init() function and use it to initialize
> the structure.
>
> This unfortunately adds a lot of fairly trivial initialization code
> to a number of drivers. But the benefit of ensuring correctness makes
> this worth while.
>
> Because these changes are required for lockdep to be enabled, and the
> changes are quite trivial, I've not yet split this patch out into 30-some
> separate patches, as I figured it would be better to get the various
> maintainers thoughts on how to best merge this change along with
> the seqcount lockdep enablement.

Just wanted to ping folks on this patch, as I haven't gotten any feedback.

As it's a prereq for the seqcount lockdep support, I'd like to get it
queued/merged, but I'm not sure what the right maintainer path or
approach should be.

1) Do folks prefer to see this patch split up into 30-some separate
trivial cleanup patches, or have it all go in as one logical change?

2) Would folks want this patch (in whichever form) to be merged
separately via the networking maintainers, or can it be merged via -tip
as part of the seqcount lockdep series?

thanks
-john

^ permalink raw reply

* [PATCH net-next] fix rtnl notification in atomic context
From: Alexei Starovoitov @ 2013-10-23 18:32 UTC (permalink / raw)
  To: David S. Miller; +Cc: Nicolas Dichtel, Cong Wang, netdev

commit 991fb3f74c "dev: always advertise rx_flags changes via netlink"
introduced rtnl notification from __dev_set_promiscuity(),
which can be called in atomic context.

Steps to reproduce:
ip tuntap add dev tap1 mode tap
ifconfig tap1 up
tcpdump -nei tap1 &
ip tuntap del dev tap1 mode tap

[  271.627994] device tap1 left promiscuous mode
[  271.639897] BUG: sleeping function called from invalid context at mm/slub.c:940
[  271.664491] in_atomic(): 1, irqs_disabled(): 0, pid: 3394, name: ip
[  271.677525] INFO: lockdep is turned off.
[  271.690503] CPU: 0 PID: 3394 Comm: ip Tainted: G        W    3.12.0-rc3+ #73
[  271.703996] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012
[  271.731254]  ffffffff81a58506 ffff8807f0d57a58 ffffffff817544e5 ffff88082fa0f428
[  271.760261]  ffff8808071f5f40 ffff8807f0d57a88 ffffffff8108bad1 ffffffff81110ff8
[  271.790683]  0000000000000010 00000000000000d0 00000000000000d0 ffff8807f0d57af8
[  271.822332] Call Trace:
[  271.838234]  [<ffffffff817544e5>] dump_stack+0x55/0x76
[  271.854446]  [<ffffffff8108bad1>] __might_sleep+0x181/0x240
[  271.870836]  [<ffffffff81110ff8>] ? rcu_irq_exit+0x68/0xb0
[  271.887076]  [<ffffffff811a80be>] kmem_cache_alloc_node+0x4e/0x2a0
[  271.903368]  [<ffffffff810b4ddc>] ? vprintk_emit+0x1dc/0x5a0
[  271.919716]  [<ffffffff81614d67>] ? __alloc_skb+0x57/0x2a0
[  271.936088]  [<ffffffff810b4de0>] ? vprintk_emit+0x1e0/0x5a0
[  271.952504]  [<ffffffff81614d67>] __alloc_skb+0x57/0x2a0
[  271.968902]  [<ffffffff8163a0b2>] rtmsg_ifinfo+0x52/0x100
[  271.985302]  [<ffffffff8162ac6d>] __dev_notify_flags+0xad/0xc0
[  272.001642]  [<ffffffff8162ad0c>] __dev_set_promiscuity+0x8c/0x1c0
[  272.017917]  [<ffffffff81731ea5>] ? packet_notifier+0x5/0x380
[  272.033961]  [<ffffffff8162b109>] dev_set_promiscuity+0x29/0x50
[  272.049855]  [<ffffffff8172e937>] packet_dev_mc+0x87/0xc0
[  272.065494]  [<ffffffff81732052>] packet_notifier+0x1b2/0x380
[  272.080915]  [<ffffffff81731ea5>] ? packet_notifier+0x5/0x380
[  272.096009]  [<ffffffff81761c66>] notifier_call_chain+0x66/0x150
[  272.110803]  [<ffffffff8108503e>] __raw_notifier_call_chain+0xe/0x10
[  272.125468]  [<ffffffff81085056>] raw_notifier_call_chain+0x16/0x20
[  272.139984]  [<ffffffff81620190>] call_netdevice_notifiers_info+0x40/0x70
[  272.154523]  [<ffffffff816201d6>] call_netdevice_notifiers+0x16/0x20
[  272.168552]  [<ffffffff816224c5>] rollback_registered_many+0x145/0x240
[  272.182263]  [<ffffffff81622641>] rollback_registered+0x31/0x40
[  272.195369]  [<ffffffff816229c8>] unregister_netdevice_queue+0x58/0x90
[  272.208230]  [<ffffffff81547ca0>] __tun_detach+0x140/0x340
[  272.220686]  [<ffffffff81547ed6>] tun_chr_close+0x36/0x60

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
---
 include/linux/rtnetlink.h |    4 +++-
 net/core/dev.c            |    2 +-
 net/core/rtnetlink.c      |   12 +++++++++---
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index f28544b..26f5edc 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -15,7 +15,9 @@ extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
 			      u32 id, long expires, u32 error);
 
-extern void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change);
+void __rtmsg_ifinfo(int type, struct net_device *dev, unsigned change,
+		    gfp_t flags);
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change);
 
 /* RTNL is used as a global lock for all changes to network configuration  */
 extern void rtnl_lock(void);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0918aad..59b90fe 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5257,7 +5257,7 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
 	unsigned int changes = dev->flags ^ old_flags;
 
 	if (gchanges)
-		rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges);
+		__rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
 
 	if (changes & IFF_UP) {
 		if (dev->flags & IFF_UP)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4aedf03..5931af9 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1984,14 +1984,15 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
+void __rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
+		    gfp_t flags)
 {
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 	size_t if_info_size;
 
-	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL);
+	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags);
 	if (skb == NULL)
 		goto errout;
 
@@ -2002,12 +2003,17 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
 		kfree_skb(skb);
 		goto errout;
 	}
-	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags);
 	return;
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
 }
+
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
+{
+	__rtmsg_ifinfo(type, dev, change, GFP_KERNEL);
+}
 EXPORT_SYMBOL(rtmsg_ifinfo);
 
 static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
-- 
1.7.9.5

^ permalink raw reply related

* URGENT REPLY NEEDED
From: Suleman Abubaker @ 2013-10-23 18:57 UTC (permalink / raw)



Greetings Dear Friend.

I am Mr Suleman Abubaker, Staff of Bank Of Africa in Burkina Faso. I would like you to indicate your interest to 
receive the transfer of ($20.5 Million Dollars) I will like you to stand as the next of kin to my late client whose account is presently dormant for claims. if you are interested,indicate and i will intimate you with the method of application and how you can apply to the bank .

But before i send to you the text of application form,I will like you to send me the following informations

1.NAME IN FULL:................................
2.ADDRESS:.......................................
3.NATIONALITY:.................................. .
4.AGE:.............................................
5.SEX..............................................
6.OCCUPATION:......................................
7.MARITAL STATUS:..................................
8.PRIAVTE PHONE NO............................................
9.PRIVATE FAX NO:.............................................
10.ATTACH COPY OF YOUR IDENTIFICATION...........................

Now my questions are:
1) Can you handle this project?
2) Can I give you this trust?

so i will like you to send to me those informations for easy and effective communication.Upon receipt of your reply, I will send to you by fax or 
email the text of the application form.I will not fail to bring to your notice that this transaction is hitch-free and that you should not entertain any atom of fear as all required arrangements have been made for the transfer. You should contact me immediately as soon as you receive this letter,if only you are intrested and ready to help. Trusting to hear from you immediately.
Do keep this a top secret for security reasons.

Best Regards
Mr Suleman Abubaker.

^ permalink raw reply

* Re: [PATCH 1/4] [RFC] net: Explicitly initialize u64_stats_sync structures for lockdep
From: Julian Anastasov @ 2013-10-23 19:37 UTC (permalink / raw)
  To: John Stultz
  Cc: LKML, Eric Dumazet, Thomas Petazzoni, Mirko Lindner,
	Stephen Hemminger, Roger Luethi, Patrick McHardy, Rusty Russell,
	Michael S. Tsirkin, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Wensong Zhang, Simon Horman, Jesse Gross,
	Mathieu Desnoyers, Steven Rostedt, Peter Zijlstra, Ingo Molnar,
	Thomas Gleixner, David S. Miller, netdev, netfilter-devel
In-Reply-To: <52681407.8060804@linaro.org>


	Hello,

On Wed, 23 Oct 2013, John Stultz wrote:

> Just wanted to ping folks on this patch, as I haven't gotten any feedback.
> 
> As its a prereq for the seqcount lockdep support, I'd like to get it
> queued/merged, but I'm not sure what the right maintainer path or
> approach should be.
> 
> 1) Do folks prefer to see this patch split up into 30-some separate
> trivial cleanup patches, or have it go in all as one logical change?
> 
> 2) Would folks want this patch (in whichever form) to be merged
> separately via the networking maintainers, or can it be merged via -tip
> as part of the seqcount lockdep series?

	The IPVS part in net/netfilter/ipvs/ looks ok to me.
We do not have any pending changes for this area, so you can
skip the ipvs trees for this change. If you need ack for the
IPVS part, here it is:

Acked-by: Julian Anastasov <ja@ssi.bg>

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply

* Re: [PATCH net-next 0/2] Removal of struct esp_data
From: David Miller @ 2013-10-23 19:40 UTC (permalink / raw)
  To: steffen.klassert; +Cc: mathias.krause, netdev, herbert
In-Reply-To: <20131023080716.GA10148@secunet.com>

From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 23 Oct 2013 10:07:16 +0200

> Well, I thought to either take this as a reminder to implement the
> missing stuff or to take the removing patches if this is really obsolete.
> I'll do one of both once I'm back from conference week in Edinburgh.

Sounds like a good plan, thanks.

^ permalink raw reply

* [PATCH] Fix: Dereference pointer-value of sk_prot->memory_pressure
From: Eric W. Biederman @ 2013-10-23 19:55 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Christoph Paasch, fengguang.wu, netdev, linux-kernel,
	Eric Dumazet
In-Reply-To: <1382533364.7572.15.camel@edumazet-glaptop.roam.corp.google.com>

From: Christoph Paasch <christoph.paasch@uclouvain.be>
Date: Wed, 23 Oct 2013 12:49:21 -0700

2e685cad57 (tcp_memcontrol: Kill struct tcp_memcontrol) falsely modified
the access to memory_pressure of sk->sk_prot->memory_pressure. The patch
did modify the memory_pressure-field of struct cg_proto, but not the one
of struct proto.

So, the access to sk_prot->memory_pressure should not be changed.

Acked-by: Eric Dumazet <edumazet@google.com>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/net/sock.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index c93542f92420..e3a18ff0c38b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1137,7 +1137,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
 		return !!sk->sk_cgrp->memory_pressure;
 
-	return !!sk->sk_prot->memory_pressure;
+	return !!*sk->sk_prot->memory_pressure;
 }
 
 static inline void sk_leave_memory_pressure(struct sock *sk)
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH] Fix: Dereference pointer-value of sk_prot->memory_pressure
From: Eric W. Biederman @ 2013-10-23 19:58 UTC (permalink / raw)
  To: David Miller
  Cc: Christoph Paasch, fengguang.wu, netdev, linux-kernel,
	Eric Dumazet
In-Reply-To: <1382533364.7572.15.camel@edumazet-glaptop.roam.corp.google.com>

From: Christoph Paasch <christoph.paasch@uclouvain.be>
Date: Wed, 23 Oct 2013 12:49:21 -0700

2e685cad57 (tcp_memcontrol: Kill struct tcp_memcontrol) falsely modified
the access to memory_pressure of sk->sk_prot->memory_pressure. The patch
did modify the memory_pressure-field of struct cg_proto, but not the one
of struct proto.

So, the access to sk_prot->memory_pressure should not be changed.

Acked-by: Eric Dumazet <edumazet@google.com>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---

Resent because I fat fingered and deleted Dave by accident.

 include/net/sock.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index c93542f92420..e3a18ff0c38b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1137,7 +1137,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
 		return !!sk->sk_cgrp->memory_pressure;
 
-	return !!sk->sk_prot->memory_pressure;
+	return !!*sk->sk_prot->memory_pressure;
 }
 
 static inline void sk_leave_memory_pressure(struct sock *sk)
-- 
1.7.5.4

^ permalink raw reply related

* Re: [PATCH] Fix: Dereference pointer-value of sk_prot->memory_pressure
From: David Miller @ 2013-10-23 20:15 UTC (permalink / raw)
  To: ebiederm; +Cc: eric.dumazet, christoph.paasch, fengguang.wu, netdev,
	linux-kernel
In-Reply-To: <87r4bbiwyh.fsf_-_@xmission.com>

From: ebiederm@xmission.com (Eric W. Biederman)
Date: Wed, 23 Oct 2013 12:55:18 -0700

> From: Christoph Paasch <christoph.paasch@uclouvain.be>
> Date: Wed, 23 Oct 2013 12:49:21 -0700
> 
> 2e685cad57 (tcp_memcontrol: Kill struct tcp_memcontrol) falsely modified
> the access to memory_pressure of sk->sk_prot->memory_pressure. The patch
> did modify the memory_pressure-field of struct cg_proto, but not the one
> of struct proto.
> 
> So, the access to sk_prot->memory_pressure should not be changed.
> 
> Acked-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Fengguang Wu <fengguang.wu@intel.com>
> Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>

Applied, but I replaced "Fix: " with "net: " in the commit header line.

^ permalink raw reply

* Re: [PATCH net] netpoll: fix rx_hook() interface by passing the skb
From: David Miller @ 2013-10-23 20:16 UTC (permalink / raw)
  To: antonio; +Cc: David.Laight, netdev
In-Reply-To: <20131023124401.GC1535@neomailbox.net>

From: Antonio Quartulli <antonio@meshcoding.com>
Date: Wed, 23 Oct 2013 14:44:01 +0200

> On Wed, Oct 23, 2013 at 12:18:32PM +0100, David Laight wrote:
>> > My idea is to use the following API:
>> > 
>> > rx_skb_hook(struct netpoll *np, int source, struct sk_buff *skb, int len);
>> > 
>> > Any suggestion or objection?
>> 
>> Don't you need to pass the offset of the udp data?
> 
> Yes, you are right. I just forgot it. Therefore we have:
> 
> rx_skb_hook(struct netpoll *np, int source, struct sk_buff *skb, int offset,
> 	    int len);
> 
> where offset is going to be = (udp_hdr + 1) - skb->data
> and len = skb->len - offset

This looks good to me.

^ permalink raw reply

* Re: [PATCH net-next] net: always inline net_secret_init
From: David Miller @ 2013-10-23 20:27 UTC (permalink / raw)
  To: hannes; +Cc: netdev
In-Reply-To: <20131023064450.GA26236@order.stressinduktion.org>

From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 23 Oct 2013 08:44:50 +0200

> Currently net_secret_init does not get inlined, so we always have a call
> to net_secret_init even in the fast path.
> 
> Let's specify net_secret_init as __always_inline so we have the nop in
> the fast-path without the call to net_secret_init and the unlikely path
> at the epilogue of the function.
> 
> jump_labels handle the inlining correctly.
> 
> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>

Applied, thanks Hannes.

^ permalink raw reply

* Re: [PATCH] sh_eth: add/use RMCR.RNC bit
From: David Miller @ 2013-10-23 20:50 UTC (permalink / raw)
  To: sergei.shtylyov; +Cc: netdev, nobuhiro.iwamatsu.yj, linux-sh, horms
In-Reply-To: <5266F94E.9030406@cogentembedded.com>

From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Wed, 23 Oct 2013 02:16:46 +0400

> Hello.
> 
> On 10/16/2013 02:29 AM, Sergei Shtylyov wrote:
> 
>> Declare 'enum EMCR_BIT' containing the single member for the RMCR.RNC
>> bit and
> 
>    Hm, looks like I typoed here, should have been RMCR_BIT. David, should
>    I resubmit or you can fix it while applying? Or simply not worth the
>    trouble?

Applied, with the typo fixed, thanks.

^ permalink raw reply

* Re: [PATCH 0/3] netfilter fixes for net
From: David Miller @ 2013-10-23 20:56 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel, netdev
In-Reply-To: <1382519724-3953-1-git-send-email-pablo@netfilter.org>

From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 23 Oct 2013 11:15:21 +0200

> The following patchset contains three netfilter fixes for your net
> tree, they are:
> 
> * A couple of fixes to resolve info leak to userspace due to uninitialized
>   memory area in ulogd, from Mathias Krause.
> 
> * Fix instruction ordering issues that may lead to the access of
>   uninitialized data in x_tables. The problem involves the table update
>  (producer) and the main packet matching (consumer) routines. Detected in
>   SMP ARMv7, from Will Deacon.
> 
> You can pull these changes from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git master

Pulled, thanks Pablo.

^ permalink raw reply

* Re: [PATCH net] net: sctp: fix ASCONF to allow non SCTP_ADDR_SRC addresses in ipv6
From: David Miller @ 2013-10-23 20:57 UTC (permalink / raw)
  To: dborkman; +Cc: netdev, linux-sctp, micchie
In-Reply-To: <1382459696-1732-1-git-send-email-dborkman@redhat.com>

From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 22 Oct 2013 18:34:56 +0200

> Commit 8a07eb0a50 ("sctp: Add ASCONF operation on the single-homed host")
> implemented possible use of IPv4 addresses with non SCTP_ADDR_SRC state
> as source address when sending ASCONF (ADD) packets, but IPv6 part for
> that was not implemented in 8a07eb0a50. Therefore, as this is not restricted
> to IPv4-only, fix this up to allow the same for IPv6 addresses in SCTP.
> 
> Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
> Cc: Michio Honda <micchie@sfc.wide.ad.jp>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next 0/3] initialize fragment hash secrets with net_get_random_once
From: David Miller @ 2013-10-23 21:02 UTC (permalink / raw)
  To: hannes; +Cc: netdev, netfilter-devel
In-Reply-To: <1382519217-750-1-git-send-email-hannes@stressinduktion.org>

From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 23 Oct 2013 11:06:54 +0200

> This series switches the inet_frag.rnd hash initialization to
> net_get_random_once.
> 
> Included patches:
>  ipv4: initialize ip4_frags hash secret as late
>  ipv6: split inet6_hash_frag for netfilter and
>  inet: remove old fragmentation hash initializing

Looks good, series applied, thanks Hannes.

^ permalink raw reply

* Re: [PATCH net-next] fix rtnl notification in atomic context
From: Stephen Hemminger @ 2013-10-23 21:03 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: David S. Miller, Nicolas Dichtel, Cong Wang, netdev
In-Reply-To: <1382553161-3498-1-git-send-email-ast@plumgrid.com>

On Wed, 23 Oct 2013 11:32:41 -0700
Alexei Starovoitov <ast@plumgrid.com> wrote:

> +
> +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
> +{
> +	__rtmsg_ifinfo(type, dev, change, GFP_KERNEL);
> +}
>  EXPORT_SYMBOL(rtmsg_ifinfo);
>  
>  static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
> -- 

Why add another wrapper function? I think it is cleaner to just change all the
callers to use the correct gfp flags.

^ permalink raw reply

* Re: [PATCH net-next] fix rtnl notification in atomic context
From: David Miller @ 2013-10-23 21:09 UTC (permalink / raw)
  To: stephen; +Cc: ast, nicolas.dichtel, amwang, netdev
In-Reply-To: <20131023140343.4604d80d@nehalam.linuxnetplumber.net>

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Wed, 23 Oct 2013 14:03:43 -0700

> On Wed, 23 Oct 2013 11:32:41 -0700
> Alexei Starovoitov <ast@plumgrid.com> wrote:
> 
>> +
>> +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
>> +{
>> +	__rtmsg_ifinfo(type, dev, change, GFP_KERNEL);
>> +}
>>  EXPORT_SYMBOL(rtmsg_ifinfo);
>>  
>>  static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
>> -- 
> 
> Why add another wrapper function? I think it cleaner to just change all the
> callers to use the correct gfp flags.

Indeed, if this were targeted to "net" we'd have the argument of trying
to simplify the patch for -stable inclusion.

But since this is going into net-next, let's just put explicit GFP_* args
at the call site.

^ permalink raw reply

* Re: pull request: batman-adv 2013-10-23
From: David Miller @ 2013-10-23 21:13 UTC (permalink / raw)
  To: antonio; +Cc: netdev, b.a.t.m.a.n
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Antonio Quartulli <antonio@meshcoding.com>
Date: Wed, 23 Oct 2013 18:04:47 +0200

> this is another set of changes intended for net-next/linux-3.13.
> (probably our last pull request for this cycle)
> 
> Patches 1 and 2 reshape two of our main data structures in a way that they can
> easily be extended in the future to accommodate new routing protocols.
> 
> Patches from 3 to 9 improve our routing protocol API and its users so that all
> the protocol-related code is not mixed up with the other components anymore.
> 
> Patch 10 limits the local Translation Table maximum size to a value such that it
> can be fully transferred over the air if needed. This value depends on
> fragmentation being enabled or not and on the mtu values.
> 
> Patch 11 makes batman-adv send a uevent in case of soft-interface destruction
> while a "bat-Gateway" was configured (this informs userspace about the GW not
> being available anymore).
> 
> Patches 13 and 14 enable the TT component to detect non-mesh client flag
> changes at runtime (till now those flags were set upon client detection and
> were not changed anymore).
> 
> Patch 16 is a generalisation of our user-to-kernel space communication (and
> vice versa) used to exchange ICMP packets sent to/received from the mesh
> network. Now it can easily accommodate new ICMP packet types without breaking
> the existing userspace API anymore.
> 
> Remaining patches are minor changes and cleanups.

Pulled, thanks Antonio.

^ permalink raw reply

* Re: [PATCH net-next] fix rtnl notification in atomic context
From: Alexei Starovoitov @ 2013-10-23 21:25 UTC (permalink / raw)
  To: David Miller; +Cc: stephen, nicolas.dichtel, amwang, netdev
In-Reply-To: <20131023.170919.2254167416151180538.davem@davemloft.net>

On Wed, Oct 23, 2013 at 2:09 PM, David Miller <davem@davemloft.net> wrote:
> From: Stephen Hemminger <stephen@networkplumber.org>
> Date: Wed, 23 Oct 2013 14:03:43 -0700
>
>> On Wed, 23 Oct 2013 11:32:41 -0700
>> Alexei Starovoitov <ast@plumgrid.com> wrote:
>>
>>> +
>>> +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
>>> +{
>>> +    __rtmsg_ifinfo(type, dev, change, GFP_KERNEL);
>>> +}
>>>  EXPORT_SYMBOL(rtmsg_ifinfo);
>>>
>>>  static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
>>> --
>>
>> Why add another wrapper function? I think it cleaner to just change all the
>> callers to use the correct gfp flags.
>
> Indeed, if this were targetted to "net" we'd have the argument of trying
> to simplify the patch for -stable inclusion.
>
> But since this is going into net-next, let's just put explicit GFP_* args
> at the call site.

sure. Will respin.

^ permalink raw reply

* [PATCHv2 net] netpoll: fix rx_hook() interface by passing the skb
From: Antonio Quartulli @ 2013-10-23 21:36 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, David.Laight, Antonio Quartulli
In-Reply-To: <20131023.161603.1190144528425577653.davem@davemloft.net>

Right now skb->data is passed to rx_hook() even if the skb
has not been linearised and without giving rx_hook() a way
to linearise it.

Change the rx_hook() interface and make it accept the skb
and the offset to the UDP payload as arguments. rx_hook() is
also renamed to rx_skb_hook() to ensure that out of the tree
users notice the API change.

In this way any rx_skb_hook() implementation can perform all
the needed operations to properly (and safely) access the
skb data.

Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
---
 include/linux/netpoll.h |  5 +++--
 net/core/netpoll.c      | 31 ++++++++++++++++++-------------
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index f3c7c24..fbfdb9d 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -24,7 +24,8 @@ struct netpoll {
 	struct net_device *dev;
 	char dev_name[IFNAMSIZ];
 	const char *name;
-	void (*rx_hook)(struct netpoll *, int, char *, int);
+	void (*rx_skb_hook)(struct netpoll *np, int source, struct sk_buff *skb,
+			    int offset, int len);
 
 	union inet_addr local_ip, remote_ip;
 	bool ipv6;
@@ -41,7 +42,7 @@ struct netpoll_info {
 	unsigned long rx_flags;
 	spinlock_t rx_lock;
 	struct semaphore dev_lock;
-	struct list_head rx_np; /* netpolls that registered an rx_hook */
+	struct list_head rx_np; /* netpolls that registered an rx_skb_hook */
 
 	struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */
 	struct sk_buff_head txq;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index fc75c9e..8f97199 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -636,8 +636,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
 
 			netpoll_send_skb(np, send_skb);
 
-			/* If there are several rx_hooks for the same address,
-			   we're fine by sending a single reply */
+			/* If there are several rx_skb_hooks for the same
+			 * address, we're fine by sending a single reply
+			 */
 			break;
 		}
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
@@ -719,8 +720,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
 
 			netpoll_send_skb(np, send_skb);
 
-			/* If there are several rx_hooks for the same address,
-			   we're fine by sending a single reply */
+			/* If there are several rx_skb_hooks for the same
+			 * address, we're fine by sending a single reply
+			 */
 			break;
 		}
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
@@ -756,11 +758,12 @@ static bool pkt_is_ns(struct sk_buff *skb)
 
 int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 {
-	int proto, len, ulen;
-	int hits = 0;
+	int proto, len, ulen, data_len;
+	int hits = 0, offset;
 	const struct iphdr *iph;
 	struct udphdr *uh;
 	struct netpoll *np, *tmp;
+	uint16_t source;
 
 	if (list_empty(&npinfo->rx_np))
 		goto out;
@@ -820,7 +823,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 
 		len -= iph->ihl*4;
 		uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
+		offset = (unsigned char *)(uh + 1) - skb->data;
 		ulen = ntohs(uh->len);
+		data_len = skb->len - offset;
+		source = ntohs(uh->source);
 
 		if (ulen != len)
 			goto out;
@@ -834,9 +840,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 			if (np->local_port && np->local_port != ntohs(uh->dest))
 				continue;
 
-			np->rx_hook(np, ntohs(uh->source),
-				       (char *)(uh+1),
-				       ulen - sizeof(struct udphdr));
+			np->rx_skb_hook(np, source, skb, offset, data_len);
 			hits++;
 		}
 	} else {
@@ -859,7 +863,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
 			goto out;
 		uh = udp_hdr(skb);
+		offset = (unsigned char *)(uh + 1) - skb->data;
 		ulen = ntohs(uh->len);
+		data_len = skb->len - offset;
+		source = ntohs(uh->source);
 		if (ulen != skb->len)
 			goto out;
 		if (udp6_csum_init(skb, uh, IPPROTO_UDP))
@@ -872,9 +879,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 			if (np->local_port && np->local_port != ntohs(uh->dest))
 				continue;
 
-			np->rx_hook(np, ntohs(uh->source),
-				       (char *)(uh+1),
-				       ulen - sizeof(struct udphdr));
+			np->rx_skb_hook(np, source, skb, offset, data_len);
 			hits++;
 		}
 #endif
@@ -1062,7 +1067,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 
 	npinfo->netpoll = np;
 
-	if (np->rx_hook) {
+	if (np->rx_skb_hook) {
 		spin_lock_irqsave(&npinfo->rx_lock, flags);
 		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
 		list_add_tail(&np->rx, &npinfo->rx_np);
-- 
1.8.4

^ permalink raw reply related

* Re: -27% netperf TCP_STREAM regression by "tcp_memcontrol: Kill struct tcp_memcontrol"
From: Fengguang Wu @ 2013-10-23 22:07 UTC (permalink / raw)
  To: Christoph Paasch; +Cc: Eric W. Biederman, David Miller, netdev, linux-kernel
In-Reply-To: <20131023122543.GH5132@cpaasch-mac>

> -       return !!sk->sk_prot->memory_pressure;
> +       return !!*sk->sk_prot->memory_pressure;

Good catch, Christoph! Unsurprisingly, it restores the performance:

    a4fe34bf902b8f709c63      2e685cad57906e19add7      a235435d612680e595ea  
------------------------  ------------------------  ------------------------  
                  707.40       -41.0%       417.50        -8.8%       645.00  lkp-nex04/micro/netperf/120s-200%-TCP_STREAM
                 2775.60       -23.7%      2116.50        +2.1%      2834.00  lkp-sb03/micro/netperf/120s-200%-TCP_STREAM
                 3483.00       -27.2%      2534.00        -0.1%      3479.00  TOTAL netperf.Throughput_Mbps

It's a bit late, but

Tested-by: Fengguang Wu <fengguang.wu@intel.com>

Thanks,
Fengguang

^ permalink raw reply

* Re: Big performance loss from 3.4.63 to 3.10.13 when routing ipv4
From: Wolfgang Walter @ 2013-10-23 22:52 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, hannes, netdev, klassert
In-Reply-To: <1382547992.7572.31.camel@edumazet-glaptop.roam.corp.google.com>

On Wednesday 23 October 2013 10:06:32 Eric Dumazet wrote:
> On Wed, 2013-10-23 at 18:59 +0200, Wolfgang Walter wrote:
> > Ah, ok. I use SLUB, but SLABINFO=y.
> > 
> > Without much traffic it is:
> > 
> > # grep dst /proc/slabinfo
> > xfrm_dst_cache      4435   4608    448   36    4 : tunables    0    0    0 : slabdata    128    128      0
> > 
> > on the big one.
> > 
> > I can recompile the kernels with SLAB instead of SLUB if SLAB gives more
> > useful information.
> Not needed, because it seems we do not merge this SLUB cache with
> another one.

Ok. I can't see xfrm_dst_cache on 32-bit systems, though.

> 
> So please post this information, because I believe the default should be
> 65536, not 1024 or 4096
> 

Indeed, I have already seen higher values; at the moment I see:

# while true; do grep dst /proc/slabinfo ; sleep 1; done
xfrm_dst_cache     12636  12636    448   36    4 : tunables    0    0    0 : slabdata    351    351      0
xfrm_dst_cache     12636  12636    448   36    4 : tunables    0    0    0 : slabdata    351    351      0
xfrm_dst_cache     12636  12636    448   36    4 : tunables    0    0    0 : slabdata    351    351      0
xfrm_dst_cache     12636  12636    448   36    4 : tunables    0    0    0 : slabdata    351    351      0
xfrm_dst_cache     12636  12636    448   36    4 : tunables    0    0    0 : slabdata    351    351      0
xfrm_dst_cache     12636  12636    448   36    4 : tunables    0    0    0 : slabdata    351    351      0
xfrm_dst_cache     12636  12636    448   36    4 : tunables    0    0    0 : slabdata    351    351      0
xfrm_dst_cache     12636  12636    448   36    4 : tunables    0    0    0 : slabdata    351    351      0
xfrm_dst_cache     12636  12636    448   36    4 : tunables    0    0    0 : slabdata    351    351      0
xfrm_dst_cache     12636  12636    448   36    4 : tunables    0    0    0 : slabdata    351    351      0
xfrm_dst_cache     12636  12636    448   36    4 : tunables    0    0    0 : slabdata    351    351      0
xfrm_dst_cache     12708  12708    448   36    4 : tunables    0    0    0 : slabdata    353    353      0
xfrm_dst_cache     11529  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11599  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11599  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11599  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11599  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11599  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11599  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11599  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11599  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11599  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11599  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11633  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11633  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11633  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11700  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11763  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11798  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     11964  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     12139  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     12244  12276    448   36    4 : tunables    0    0    0 : slabdata    341    341      0
xfrm_dst_cache     12312  12312    448   36    4 : tunables    0    0    0 : slabdata    342    342      0
xfrm_dst_cache     12492  12492    448   36    4 : tunables    0    0    0 : slabdata    347    347      0



Regards,
-- 
Wolfgang Walter
Studentenwerk München
Anstalt des öffentlichen Rechts

^ permalink raw reply

* [PATCH v2 net-next] fix rtnl notification in atomic context
From: Alexei Starovoitov @ 2013-10-23 23:02 UTC (permalink / raw)
  To: David S. Miller; +Cc: Nicolas Dichtel, Cong Wang, Veaceslav Falico, netdev

commit 991fb3f74c "dev: always advertise rx_flags changes via netlink"
introduced rtnl notification from __dev_set_promiscuity(),
which can be called in atomic context.

Steps to reproduce:
ip tuntap add dev tap1 mode tap
ifconfig tap1 up
tcpdump -nei tap1 &
ip tuntap del dev tap1 mode tap

[  271.627994] device tap1 left promiscuous mode
[  271.639897] BUG: sleeping function called from invalid context at mm/slub.c:940
[  271.664491] in_atomic(): 1, irqs_disabled(): 0, pid: 3394, name: ip
[  271.677525] INFO: lockdep is turned off.
[  271.690503] CPU: 0 PID: 3394 Comm: ip Tainted: G        W    3.12.0-rc3+ #73
[  271.703996] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012
[  271.731254]  ffffffff81a58506 ffff8807f0d57a58 ffffffff817544e5 ffff88082fa0f428
[  271.760261]  ffff8808071f5f40 ffff8807f0d57a88 ffffffff8108bad1 ffffffff81110ff8
[  271.790683]  0000000000000010 00000000000000d0 00000000000000d0 ffff8807f0d57af8
[  271.822332] Call Trace:
[  271.838234]  [<ffffffff817544e5>] dump_stack+0x55/0x76
[  271.854446]  [<ffffffff8108bad1>] __might_sleep+0x181/0x240
[  271.870836]  [<ffffffff81110ff8>] ? rcu_irq_exit+0x68/0xb0
[  271.887076]  [<ffffffff811a80be>] kmem_cache_alloc_node+0x4e/0x2a0
[  271.903368]  [<ffffffff810b4ddc>] ? vprintk_emit+0x1dc/0x5a0
[  271.919716]  [<ffffffff81614d67>] ? __alloc_skb+0x57/0x2a0
[  271.936088]  [<ffffffff810b4de0>] ? vprintk_emit+0x1e0/0x5a0
[  271.952504]  [<ffffffff81614d67>] __alloc_skb+0x57/0x2a0
[  271.968902]  [<ffffffff8163a0b2>] rtmsg_ifinfo+0x52/0x100
[  271.985302]  [<ffffffff8162ac6d>] __dev_notify_flags+0xad/0xc0
[  272.001642]  [<ffffffff8162ad0c>] __dev_set_promiscuity+0x8c/0x1c0
[  272.017917]  [<ffffffff81731ea5>] ? packet_notifier+0x5/0x380
[  272.033961]  [<ffffffff8162b109>] dev_set_promiscuity+0x29/0x50
[  272.049855]  [<ffffffff8172e937>] packet_dev_mc+0x87/0xc0
[  272.065494]  [<ffffffff81732052>] packet_notifier+0x1b2/0x380
[  272.080915]  [<ffffffff81731ea5>] ? packet_notifier+0x5/0x380
[  272.096009]  [<ffffffff81761c66>] notifier_call_chain+0x66/0x150
[  272.110803]  [<ffffffff8108503e>] __raw_notifier_call_chain+0xe/0x10
[  272.125468]  [<ffffffff81085056>] raw_notifier_call_chain+0x16/0x20
[  272.139984]  [<ffffffff81620190>] call_netdevice_notifiers_info+0x40/0x70
[  272.154523]  [<ffffffff816201d6>] call_netdevice_notifiers+0x16/0x20
[  272.168552]  [<ffffffff816224c5>] rollback_registered_many+0x145/0x240
[  272.182263]  [<ffffffff81622641>] rollback_registered+0x31/0x40
[  272.195369]  [<ffffffff816229c8>] unregister_netdevice_queue+0x58/0x90
[  272.208230]  [<ffffffff81547ca0>] __tun_detach+0x140/0x340
[  272.220686]  [<ffffffff81547ed6>] tun_chr_close+0x36/0x60

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
---
 drivers/net/bonding/bond_main.c |    4 ++--
 include/linux/rtnetlink.h       |    2 +-
 net/core/dev.c                  |   16 ++++++++--------
 net/core/rtnetlink.c            |    9 +++++----
 4 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2daa066..a141f40 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1213,7 +1213,7 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev,
 	if (err)
 		return err;
 	slave_dev->flags |= IFF_SLAVE;
-	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE);
+	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
 	return 0;
 }
 
@@ -1222,7 +1222,7 @@ static void bond_upper_dev_unlink(struct net_device *bond_dev,
 {
 	netdev_upper_dev_unlink(slave_dev, bond_dev);
 	slave_dev->flags &= ~IFF_SLAVE;
-	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE);
+	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
 }
 
 /* enslave device <slave> to bond device <master> */
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index f28544b..939428a 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -15,7 +15,7 @@ extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
 			      u32 id, long expires, u32 error);
 
-extern void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change);
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags);
 
 /* RTNL is used as a global lock for all changes to network configuration  */
 extern void rtnl_lock(void);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0918aad..5d7e821 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1203,7 +1203,7 @@ void netdev_state_change(struct net_device *dev)
 {
 	if (dev->flags & IFF_UP) {
 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
-		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
 	}
 }
 EXPORT_SYMBOL(netdev_state_change);
@@ -1293,7 +1293,7 @@ int dev_open(struct net_device *dev)
 	if (ret < 0)
 		return ret;
 
-	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
 	call_netdevice_notifiers(NETDEV_UP, dev);
 
 	return ret;
@@ -1371,7 +1371,7 @@ static int dev_close_many(struct list_head *head)
 	__dev_close_many(head);
 
 	list_for_each_entry_safe(dev, tmp, head, close_list) {
-		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
 		call_netdevice_notifiers(NETDEV_DOWN, dev);
 		list_del_init(&dev->close_list);
 	}
@@ -5257,7 +5257,7 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
 	unsigned int changes = dev->flags ^ old_flags;
 
 	if (gchanges)
-		rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
 
 	if (changes & IFF_UP) {
 		if (dev->flags & IFF_UP)
@@ -5489,7 +5489,7 @@ static void rollback_registered_many(struct list_head *head)
 
 		if (!dev->rtnl_link_ops ||
 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
-			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
 
 		/*
 		 *	Flush the unicast and multicast chains
@@ -5888,7 +5888,7 @@ int register_netdevice(struct net_device *dev)
 	 */
 	if (!dev->rtnl_link_ops ||
 	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
-		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
 
 out:
 	return ret;
@@ -6500,7 +6500,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	rcu_barrier();
 	call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
-	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
 
 	/*
 	 *	Flush the unicast and multicast chains
@@ -6539,7 +6539,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	 *	Prevent userspace races by waiting until the network
 	 *	device is fully setup before sending notifications.
 	 */
-	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
 
 	synchronize_net();
 	err = 0;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4aedf03..cf67144 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1984,14 +1984,15 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
+		  gfp_t flags)
 {
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 	size_t if_info_size;
 
-	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL);
+	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags);
 	if (skb == NULL)
 		goto errout;
 
@@ -2002,7 +2003,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
 		kfree_skb(skb);
 		goto errout;
 	}
-	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags);
 	return;
 errout:
 	if (err < 0)
@@ -2716,7 +2717,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	case NETDEV_JOIN:
 		break;
 	default:
-		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
 		break;
 	}
 	return NOTIFY_DONE;
-- 
1.7.9.5

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox