Netdev List
 help / color / mirror / Atom feed
* [PATCH 2/5] ipsec: check return value of skb_to_sgvec always
From: Jason A. Donenfeld @ 2017-04-25 14:08 UTC (permalink / raw)
  To: netdev, linux-kernel, davem, David.Laight, kernel-hardening
  Cc: Jason A. Donenfeld
In-Reply-To: <20170425140809.23881-1-Jason@zx2c4.com>

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 net/ipv4/ah4.c  |  8 ++++++--
 net/ipv4/esp4.c | 30 ++++++++++++++++++++----------
 net/ipv6/ah6.c  |  8 ++++++--
 net/ipv6/esp6.c | 31 +++++++++++++++++++++----------
 4 files changed, 53 insertions(+), 24 deletions(-)

diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 22377c8ff14b..e8f862358518 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -220,7 +220,9 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 
 	sg_init_table(sg, nfrags + sglists);
-	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	err = skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	if (unlikely(err < 0))
+		goto out_free;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
 		/* Attach seqhi sg right after packet payload */
@@ -393,7 +395,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb_push(skb, ihl);
 
 	sg_init_table(sg, nfrags + sglists);
-	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	err = skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	if (unlikely(err < 0))
+		goto out_free;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
 		/* Attach seqhi sg right after packet payload */
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b1e24446e297..42cb09cc8533 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -360,9 +360,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 			esph = esp_output_set_extra(skb, esph, extra);
 
 			sg_init_table(sg, nfrags);
-			skb_to_sgvec(skb, sg,
-				     (unsigned char *)esph - skb->data,
-				     assoclen + ivlen + clen + alen);
+			err = skb_to_sgvec(skb, sg,
+				           (unsigned char *)esph - skb->data,
+				           assoclen + ivlen + clen + alen);
+			if (unlikely(err < 0)) {
+				spin_unlock_bh(&x->lock);
+				goto error;
+			}
 
 			allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
 
@@ -381,11 +385,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 			pfrag->offset = pfrag->offset + allocsize;
 
 			sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
-			skb_to_sgvec(skb, dsg,
-				     (unsigned char *)esph - skb->data,
-				     assoclen + ivlen + clen + alen);
+			err = skb_to_sgvec(skb, dsg,
+				           (unsigned char *)esph - skb->data,
+				           assoclen + ivlen + clen + alen);
 
 			spin_unlock_bh(&x->lock);
+			if (unlikely(err < 0))
+				goto error;
 
 			goto skip_cow2;
 		}
@@ -422,9 +428,11 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	esph = esp_output_set_extra(skb, esph, extra);
 
 	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg,
-		     (unsigned char *)esph - skb->data,
-		     assoclen + ivlen + clen + alen);
+	err = skb_to_sgvec(skb, sg,
+		           (unsigned char *)esph - skb->data,
+		           assoclen + ivlen + clen + alen);
+	if (unlikely(err < 0))
+		goto error;
 
 skip_cow2:
 	if ((x->props.flags & XFRM_STATE_ESN))
@@ -658,7 +666,9 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	esp_input_set_header(skb, seqhi);
 
 	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg, 0, skb->len);
+	err = skb_to_sgvec(skb, sg, 0, skb->len);
+	if (unlikely(err < 0))
+		goto out;
 
 	skb->ip_summed = CHECKSUM_NONE;
 
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index dda6035e3b84..755f38271dd5 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -423,7 +423,9 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 
 	sg_init_table(sg, nfrags + sglists);
-	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	err = skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	if (unlikely(err < 0))
+		goto out_free;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
 		/* Attach seqhi sg right after packet payload */
@@ -606,7 +608,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	ip6h->hop_limit   = 0;
 
 	sg_init_table(sg, nfrags + sglists);
-	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	err = skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	if (unlikely(err < 0))
+		goto out_free;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
 		/* Attach seqhi sg right after packet payload */
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ff54faa75631..017e2c2d36e1 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -339,9 +339,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 			esph = esp_output_set_esn(skb, esph, seqhi);
 
 			sg_init_table(sg, nfrags);
-			skb_to_sgvec(skb, sg,
-				     (unsigned char *)esph - skb->data,
-				     assoclen + ivlen + clen + alen);
+			err = skb_to_sgvec(skb, sg,
+				           (unsigned char *)esph - skb->data,
+				           assoclen + ivlen + clen + alen);
+			if (unlikely(err < 0)) {
+				spin_unlock_bh(&x->lock);
+				goto error;
+			}
 
 			allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
 
@@ -360,12 +364,15 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 			pfrag->offset = pfrag->offset + allocsize;
 
 			sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
-			skb_to_sgvec(skb, dsg,
-				     (unsigned char *)esph - skb->data,
-				     assoclen + ivlen + clen + alen);
+			err = skb_to_sgvec(skb, dsg,
+				           (unsigned char *)esph - skb->data,
+				           assoclen + ivlen + clen + alen);
 
 			spin_unlock_bh(&x->lock);
 
+			if (unlikely(err < 0))
+				goto error;
+
 			goto skip_cow2;
 		}
 	}
@@ -403,9 +410,11 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	esph = esp_output_set_esn(skb, esph, seqhi);
 
 	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg,
-		     (unsigned char *)esph - skb->data,
-		     assoclen + ivlen + clen + alen);
+	err = skb_to_sgvec(skb, sg,
+		           (unsigned char *)esph - skb->data,
+		           assoclen + ivlen + clen + alen);
+	if (unlikely(err < 0))
+		goto error;
 
 skip_cow2:
 	if ((x->props.flags & XFRM_STATE_ESN))
@@ -600,7 +609,9 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	esp_input_set_header(skb, seqhi);
 
 	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg, 0, skb->len);
+	ret = skb_to_sgvec(skb, sg, 0, skb->len);
+	if (unlikely(ret < 0))
+		goto out;
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-- 
2.12.2

^ permalink raw reply related

* [PATCH 1/5] skbuff: return -EMSGSIZE in skb_to_sgvec to prevent overflow
From: Jason A. Donenfeld @ 2017-04-25 14:08 UTC (permalink / raw)
  To: netdev, linux-kernel, davem, David.Laight, kernel-hardening
  Cc: Jason A. Donenfeld

This is a defense-in-depth measure in response to bugs like
4d6fa57b4dab0d77f4d8e9d9c73d1e63f6fe8fee.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 net/core/skbuff.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f86bf69cfb8d..3c2a7f323722 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3489,7 +3489,9 @@ void __init skb_init(void)
  *	@len: Length of buffer space to be mapped
  *
  *	Fill the specified scatter-gather list with mappings/pointers into a
- *	region of the buffer space attached to a socket buffer.
+ *	region of the buffer space attached to a socket buffer. Returns either
+ *	the number of scatterlist items used, or -EMSGSIZE if the contents
+ *	could not fit.
  */
 static int
 __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
@@ -3512,6 +3514,9 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
 
+		if (elt && sg_is_last(&sg[elt - 1]))
+			return -EMSGSIZE;
+
 		WARN_ON(start > offset + len);
 
 		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
@@ -3535,6 +3540,9 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 
 		WARN_ON(start > offset + len);
 
+		if (elt && sg_is_last(&sg[elt - 1]))
+			return -EMSGSIZE;
+
 		end = start + frag_iter->len;
 		if ((copy = end - offset) > 0) {
 			if (copy > len)
-- 
2.12.2

^ permalink raw reply related

* Re: [PATCH] net: bridge: suppress broadcast when multicast flood is disabled
From: Nikolay Aleksandrov @ 2017-04-25 14:03 UTC (permalink / raw)
  To: Mike Manning, netdev; +Cc: David S. Miller, roopa
In-Reply-To: <a96ddd28-6d99-84d0-563a-2493a09a9e60@brocade.com>

On 25/04/17 16:32, Mike Manning wrote:
> On 24/04/17 20:52, Nikolay Aleksandrov wrote:
>> On 24/04/17 17:09, Mike Manning wrote:
>>> Flood suppression for packets that are not unicast needs to be handled
>>> consistently by also not flooding broadcast packets. As broadcast is a
>>> special case of multicast, the same kernel parameter should be used to
>>> suppress flooding for both of these packet types.
>>>
>>> Fixes: b6cb5ac8331b ("net: bridge: add per-port multicast flood flag")
>>> Cc: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
>>> Signed-off-by: Mike Manning <mmanning@brocade.com>
>>> ---
>>>  net/bridge/br_forward.c | 17 ++++++++++-------
>>>  1 file changed, 10 insertions(+), 7 deletions(-)
>>>
>>
>> I do not agree that this is a bug fix, the behaviour was intentional and is close to how HW
>> handles this flag. It has been like that for a few releases and changing it may impact setups
>> that use the flag since up until now they've seen the broadcast but not multicast packets and
>> suddenly their broadcast will stop.
>>
>> I think it would be better to introduce a third flag for bcast in net-next and use that to
>> filter it since that would give us the ability to program HW that can distinguish these
>> and have both options available, moreover it will not break any user setups relying on
>> the current flag behaviour and we have such setups.
>>
>> Thanks,
>>  Nik
>>
>>
> 
> Hi Nik,
> What is the usecase for flooding broadcast but not multicast please? Is the lack of flood
> suppression for broadcast just something that has not been explicitly tested for in those
> setups? This is the case for us, the bug raised only at this stage of the release cycle.
> While adding another kernel param is an option, I would only do so if absolutely necessary
> so as to avoid proliferation of params. Also to justify adding such a flag for broadcast
> suppression, I would need to add a comment to explain that while broadcast is a subset of
> multicast, the multicast flood suppression flag excludes broadcast.
> 
> Thanks
> Mike
> 

Hi Mike,
Stopping non-locally originating ARP requests is a pretty serious change
that affects many setups and changes the intended behaviour of this
option which was introduced specifically for unknown multicast flooding.
There're other options - you could filter the broadcast at the firewall
level, at least now you have that option but with this patch applied it
will be gone. Most network vendors differentiate the same types of
traffic as the ones listed below and allow to control them separately
which is much more flexible, I would like to keep it that way.

Currently the bridge differentiates intentionally between:
- known/unknown unicast controlled via fdbs/BR_FLOOD respectively
- known/unknown multicast controlled via mdbs/BR_MCAST_FLOOD respectively
- broadcast controlled only via firewall at this point

Fortunately the broadcast traffic doesn't have any dependent internal
state and can easily be controlled via the firewall thus rendering such
option unnecessary indeed, but I don't mind having it for completeness.
As for the comment, feel free to add it, I've actually added the exact
same comment some time ago in commit 8addd5e7d3a5 ("net: bridge: change
unicast boolean to exact pkt_type").

Cheers,
 Nik

^ permalink raw reply

* [PATCH net] ipv6: fix source routing
From: Sabrina Dubroca @ 2017-04-25 13:56 UTC (permalink / raw)
  To: netdev; +Cc: Sabrina Dubroca, Hannes Frederic Sowa, David Lebrun

Commit a149e7c7ce81 ("ipv6: sr: add support for SRH injection through
setsockopt") introduced handling of IPV6_SRCRT_TYPE_4, but at the same
time restricted it to only IPV6_SRCRT_TYPE_0 and
IPV6_SRCRT_TYPE_4. Previously, ipv6_push_exthdr() and fl6_update_dst()
would also handle other values (ie STRICT and TYPE_2).

Restore previous source routing behavior, by handling IPV6_SRCRT_STRICT
and IPV6_SRCRT_TYPE_2 the same way as IPV6_SRCRT_TYPE_0 in
ipv6_push_exthdr() and fl6_update_dst().

Fixes: a149e7c7ce81 ("ipv6: sr: add support for SRH injection through setsockopt")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 net/ipv6/exthdrs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 25192a3b0cd7..d32e2110aff2 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -909,6 +909,8 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
 {
 	switch (opt->type) {
 	case IPV6_SRCRT_TYPE_0:
+	case IPV6_SRCRT_STRICT:
+	case IPV6_SRCRT_TYPE_2:
 		ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr);
 		break;
 	case IPV6_SRCRT_TYPE_4:
@@ -1163,6 +1165,8 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
 
 	switch (opt->srcrt->type) {
 	case IPV6_SRCRT_TYPE_0:
+	case IPV6_SRCRT_STRICT:
+	case IPV6_SRCRT_TYPE_2:
 		fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
 		break;
 	case IPV6_SRCRT_TYPE_4:
-- 
2.12.2

^ permalink raw reply related

* Re: [RFC PATCH 3/7] net: add option to get information about timestamped packets
From: Miroslav Lichvar @ 2017-04-25 13:56 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Network Development, Richard Cochran, Willem de Bruijn,
	Soheil Hassas Yeganeh, Keller, Jacob E, Denny Page, Jiri Benc
In-Reply-To: <CAF=yD-+ErPH6Qb4y9TcAMi-7X+ebxW3epq3Zxch_vrt1r3gspQ@mail.gmail.com>

On Mon, Apr 24, 2017 at 11:18:13AM -0400, Willem de Bruijn wrote:
> On Mon, Apr 24, 2017 at 5:00 AM, Miroslav Lichvar <mlichvar@redhat.com> wrote:
> > Would "skb->data - skb->head -
> > skb->mac_header + skb->len" always work as the L2 length for received
> > packets at the time when the cmsg is prepared?
> 
> (skb->data - skb->head) - skb->mac_header computes the length
> of data before the mac, such as reserve?

data - head includes the reserve, but mac_header does too, so I think
it should be just the length of MAC header and everything up to the
data.

> Do you mean skb->data -
> skb->mac_header (or - skb_mac_offset(skb))?

That would give me a pointer? If I used skb_mac_offset(), the total
length would be just skb->len - skb_mac_offset()?

> > As for the original ifindex, it seems to me it does need to be saved
> > to a new field since __netif_receive_skb_core() intentionally
> > overwrites skb->skb_iif. What would be the best place for it, sk_buff
> > or skb_shared_info?
> 
> Finding storage space on the receive path will not be easy.
> 
> One shortcut to avoid storing this information explicitly is to look up
> the device from skb->napi_id.

Thanks. This looks promising. It will depend on CONFIG_NET_RX_BUSY_POLL,
but I guess that's ok. It nicely isolates all costs to the timestamping
option.

-- 
Miroslav Lichvar

^ permalink raw reply

* Re: [PATCH net] net: batman-adv: Fix possible memleaks when fail to register_netdevice
From: Sven Eckelmann @ 2017-04-25 13:53 UTC (permalink / raw)
  To: b.a.t.m.a.n-ZwoEplunGu2X36UT3dwllkB+6BGkLq7r
  Cc: mareklindner-rVWd3aGhH2z5bpWLKbzFeg,
	netdev-u79uwXL29TY76Z2rM5mHXA, a, Gao Feng,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q, gfree.wind-H32Fclmsjq1BDgjK7y7TUQ
In-Reply-To: <1493121800-28066-1-git-send-email-gfree.wind-H32Fclmsjq1BDgjK7y7TUQ@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 911 bytes --]

On Dienstag, 25. April 2017 20:03:20 CEST gfree.wind-H32Fclmsjq1BDgjK7y7TUQ@public.gmane.org wrote:
> From: Gao Feng <fgao-KlmEoCYek3zQT0dZR+AlfA@public.gmane.org>
> 
> Because the func batadv_softif_init_late allocate some resources and
> it would be invoked in register_netdevice. So we need to invoke the
> func batadv_softif_free instead of free_netdev to cleanup when fail
> to register_netdevice.

I could be wrong, but shouldn't the destructor be replaced with free_netdevice 
and the batadv_softif_free (without the free_netdev) used as ndo_uninit? The 
line you've changed should then be kept as free_netdevice.

At least this seems to be important when using rtnl_newlink() instead of the 
legacy sysfs netdev stuff from batman-adv. rtnl_newlink() would also only call 
free_netdevice and thus also not run batadv_softif_free. This seems to be only 
fixable by calling ndo_uninit.

Kind regards,
	Sven

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: Network cooling device and how to control NIC speed on thermal condition
From: Alan Cox @ 2017-04-25 13:45 UTC (permalink / raw)
  To: Waldemar Rymarkiewicz; +Cc: netdev, linux-kernel
In-Reply-To: <CAHKzcEMQPQzg6PYCSuv7Ufad+4AZ2gnqMgz3-VH5NTeb_pv4zQ@mail.gmail.com>

> I am looking on Linux thermal framework and on how to cool down the
> system effectively when it hits thermal condition. Already existing
> cooling methods cpu_cooling and clock_cooling are good. However, I
> wanted to go further and dynamically control also a switch ports'
> speed based on thermal condition. Lowering speed means less power,
> less power means lower temp.
> 
> Is there any in-kernel interface to configure switch port/NIC from other driver?

No but you can always hook that kind of functionality to the thermal
daemon. However I'd be careful with your assumptions. Lower speed also
means more time active.

https://github.com/01org/thermal_daemon

For example if you run a big encoding job on an atom instead of an Intel
i7, the atom will often not only take way longer but actually use more
total power than the i7 did.

Thus it would often be far more efficient to time synchronize your
systems, batch up data on the collecting end, have the processing node
wake up on an alarm, collect data from the other node and then actually
go back into suspend.

Modern processors are generally very good in idle state (less so
sometimes the platform around them) so trying to lower speeds may
actually be the wrong thing to do, versus say trying to batch up activity
so that you handle a burst and then sleep the entire platform.

It also makes sense to keep policy like that mostly user space - because
what you do is going to be very device specific - eg with things like
dimming the screen, lowering the wifi power, pausing some system
services, pausing battery charge etc.

Now at platform design time there are some interesting trade offs between
100Mbit and 1Gbit ethernet although less so than there used to be 8)

Alan

^ permalink raw reply

* RE: [PATCH net-next] qed: fix invalid use of sizeof in qed_alloc_qm_data()
From: Mintz, Yuval @ 2017-04-25 13:43 UTC (permalink / raw)
  To: Wei Yongjun; +Cc: Wei Yongjun, netdev@vger.kernel.org, Elior, Ariel
In-Reply-To: <20170425070718.14790-1-weiyj.lk@gmail.com>

> sizeof() when applied to a pointer typed expression gives the size of the
> pointer, not that of the pointed data.
> 
> Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>

Thanks!
Acked-by: Yuval Mintz <Yuval.Mintz@cavium.com>

I'd also mention that -

Fixes: b5a9ee7cf3be ("qed: Revise QM configuration")

^ permalink raw reply

* Re: [PATCH] net: bridge: suppress broadcast when multicast flood is disabled
From: Mike Manning @ 2017-04-25 13:32 UTC (permalink / raw)
  To: Nikolay Aleksandrov, netdev; +Cc: David S. Miller, roopa
In-Reply-To: <5266c6fd-54e9-01c7-2379-0b7f11a3cde3@cumulusnetworks.com>

On 24/04/17 20:52, Nikolay Aleksandrov wrote:
> On 24/04/17 17:09, Mike Manning wrote:
>> Flood suppression for packets that are not unicast needs to be handled
>> consistently by also not flooding broadcast packets. As broadcast is a
>> special case of multicast, the same kernel parameter should be used to
>> suppress flooding for both of these packet types.
>>
>> Fixes: b6cb5ac8331b ("net: bridge: add per-port multicast flood flag")
>> Cc: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
>> Signed-off-by: Mike Manning <mmanning@brocade.com>
>> ---
>>  net/bridge/br_forward.c | 17 ++++++++++-------
>>  1 file changed, 10 insertions(+), 7 deletions(-)
>>
> 
> I do not agree that this is a bug fix, the behaviour was intentional and is close to how HW
> handles this flag. It has been like that for a few releases and changing it may impact setups
> that use the flag since up until now they've seen the broadcast but not multicast packets and
> suddenly their broadcast will stop.
> 
> I think it would be better to introduce a third flag for bcast in net-next and use that to
> filter it since that would give us the ability to program HW that can distinguish these
> and have both options available, moreover it will not break any user setups relying on
> the current flag behaviour and we have such setups.
> 
> Thanks,
>  Nik
> 
> 

Hi Nik,
What is the usecase for flooding broadcast but not multicast please? Is the lack of flood
suppression for broadcast just something that has not been explicitly tested for in those
setups? This is the case for us, the bug raised only at this stage of the release cycle.
While adding another kernel param is an option, I would only do so if absolutely necessary
so as to avoid proliferation of params. Also to justify adding such a flag for broadcast
suppression, I would need to add a comment to explain that while broadcast is a subset of
multicast, the multicast flood suppression flag excludes broadcast.

Thanks
Mike

^ permalink raw reply

* Re: [PATCH net-next] rhashtable: remove insecure_max_entries param
From: Herbert Xu @ 2017-04-25 13:28 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netdev
In-Reply-To: <20170425112356.GB11322@breakpoint.cc>

On Tue, Apr 25, 2017 at 01:23:56PM +0200, Florian Westphal wrote:
>
> What extra cost?
> 
> The only change is that ht->nelems has to be right-shifted by one,
> I don't think that warrants extra space in struct rhashtable, its
> already way too large (I think we can reduce its size further).

I see at least one hole on 64-bit which means that you can fit
it into struct rhashtable for free.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [PATCH] ipv6: ensure message length for raw socket is at least sizeof(ipv6hdr)
From: Alexander Potapenko @ 2017-04-25 13:18 UTC (permalink / raw)
  To: dvyukov, kcc, edumazet, davem, kuznet; +Cc: linux-kernel, netdev

rawv6_send_hdrinc() expects that the buffer copied from the userspace
contains the IPv6 header, so if too few bytes are copied parts of the
header may remain uninitialized.

This bug has been detected with KMSAN.

Signed-off-by: Alexander Potapenko <glider@google.com>
---
For the record, the KMSAN report:

==================================================================
BUG: KMSAN: use of unitialized memory in nf_ct_frag6_gather+0xf5a/0x44a0
inter: 0
CPU: 0 PID: 1036 Comm: probe Not tainted 4.11.0-rc5+ #2455
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:16
 dump_stack+0x143/0x1b0 lib/dump_stack.c:52
 kmsan_report+0x16b/0x1e0 mm/kmsan/kmsan.c:1078
 __kmsan_warning_32+0x5c/0xa0 mm/kmsan/kmsan_instr.c:510
 nf_ct_frag6_gather+0xf5a/0x44a0 net/ipv6/netfilter/nf_conntrack_reasm.c:577
 ipv6_defrag+0x1d9/0x280 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68
 nf_hook_entry_hookfn ./include/linux/netfilter.h:102
 nf_hook_slow+0x13f/0x3c0 net/netfilter/core.c:310
 nf_hook ./include/linux/netfilter.h:212
 NF_HOOK ./include/linux/netfilter.h:255
 rawv6_send_hdrinc net/ipv6/raw.c:673
 rawv6_sendmsg+0x2fcb/0x41a0 net/ipv6/raw.c:919
 inet_sendmsg+0x3f8/0x6d0 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633
 sock_sendmsg net/socket.c:643
 SYSC_sendto+0x6a5/0x7c0 net/socket.c:1696
 SyS_sendto+0xbc/0xe0 net/socket.c:1664
 do_syscall_64+0x72/0xa0 arch/x86/entry/common.c:285
 entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246
RIP: 0033:0x436e03
RSP: 002b:00007ffce48baf38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000436e03
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003
RBP: 00007ffce48baf90 R08: 00007ffce48baf50 R09: 000000000000001c
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 0000000000401790 R14: 0000000000401820 R15: 0000000000000000
origin: 00000000d9400053
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:362
 kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:257
 kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:270
 slab_alloc_node mm/slub.c:2735
 __kmalloc_node_track_caller+0x1f4/0x390 mm/slub.c:4341
 __kmalloc_reserve net/core/skbuff.c:138
 __alloc_skb+0x2cd/0x740 net/core/skbuff.c:231
 alloc_skb ./include/linux/skbuff.h:933
 alloc_skb_with_frags+0x209/0xbc0 net/core/skbuff.c:4678
 sock_alloc_send_pskb+0x9ff/0xe00 net/core/sock.c:1903
 sock_alloc_send_skb+0xe4/0x100 net/core/sock.c:1920
 rawv6_send_hdrinc net/ipv6/raw.c:638
 rawv6_sendmsg+0x2918/0x41a0 net/ipv6/raw.c:919
 inet_sendmsg+0x3f8/0x6d0 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633
 sock_sendmsg net/socket.c:643
 SYSC_sendto+0x6a5/0x7c0 net/socket.c:1696
 SyS_sendto+0xbc/0xe0 net/socket.c:1664
 do_syscall_64+0x72/0xa0 arch/x86/entry/common.c:285
 return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246
==================================================================

, triggered by the following syscalls:
  socket(PF_INET6, SOCK_RAW, IPPROTO_RAW) = 3
  sendto(3, NULL, 0, 0, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "ff00::", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = -1 EPERM
---
 net/ipv6/raw.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index f174e76e6505..805464668bd8 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -632,6 +632,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 		ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
 		return -EMSGSIZE;
 	}
+	if (length < sizeof(struct ipv6hdr))
+		return -EINVAL;
 	if (flags&MSG_PROBE)
 		goto out;
 
-- 
2.13.0.rc0.306.g87b477812d-goog

^ permalink raw reply related

* Re: Network cooling device and how to control NIC speed on thermal condition
From: Andrew Lunn @ 2017-04-25 13:17 UTC (permalink / raw)
  To: Waldemar Rymarkiewicz; +Cc: netdev, linux-kernel
In-Reply-To: <CAHKzcEMQPQzg6PYCSuv7Ufad+4AZ2gnqMgz3-VH5NTeb_pv4zQ@mail.gmail.com>

On Tue, Apr 25, 2017 at 10:36:28AM +0200, Waldemar Rymarkiewicz wrote:
> Hi,
> 
> I am not much aware of linux networking architecture so I'd like to
> ask first before will start to dig into the code. Appreciate any
> feedback.
> 
> I am looking on Linux thermal framework and on how to cool down the
> system effectively when it hits thermal condition. Already existing
> cooling methods cpu_cooling and clock_cooling are good. However, I
> wanted to go further and dynamically control also a switch ports'
> speed based on thermal condition. Lowering speed means less power,
> less power means lower temp.
> 
> Is there any in-kernel interface to configure switch port/NIC from other driver?

Hi Waldemar

Linux models switch ports as network interfaces, so mostly, there is
little difference between a NIC and a switch port. What you define for
one, should work for the other. Mostly.

However, i don't think you need to be too worried about the NIC level
of the stack. You can mostly do this higher up in the stack. I would
expect there is a relationship between Packets per Second and
generated heat. You might want the NIC to give you some sort of
heating coefficient, 1PPS is ~ 10uC. Given that, you want to throttle
the PPS in the generic queuing layers. This sounds like a TC filter.
You have userspace install a TC filter, which is a net_cooling device.

This however does not directly work for so called 'fastpath' traffic
in switches. Frames which ingress one switch port and egress another
switch port are mostly never seen by Linux. So a software TC filter
will not affect them. However, there is infrastructure in place to
accelerate TC filters by pushing them down into the hardware. So the
same basic concept can be used for switch fastpath traffic, but
requires a bit more work.

    Andrew

^ permalink raw reply

* Re: [PATCH net-next v3 0/5] virtio-net tx napi
From: David Miller @ 2017-04-25 13:09 UTC (permalink / raw)
  To: willemdebruijn.kernel; +Cc: netdev, virtualization, willemb, mst
In-Reply-To: <20170424174930.82623-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Date: Mon, 24 Apr 2017 13:49:25 -0400

> Add napi for virtio-net transmit completion processing.

Series applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next v8 2/3] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
From: Jamal Hadi Salim @ 2017-04-25 13:01 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: davem, xiyou.wangcong, eric.dumazet, netdev
In-Reply-To: <20170425121338.GC1867@nanopsycho.orion>

On 17-04-25 08:13 AM, Jiri Pirko wrote:
> Tue, Apr 25, 2017 at 01:54:06PM CEST, jhs@mojatatu.com wrote:


[..]

>> -#define TCAA_MAX 1
>> +/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
>> + *
>> + * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
>> + * actions in a dump. All dump responses will contain the number of actions
>> + * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
>> + *
>> + */
>> +#define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)
>
> BIT (I think I mentioned this before)
>

You did - but i took it out about two submissions back (per cover
letter) because it is no part of UAPI today. I noticed devlink was
using it but they defined  their own variant.
So if i added this, iproute2 doesnt compile. I could fix iproute2
to move it somewhere to a common header then restore this.

>> +#define VALID_TCA_FLAGS TCA_FLAG_LARGE_DUMP_ON
>
> Again, namespace please. "TCA_ROOT_FLAGS_VALID"

Good point.

> Why is this UAPI?
>

Should not be. I will fix in next update.


>
>>
>> /* New extended info filters for IFLA_EXT_MASK */
>> #define RTEXT_FILTER_VF		(1 << 0)
>> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>> index 9ce22b7..2756213 100644
>> --- a/net/sched/act_api.c
>> +++ b/net/sched/act_api.c
>> @@ -83,6 +83,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
>> 			   struct netlink_callback *cb)
>> {
>> 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
>> +	u32 act_flags = cb->args[2];
>> 	struct nlattr *nest;
>>
>> 	spin_lock_bh(&hinfo->lock);
>> @@ -111,14 +112,18 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
>> 			}
>> 			nla_nest_end(skb, nest);
>> 			n_i++;
>> -			if (n_i >= TCA_ACT_MAX_PRIO)
>> +			if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
>> +			    n_i >= TCA_ACT_MAX_PRIO)
>> 				goto done;
>> 		}
>> 	}
>> done:
>> 	spin_unlock_bh(&hinfo->lock);
>> -	if (n_i)
>> +	if (n_i) {
>> 		cb->args[0] += n_i;
>> +		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
>> +			cb->args[1] = n_i;
>> +	}
>> 	return n_i;
>>
>> nla_put_failure:
>> @@ -993,11 +998,15 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
>> 	return tcf_add_notify(net, n, &actions, portid);
>> }
>>
>> +static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
>> +	[TCA_ROOT_FLAGS]      = { .type = NLA_U32 },
>> +};
>> +
>> static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
>> 			 struct netlink_ext_ack *extack)
>> {
>> 	struct net *net = sock_net(skb->sk);
>> -	struct nlattr *tca[TCAA_MAX + 1];
>> +	struct nlattr *tca[TCA_ROOT_MAX + 1];
>> 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
>> 	int ret = 0, ovr = 0;
>>
>> @@ -1005,7 +1014,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
>> 	    !netlink_capable(skb, CAP_NET_ADMIN))
>> 		return -EPERM;
>>
>> -	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
>> +	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
>> 			  extack);
>> 	if (ret < 0)
>> 		return ret;
>> @@ -1046,16 +1055,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
>> 	return ret;
>> }
>>
>> -static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
>> +static struct nlattr *find_dump_kind(struct nlattr **nla)
>> {
>> 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
>> 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
>> -	struct nlattr *nla[TCAA_MAX + 1];
>> 	struct nlattr *kind;
>>
>> -	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
>> -			NULL, NULL) < 0)
>> -		return NULL;
>> 	tb1 = nla[TCA_ACT_TAB];
>> 	if (tb1 == NULL)
>> 		return NULL;
>> @@ -1073,6 +1078,16 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
>> 	return kind;
>> }
>>
>> +static inline bool tca_flags_valid(u32 act_flags)
>> +{
>> +	u32 invalid_flags_mask  = ~VALID_TCA_FLAGS;
>> +
>> +	if (act_flags & invalid_flags_mask)
>> +		return false;
>
> I don't see how this resolves anything. VALID_TCA_FLAGS is set in stone
> not going to change anytime in future, right?

Every time a new bit gets added VALID_TCA_FLAGS changes.

>Then the TCA_ROOT_FLAGS
> attr will always contain only one flag, right?

Not true - forever. Just in this patch discussion:
I have added 2 other flags since removed. So I cant predict the
future. You keep coming back to this assumption there will always
ever be this one flag. I am not following how you reach this
conclusion.

> Then, I don't see why do we need this dance... u8 flag attr resolves
> this in elegant way. I know I sound like a broken record. This is the
> last time I'm commenting on this. I give up.
>

Why is a u8 better than u32 Jiri?
The TLV space consumed is still 64 bits in both cases. If I define u8,
u16, u32 - it is still 64 bits being alloced. If i use a u8 then i have
24 bits which are pads - given current discussions I can never ever use
again!

If i keep 32 bits I am free to use those without ever changing the
data structure (except for the restrictions to now make sure nothing
gets set).

cheers,
jamal

^ permalink raw reply

* RE: [PATCH net] driver/net: Fix possible memleaks when fail to register_netdevice
From: Gao Feng @ 2017-04-25 12:51 UTC (permalink / raw)
  To: gfree.wind, jiri, davem, kuznet, jmorris, yoshfuji, kaber,
	steffen.klassert, herbert, netdev
In-Reply-To: <1493121710-27910-1-git-send-email-gfree.wind@foxmail.com>

> From: Gao Feng <fgao@ikuai8.com>
> 
> These drivers allocate kinds of resources in init routine, and free some
> resources in the destructor of net_device. It may cause memleak when some
> errors happen after register_netdevice invokes the init callback. Because
only
> the uninit callback is invoked in the error handler of register_netdevice,
but the
> destructor not. As a result, some resources are lost forever.
> 
> Now invokes the destructor instead of free_netdev somewhere, and free the
> left resources in the newlink func when fail to register_netdevice.
> 
> Signed-off-by: Gao Feng <fgao@ikuai8.com>
> ---
>  drivers/net/dummy.c      |  2 +-
>  drivers/net/ifb.c        |  2 +-
>  drivers/net/loopback.c   |  2 +-
>  drivers/net/team/team.c  | 11 ++++++++++-
>  drivers/net/veth.c       |  4 ++--
>  net/8021q/vlan_netlink.c |  6 +++++-
>  net/ipv4/ip_tunnel.c     |  9 ++++++++-
>  net/ipv6/ip6_gre.c       |  6 +++++-
>  net/ipv6/ip6_tunnel.c    | 12 ++++++++++--
>  net/ipv6/ip6_vti.c       |  7 ++++++-
>  net/ipv6/sit.c           |  5 ++++-
>  11 files changed, 53 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index
> 2c80611..55b8a50 100644
> --- a/drivers/net/dummy.c
> +++ b/drivers/net/dummy.c
> @@ -383,7 +383,7 @@ static int __init dummy_init_one(void)
>  	return 0;
> 
>  err:
> -	free_netdev(dev_dummy);
> +	dummy_free_netdev(dev_dummy);
>  	return err;
>  }
> 
> diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 312fce7..a298371
100644
> --- a/drivers/net/ifb.c
> +++ b/drivers/net/ifb.c
> @@ -318,7 +318,7 @@ static int __init ifb_init_one(int index)
>  	return 0;
> 
>  err:
> -	free_netdev(dev_ifb);
> +	ifb_dev_free(dev_ifb);
>  	return err;
>  }
> 
> diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index
> b23b719..c4e1d4c 100644
> --- a/drivers/net/loopback.c
> +++ b/drivers/net/loopback.c
> @@ -208,7 +208,7 @@ static __net_init int loopback_net_init(struct net
*net)
> 
> 
>  out_free_netdev:
> -	free_netdev(dev);
> +	loopback_dev_free(dev);
>  out:
>  	if (net_eq(net, &init_net))
>  		panic("loopback: Failed to register netdevice: %d\n", err);
diff --git
> a/drivers/net/team/team.c b/drivers/net/team/team.c index f8c81f1..0bc80fb
> 100644
> --- a/drivers/net/team/team.c
> +++ b/drivers/net/team/team.c
> @@ -2109,10 +2109,19 @@ static void team_setup(struct net_device *dev)
> static int team_newlink(struct net *src_net, struct net_device *dev,
>  			struct nlattr *tb[], struct nlattr *data[])  {
> +	int ret;
> +
>  	if (tb[IFLA_ADDRESS] == NULL)
>  		eth_hw_addr_random(dev);
> 
> -	return register_netdevice(dev);
> +	ret = register_netdevice(dev);
> +	if (ret) {
> +		struct team *team = netdev_priv(dev);
> +
> +		free_percpu(team->pcpu_stats);
> +	}
> +
> +	return ret;
>  }
> 
>  static int team_validate(struct nlattr *tb[], struct nlattr *data[]) diff
--git
> a/drivers/net/veth.c b/drivers/net/veth.c index 8c39d6d..f60f5ee 100644
> --- a/drivers/net/veth.c
> +++ b/drivers/net/veth.c
> @@ -457,13 +457,13 @@ static int veth_newlink(struct net *src_net, struct
> net_device *dev,
>  	return 0;
> 
>  err_register_dev:
> -	/* nothing to do */
> +	free_percpu(dev->vstats);
>  err_configure_peer:
>  	unregister_netdevice(peer);
>  	return err;
> 
>  err_register_peer:
> -	free_netdev(peer);
> +	veth_dev_free(peer);
>  	return err;
>  }
> 
> diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index
> 1270207..a15826a 100644
> --- a/net/8021q/vlan_netlink.c
> +++ b/net/8021q/vlan_netlink.c
> @@ -156,7 +156,11 @@ static int vlan_newlink(struct net *src_net, struct
> net_device *dev,
>  	if (err < 0)
>  		return err;
> 
> -	return register_vlan_dev(dev);
> +	err = register_vlan_dev(dev);
> +	if (err)
> +		free_percpu(vlan->vlan_pcpu_stats);
> +
> +	return err;
>  }
> 
>  static inline size_t vlan_qos_map_size(unsigned int n) diff --git
> a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 823abae..4acb296
> 100644
> --- a/net/ipv4/ip_tunnel.c
> +++ b/net/ipv4/ip_tunnel.c
> @@ -63,6 +63,8 @@
>  #include <net/ip6_route.h>
>  #endif
> 
> +static void ip_tunnel_dev_free(struct net_device *dev);
> +
>  static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)  {
>  	return hash_32((__force u32)key ^ (__force u32)remote, @@ -285,7
> +287,7 @@ static struct net_device *__ip_tunnel_create(struct net *net,
>  	return dev;
> 
>  failed_free:
> -	free_netdev(dev);
> +	ip_tunnel_dev_free(dev);
>  failed:
>  	return ERR_PTR(err);
>  }
> @@ -1099,7 +1101,12 @@ int ip_tunnel_newlink(struct net_device *dev,
> struct nlattr *tb[],
>  		dev->mtu = mtu;
> 
>  	ip_tunnel_add(itn, nt);
> +
> +	return 0;
>  out:
> +	gro_cells_destroy(&nt->gro_cells);
> +	dst_cache_destroy(&nt->dst_cache);
> +	free_percpu(dev->tstats);
>  	return err;
>  }
>  EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
> diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index
6fcb7cb..d409ad1
> 100644
> --- a/net/ipv6/ip6_gre.c
> +++ b/net/ipv6/ip6_gre.c
> @@ -77,6 +77,7 @@ struct ip6gre_net {
>  static void ip6gre_tunnel_setup(struct net_device *dev);  static void
> ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);  static
void
> ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
> +static void ip6gre_dev_free(struct net_device *dev);
> 
>  /* Tunnel hash table */
> 
> @@ -351,7 +352,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net
> *net,
>  	return nt;
> 
>  failed_free:
> -	free_netdev(dev);
> +	ip6gre_dev_free(dev);
>  	return NULL;
>  }
> 
> @@ -1388,7 +1389,10 @@ static int ip6gre_newlink(struct net *src_net,
struct
> net_device *dev,
>  	dev_hold(dev);
>  	ip6gre_tunnel_link(ign, nt);
> 
> +	return 0;
>  out:
> +	dst_cache_destroy(&nt->dst_cache);
> +	free_percpu(dev->tstats);
>  	return err;
>  }
> 
> diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index
> 75fac93..95f512c 100644
> --- a/net/ipv6/ip6_tunnel.c
> +++ b/net/ipv6/ip6_tunnel.c
> @@ -1960,11 +1960,12 @@ static int ip6_tnl_newlink(struct net *src_net,
> struct net_device *dev,
>  	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
>  	struct ip6_tnl *nt, *t;
>  	struct ip_tunnel_encap ipencap;
> +	int err;
> 
>  	nt = netdev_priv(dev);
> 
>  	if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
> -		int err = ip6_tnl_encap_setup(nt, &ipencap);
> +		err = ip6_tnl_encap_setup(nt, &ipencap);
> 
>  		if (err < 0)
>  			return err;
> @@ -1981,7 +1982,14 @@ static int ip6_tnl_newlink(struct net *src_net,
> struct net_device *dev,
>  			return -EEXIST;
>  	}
> 
> -	return ip6_tnl_create2(dev);
> +	err = ip6_tnl_create2(dev);
> +	if (err) {
> +		gro_cells_destroy(&t->gro_cells);
> +		dst_cache_destroy(&t->dst_cache);
> +		free_percpu(dev->tstats);
> +	}
> +
> +	return err;
>  }
> 
>  static int ip6_tnl_changelink(struct net_device *dev, struct nlattr
*tb[], diff
> --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 3d8a3b6..b201eef
100644
> --- a/net/ipv6/ip6_vti.c
> +++ b/net/ipv6/ip6_vti.c
> @@ -940,6 +940,7 @@ static int vti6_newlink(struct net *src_net, struct
> net_device *dev,  {
>  	struct net *net = dev_net(dev);
>  	struct ip6_tnl *nt;
> +	int ret;
> 
>  	nt = netdev_priv(dev);
>  	vti6_netlink_parms(data, &nt->parms);
> @@ -949,7 +950,11 @@ static int vti6_newlink(struct net *src_net, struct
> net_device *dev,
>  	if (vti6_locate(net, &nt->parms, 0))
>  		return -EEXIST;
> 
> -	return vti6_tnl_create2(dev);
> +	ret = vti6_tnl_create2(dev);
> +	if (ret)
> +		free_percpu(dev->tstats);
> +
> +	return ret;
>  }
> 
>  static void vti6_dellink(struct net_device *dev, struct list_head *head)
diff
> --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 99853c6..f45dc4a 100644
> --- a/net/ipv6/sit.c
> +++ b/net/ipv6/sit.c
> @@ -1555,8 +1555,11 @@ static int ipip6_newlink(struct net *src_net,
struct
> net_device *dev,
>  		return -EEXIST;
> 
>  	err = ipip6_tunnel_create(dev);
> -	if (err < 0)
> +	if (err < 0) {
> +		dst_cache_destroy(&nt->dst_cache);
> +		free_percpu(dev->tstats);
>  		return err;
> +	}
> 
>  #ifdef CONFIG_IPV6_SIT_6RD
>  	if (ipip6_netlink_6rd_parms(data, &ip6rd))
> --
> 1.9.1

Actually I have another simpler solution to fix it. 
When newlink failed, its caller "rtnl_newlink" invokes the destructor if it
exists like the following:
	if (dev->reg_state == NETREG_UNINITIALIZED)
		if (dev->destructor)
			dev->destructor(dev);
		else
			free_netdev(dev);

There are two reasons I don't adopt this solution.
1. I don't know if it is against the original purpose of dev->destructor and
rtnl_newlink.
2. It breaks the design rule that "who malloc, who free".

But it is one more simple fix, then what's your opinion?

Best Regards
Feng

^ permalink raw reply

* Re: [PATCH v3 net] net: ipv6: regenerate host route if moved to gc list
From: Andrey Konovalov @ 2017-04-25 12:50 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, Dmitry Vyukov, mmanning, Martin KaFai Lau
In-Reply-To: <1493046549-17420-1-git-send-email-dsa@cumulusnetworks.com>

On Mon, Apr 24, 2017 at 5:09 PM, David Ahern <dsa@cumulusnetworks.com> wrote:
> Taking down the loopback device wreaks havoc on IPv6 routing. By
> extension, taking down a VRF device wreaks havoc on its table.
>
> Dmitry and Andrey both reported heap out-of-bounds reports in the IPv6
> FIB code while running syzkaller fuzzer. The root cause is a dead dst
> that is on the garbage list gets reinserted into the IPv6 FIB. While on
> the gc (or perhaps when it gets added to the gc list) the dst->next is
> set to an IPv4 dst. A subsequent walk of the ipv6 tables causes the
> out-of-bounds access.
>
> Andrey's reproducer was the key to getting to the bottom of this.
>
> With IPv6, host routes for an address have the dst->dev set to the
> loopback device. When the 'lo' device is taken down, rt6_ifdown initiates
> a walk of the fib evicting routes with the 'lo' device which means all
> host routes are removed. That process moves the dst which is attached to
> an inet6_ifaddr to the gc list and marks it as dead.
>
> The recent change to keep global IPv6 addresses added a new function,
> fixup_permanent_addr, that is called on admin up. That function restarts
> dad for an inet6_ifaddr and when it completes the host route attached
> to it is inserted into the fib. Since the route was marked dead and
> moved to the gc list, re-inserting the route causes the reported
> out-of-bounds accesses. If the device with the address is taken down
> or the address is removed, the WARN_ON in fib6_del is triggered.
>
> All of those faults are fixed by regenerating the host route if the
> existing one has been moved to the gc list, something that can be
> determined by checking if the rt6i_ref counter is 0.

Hi David,

I've been running syzkaller with your patch and got another report
from ip6_pol_route.

It happened only once so far and I couldn't reproduce it.

==================================================================
BUG: KASAN: slab-out-of-bounds in find_rr_leaf net/ipv6/route.c:722
[inline] at addr ffff880033dddaa8
BUG: KASAN: slab-out-of-bounds in rt6_select net/ipv6/route.c:758
[inline] at addr ffff880033dddaa8
BUG: KASAN: slab-out-of-bounds in ip6_pol_route+0x1b1e/0x1ba0
net/ipv6/route.c:1091 at addr ffff880033dddaa8
Read of size 4 by task syz-executor7/9808
CPU: 2 PID: 9808 Comm: syz-executor7 Not tainted 4.11.0-rc8+ #268
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x192/0x22d lib/dump_stack.c:52
 kasan_object_err+0x1c/0x70 mm/kasan/report.c:164
 print_address_description mm/kasan/report.c:202 [inline]
 kasan_report_error mm/kasan/report.c:291 [inline]
 kasan_report+0x252/0x510 mm/kasan/report.c:347
 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:367
 find_rr_leaf net/ipv6/route.c:722 [inline]
 rt6_select net/ipv6/route.c:758 [inline]
 ip6_pol_route+0x1b1e/0x1ba0 net/ipv6/route.c:1091
 ip6_pol_route_output+0x4c/0x60 net/ipv6/route.c:1212
 fib6_rule_action+0x261/0x8a0 net/ipv6/fib6_rules.c:100
 fib_rules_lookup+0x34b/0xae0 net/core/fib_rules.c:265
 fib6_rule_lookup+0x175/0x360 net/ipv6/fib6_rules.c:44
 ip6_route_output_flags+0x260/0x2f0 net/ipv6/route.c:1240
 ip6_route_output include/net/ip6_route.h:79 [inline]
 ip6_dst_lookup_tail+0xe59/0x1640 net/ipv6/ip6_output.c:959
 ip6_dst_lookup_flow+0xb1/0x260 net/ipv6/ip6_output.c:1082
 ip6_sk_dst_lookup_flow+0x2c6/0x7f0 net/ipv6/ip6_output.c:1113
 udpv6_sendmsg+0x2350/0x3310 net/ipv6/udp.c:1219
 inet_sendmsg+0x164/0x490 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x660/0x810 net/socket.c:1696
 SyS_sendto+0x40/0x50 net/socket.c:1664
 entry_SYSCALL_64_fastpath+0x1a/0xa9
RIP: 0033:0x4458d9
RSP: 002b:00007ff3a5343b58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 0000000000708000 RCX: 00000000004458d9
RDX: 0000000000000fa8 RSI: 0000000020000000 RDI: 0000000000000005
RBP: 0000000000004300 R08: 0000000020006000 R09: 000000000000001c
R10: 0000000000040000 R11: 0000000000000282 R12: 00000000006e33c0
R13: 0000000000000005 R14: 0000000000000029 R15: 0000000000000023
Object at ffff880033ddd940, in cache ip_dst_cache size: 224
Allocated:
PID = 9717
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:513
 set_track mm/kasan/kasan.c:525 [inline]
 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:616
 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:555
 slab_post_alloc_hook mm/slab.h:456 [inline]
 slab_alloc_node mm/slub.c:2718 [inline]
 slab_alloc mm/slub.c:2726 [inline]
 kmem_cache_alloc+0xa8/0x160 mm/slub.c:2731
 dst_alloc+0x11b/0x1a0 net/core/dst.c:209
 rt_dst_alloc+0xf0/0x5d0 net/ipv4/route.c:1497
 __mkroute_output net/ipv4/route.c:2181 [inline]
 __ip_route_output_key_hash+0xdc4/0x2930 net/ipv4/route.c:2391
 __ip_route_output_key include/net/route.h:123 [inline]
 ip_route_output_flow+0x29/0xa0 net/ipv4/route.c:2477
 ip_route_output_key include/net/route.h:133 [inline]
 sctp_v4_get_dst+0x606/0x1420 net/sctp/protocol.c:458
 sctp_transport_route+0xa8/0x420 net/sctp/transport.c:287
 sctp_assoc_add_peer+0x5a5/0x1470 net/sctp/associola.c:656
 sctp_sendmsg+0x18a8/0x3b50 net/sctp/socket.c:1871
 inet_sendmsg+0x164/0x490 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x660/0x810 net/socket.c:1696
 SyS_sendto+0x40/0x50 net/socket.c:1664
 entry_SYSCALL_64_fastpath+0x1a/0xa9
Freed:
PID = 868
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:513
 set_track mm/kasan/kasan.c:525 [inline]
 kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:589
 slab_free_hook mm/slub.c:1357 [inline]
 slab_free_freelist_hook mm/slub.c:1379 [inline]
 slab_free mm/slub.c:2961 [inline]
 kmem_cache_free+0x72/0x190 mm/slub.c:2983
 dst_destroy+0x24c/0x390 net/core/dst.c:269
 dst_free include/net/dst.h:428 [inline]
 rt_fibinfo_free net/ipv4/fib_semantics.c:155 [inline]
 free_fib_info_rcu+0x852/0xa10 net/ipv4/fib_semantics.c:214
 __rcu_reclaim kernel/rcu/rcu.h:118 [inline]
 rcu_do_batch.isra.65+0x6de/0xbd0 kernel/rcu/tree.c:2879
 invoke_rcu_callbacks kernel/rcu/tree.c:3142 [inline]
 __rcu_process_callbacks kernel/rcu/tree.c:3109 [inline]
 rcu_process_callbacks+0x23f/0x810 kernel/rcu/tree.c:3126
 __do_softirq+0x253/0x78b kernel/softirq.c:284
Memory state around the buggy address:
 ffff880033ddd980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffff880033ddda00: 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc fc
>ffff880033ddda80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
                                  ^
 ffff880033dddb00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff880033dddb80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
==================================================================


>
> Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Reported-by: Andrey Konovalov <andreyknvl@google.com>
> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
> ---
> v3
> - removed 'if (prev)' and just call ip6_rt_put; added comment about spinlock
>
> v2
> - change ifp->rt under spinlock vs cmpxchg
> - add comment about rt6i_ref == 0
>
>  net/ipv6/addrconf.c | 15 +++++++++++++--
>  1 file changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index 80ce478c4851..93f81d9cd85f 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -3271,14 +3271,25 @@ static void addrconf_gre_config(struct net_device *dev)
>  static int fixup_permanent_addr(struct inet6_dev *idev,
>                                 struct inet6_ifaddr *ifp)
>  {
> -       if (!ifp->rt) {
> -               struct rt6_info *rt;
> +       /* rt6i_ref == 0 means the host route was removed from the
> +        * FIB, for example, if 'lo' device is taken down. In that
> +        * case regenerate the host route.
> +        */
> +       if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
> +               struct rt6_info *rt, *prev;
>
>                 rt = addrconf_dst_alloc(idev, &ifp->addr, false);
>                 if (unlikely(IS_ERR(rt)))
>                         return PTR_ERR(rt);
>
> +               prev = ifp->rt;
> +
> +               /* ifp->rt can be accessed outside of rtnl */
> +               spin_lock(&ifp->lock);
>                 ifp->rt = rt;
> +               spin_unlock(&ifp->lock);
> +
> +               ip6_rt_put(prev);
>         }
>
>         if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
> --
> 2.1.4
>

^ permalink raw reply

* Re: [PATCH net-next 0/2] flower: add MPLS matching support
From: Jamal Hadi Salim @ 2017-04-25 12:47 UTC (permalink / raw)
  To: Simon Horman, Jakub Kicinski
  Cc: David Miller, benjamin.lahaise, netdev, bcrl, Jiri Pirko
In-Reply-To: <20170425115459.GA23939@vergenet.net>

On 17-04-25 07:55 AM, Simon Horman wrote:
[..]
>
> I agree something should be done wrt BOS. If the LABEL and TC are to
> be left as-is then I think a similar treatment of BOS - that is masking it
> - makes sense.
>
> I also agree with statements made earlier in the thread that it is unlikely
> that the unused bits of these attributes will be used - as opposed to a
> bitmask of flag values which seems ripe for re-use for future flags.
>

For your use case, I think you are fine if you just do the mask in the
kernel. A mask  to a user value implies "I am ignoring the rest
of these bits - I dont  care if you set them "

> I would like to add to the discussion that I think in future it would
> be good to expand the features provided by this patch to support supplying
> a mask as part of the match - as flower supports for other fields such
> as IP addresses. But I think the current scheme of masking out invalid bits
> should also work in conjunction with user-supplied masks.
>

The challenge we have right now is "users do stoopid or malicious
things". So are you going to accept the wrong bitmap + mask?

cheers,
jamal

^ permalink raw reply

* [PATCH net] sfc: tx ring can only have 2048 entries for all EF10 NICs
From: Bert Kenward @ 2017-04-25 12:44 UTC (permalink / raw)
  To: Dave Miller; +Cc: netdev, linux-net-drivers, Patrick Talbert

Fixes: dd248f1bc65b ("sfc: Add PCI ID for Solarflare 8000 series 10/40G NIC")
Reported-by: Patrick Talbert <ptalbert@redhat.com>
Signed-off-by: Bert Kenward <bkenward@solarflare.com>
---
 drivers/net/ethernet/sfc/efx.h         | 5 ++++-
 drivers/net/ethernet/sfc/workarounds.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index ee14662415c5..a0c52e328102 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -74,7 +74,10 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
 #define EFX_RXQ_MIN_ENT		128U
 #define EFX_TXQ_MIN_ENT(efx)	(2 * efx_tx_max_skb_descs(efx))
 
-#define EFX_TXQ_MAX_ENT(efx)	(EFX_WORKAROUND_35388(efx) ? \
+/* All EF10 architecture NICs steal one bit of the DMAQ size for various
+ * other purposes when counting TxQ entries, so we halve the queue size.
+ */
+#define EFX_TXQ_MAX_ENT(efx)	(EFX_WORKAROUND_EF10(efx) ? \
 				 EFX_MAX_DMAQ_SIZE / 2 : EFX_MAX_DMAQ_SIZE)
 
 static inline bool efx_rss_enabled(struct efx_nic *efx)
diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h
index 103f827a1623..c67fa18b8121 100644
--- a/drivers/net/ethernet/sfc/workarounds.h
+++ b/drivers/net/ethernet/sfc/workarounds.h
@@ -16,6 +16,7 @@
  */
 
 #define EFX_WORKAROUND_SIENA(efx) (efx_nic_rev(efx) == EFX_REV_SIENA_A0)
+#define EFX_WORKAROUND_EF10(efx) (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
 #define EFX_WORKAROUND_10G(efx) 1
 
 /* Bit-bashed I2C reads cause performance drop */
-- 
2.7.4

^ permalink raw reply related

* Re: [RFC 3/4] nfp: make use of extended ack message reporting
From: Jamal Hadi Salim @ 2017-04-25 12:42 UTC (permalink / raw)
  To: Jakub Kicinski, netdev
  Cc: davem, johannes, dsa, daniel, alexei.starovoitov, bblanco,
	john.fastabend, kubakici, oss-drivers
In-Reply-To: <20170425080644.122536-4-jakub.kicinski@netronome.com>

Good stuff. Question below:

On 17-04-25 04:06 AM, Jakub Kicinski wrote:
> Try to carry error messages to the user via the netlink extended

[..]
> +nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp,
> +		     struct netlink_ext_ack *extack)
>  {
>  	/* XDP-enabled tests */
>  	if (!dp->xdp_prog)
>  		return 0;
>  	if (dp->fl_bufsz > PAGE_SIZE) {
> -		nn_warn(nn, "MTU too large w/ XDP enabled\n");
> +		NL_SET_ERR_MSG(extack, "MTU too large w/ XDP enabled");

So are we going to standardize these strings?
i.e what if some user has written a bash script that depends on this
string and it gets changed later.

cheers,
jamal

^ permalink raw reply

* Re: net/ipv6: slab-out-of-bounds in ip6_tnl_xmit
From: Andrey Konovalov @ 2017-04-25 12:32 UTC (permalink / raw)
  To: Cong Wang
  Cc: David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev, LKML, Eric Dumazet,
	Dmitry Vyukov, Kostya Serebryany, syzkaller
In-Reply-To: <CAM_iQpWnPgn0+-y34nGTB0byonLWCdf3_VAKcH8tK5fdzF4CBg@mail.gmail.com>

On Tue, Apr 25, 2017 at 7:04 AM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
> On Mon, Apr 24, 2017 at 9:47 AM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
>>
>> We use ipv4 dst in ip6_tunnel and cast an IPv4 neigh key as an
>> IPv6 address...
>>
>>
>>                 neigh = dst_neigh_lookup(skb_dst(skb),
>>                                          &ipv6_hdr(skb)->daddr);
>>                 if (!neigh)
>>                         goto tx_err_link_failure;
>>
>>                 addr6 = (struct in6_addr *)&neigh->primary_key; // <=== HERE
>>                 addr_type = ipv6_addr_type(addr6);
>>
>>                 if (addr_type == IPV6_ADDR_ANY)
>>                         addr6 = &ipv6_hdr(skb)->daddr;
>>
>>                 memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
>>
>> Also the network header of the skb at this point should be still IPv4?
>
> Please try the attached patch.

I don't see these crashes with your patch.

Thanks!

Tested-by: Andrey Konovalov <andreyknvl@google.com>

>
> I am not sure how we could handle 4in6 case better than just relying on
> the config of ip6 tunnel.

^ permalink raw reply

* net: pcnet32: [BUG] IRQ disabled when resizing ring buffer
From: Corentin Labbe @ 2017-04-25 12:25 UTC (permalink / raw)
  To: pcnet32; +Cc: netdev, linux-kernel

Hello

When resizing ring buffer on qemu/pcnet32 with linux-next and CONFIG_DMA_API_DEBUG=y, I got the following trace:
[8862.793779] ------------[ cut here ]------------
[ 8862.793784] WARNING: CPU: 0 PID: 26592 at /linux-next/include/linux/dma-mapping.h:505 pcnet32_set_ringparam+0xd55/0xda0
[ 8862.793785] Modules linked in:
[ 8862.793788] CPU: 0 PID: 26592 Comm: ethtool Tainted: G        W       4.11.0-rc7-next-20170424+ #68
[ 8862.793789] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014
[ 8862.793790] task: ffff880235390000 task.stack: ffff8801db300000
[ 8862.793792] RIP: 0010:pcnet32_set_ringparam+0xd55/0xda0
[ 8862.793794] RSP: 0018:ffff8801db303be0 EFLAGS: 00010046
[ 8862.793795] RAX: 0000000000000086 RBX: ffff880234d8d000 RCX: 0000000180800052
[ 8862.793797] RDX: 0000000000000004 RSI: ffffea00070b7700 RDI: ffff880236803a40
[ 8862.793798] RBP: ffff8801db303ca0 R08: 0000000000000001 R09: ffff880236a138a0
[ 8862.793799] R10: 00000000bb83e000 R11: ffff8800bb83e000 R12: 0000000000000004
[ 8862.793800] R13: 0000000000000010 R14: ffff8801db303cdc R15: ffffffff81807300
[ 8862.793803] FS:  00007f55edcc6700(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000
[ 8862.793804] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 8862.793805] CR2: 00007f55ed4eebe0 CR3: 00000001db2e0000 CR4: 00000000000006f0
[ 8862.793808] Call Trace:
[ 8862.793811]  ? get_page_from_freelist+0x263/0x930
[ 8862.793814]  dev_ethtool+0xde3/0x20b0
[ 8862.793816]  ? __alloc_pages_nodemask+0xd9/0xd70
[ 8862.793818]  ? __rtnl_unlock+0x25/0x60
[ 8862.793821]  ? netdev_run_todo+0x5c/0x320
[ 8862.793823]  ? __might_sleep+0x50/0x90
[ 8862.793824]  ? _cond_resched+0x14/0x30
[ 8862.793826]  dev_ioctl+0x18c/0x620
[ 8862.793828]  ? _raw_spin_unlock+0x9/0x20
[ 8862.793830]  sock_do_ioctl+0x4f/0x60
[ 8862.793832]  sock_ioctl+0x247/0x320
[ 8862.793834]  do_vfs_ioctl+0xaa/0x720
[ 8862.793836]  ? __do_page_fault+0x1d1/0x410
[ 8862.793838]  SyS_ioctl+0x47/0x80
[ 8862.793840]  entry_SYSCALL_64_fastpath+0x1a/0xa5
[ 8862.793842] RIP: 0033:0x7f55ed4eebe7
[ 8862.793843] RSP: 002b:00007ffd0b76bef8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[ 8862.793845] RAX: ffffffffffffffda RBX: 00007ffd0b76c050 RCX: 00007f55ed4eebe7
[ 8862.793846] RDX: 00007ffd0b76c060 RSI: 0000000000008946 RDI: 0000000000000003
[ 8862.793847] RBP: 00007ffd0b76bf20 R08: 000000000000000a R09: 0000000000000000
[ 8862.793848] R10: 000000000000053e R11: 0000000000000246 R12: 00007ffd0b76c060
[ 8862.793848] R13: 00007ffd0b76c1d0 R14: 0000000000420e6f R15: 0000000000000002
[ 8862.793850] Code: 25 19 e1 6f 00 4d 85 e4 0f 85 34 fd ff ff eb 95 48 c7 c2 20 ea 8a 81 48 c7 c6 32 a6 9c 81 48 89 df e8 80 a7 18 00 e9 33 ff ff ff <0f> ff e9 a2 f7 ff ff 0f ff 66 90 e9    2d f6 ff ff 0f ff 66 0f 1f
[ 8862.793881] ---[ end trace ecb86a2cf3a149f7 ]---

Regards
Corentin Labbe

^ permalink raw reply

* [PATCH 3/3] net: can: usb: gs_usb: Fix buffer on stack
From: Marc Kleine-Budde @ 2017-04-25 12:16 UTC (permalink / raw)
  To: netdev
  Cc: davem, linux-can, kernel, Maksim Salau, linux-stable # = v4 . 8,
	Marc Kleine-Budde
In-Reply-To: <20170425121645.9946-1-mkl@pengutronix.de>

From: Maksim Salau <maksim.salau@gmail.com>

Allocate buffers on HEAP instead of STACK for local structures
that are to be sent using usb_control_msg().

Signed-off-by: Maksim Salau <maksim.salau@gmail.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v4.8
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/gs_usb.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 300349fe8dc0..eecee7f8dfb7 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -739,13 +739,18 @@ static const struct net_device_ops gs_usb_netdev_ops = {
 static int gs_usb_set_identify(struct net_device *netdev, bool do_identify)
 {
 	struct gs_can *dev = netdev_priv(netdev);
-	struct gs_identify_mode imode;
+	struct gs_identify_mode *imode;
 	int rc;
 
+	imode = kmalloc(sizeof(*imode), GFP_KERNEL);
+
+	if (!imode)
+		return -ENOMEM;
+
 	if (do_identify)
-		imode.mode = GS_CAN_IDENTIFY_ON;
+		imode->mode = GS_CAN_IDENTIFY_ON;
 	else
-		imode.mode = GS_CAN_IDENTIFY_OFF;
+		imode->mode = GS_CAN_IDENTIFY_OFF;
 
 	rc = usb_control_msg(interface_to_usbdev(dev->iface),
 			     usb_sndctrlpipe(interface_to_usbdev(dev->iface),
@@ -755,10 +760,12 @@ static int gs_usb_set_identify(struct net_device *netdev, bool do_identify)
 			     USB_RECIP_INTERFACE,
 			     dev->channel,
 			     0,
-			     &imode,
-			     sizeof(imode),
+			     imode,
+			     sizeof(*imode),
 			     100);
 
+	kfree(imode);
+
 	return (rc > 0) ? 0 : rc;
 }
 
-- 
2.11.0

^ permalink raw reply related

* pull-request: can 2017-04-25
From: Marc Kleine-Budde @ 2017-04-25 12:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, linux-can, kernel

Hello David,

this is a pull request of three patches for net/master.

There are two patches by Stephane Grosjean for that add a new variant to the
PCAN-Chip USB driver. The other patch is by Maksim Salau, which swtiches the
memory for USB transfers from heap to stack.

regrds,
Marc

---

The following changes since commit 38a98bceaf5f786b931d16826fbb46e73280849b:

  Merge branch 'dsa-b53-58xx-fixes' (2017-04-24 18:29:11 -0400)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git tags/linux-can-fixes-for-4.11-20170425

for you to fetch changes up to b05c73bd1e3ec60357580eb042ee932a5ed754d5:

  net: can: usb: gs_usb: Fix buffer on stack (2017-04-25 14:08:35 +0200)

----------------------------------------------------------------
linux-can-fixes-for-4.11-20170425

----------------------------------------------------------------
Maksim Salau (1):
      net: can: usb: gs_usb: Fix buffer on stack

Stephane Grosjean (2):
      can: usb: Add support of PCAN-Chip USB stamp module
      can: usb: Kconfig: Add PCAN-USB X6 device in help text

 drivers/net/can/usb/Kconfig                  |  2 +
 drivers/net/can/usb/gs_usb.c                 | 17 +++++--
 drivers/net/can/usb/peak_usb/pcan_usb_core.c |  2 +
 drivers/net/can/usb/peak_usb/pcan_usb_core.h |  2 +
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c   | 72 ++++++++++++++++++++++++++++
 5 files changed, 90 insertions(+), 5 deletions(-)

^ permalink raw reply

* [PATCH 2/3] can: usb: Kconfig: Add PCAN-USB X6 device in help text
From: Marc Kleine-Budde @ 2017-04-25 12:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, linux-can, kernel, Stephane Grosjean, Marc Kleine-Budde
In-Reply-To: <20170425121645.9946-1-mkl@pengutronix.de>

From: Stephane Grosjean <s.grosjean@peak-system.com>

This patch adds a text line in the help section of the CAN_PEAK_USB
config item describing the support of the PCAN-USB X6 adapter, which is
already included in the Kernel since 4.9.

Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index 3f8adc366af4..5f9e0e6301d0 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -73,6 +73,7 @@ config CAN_PEAK_USB
 	  PCAN-USB FD          single CAN-FD channel USB adapter
 	  PCAN-USB Pro FD      dual CAN-FD channels USB adapter
 	  PCAN-Chip USB        CAN-FD to USB stamp module
+	  PCAN-USB X6          6 CAN-FD channels USB adapter
 
 	  (see also http://www.peak-system.com).
 
-- 
2.11.0


^ permalink raw reply related

* [PATCH 1/3] can: usb: Add support of PCAN-Chip USB stamp module
From: Marc Kleine-Budde @ 2017-04-25 12:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, linux-can, kernel, Stephane Grosjean, Marc Kleine-Budde
In-Reply-To: <20170425121645.9946-1-mkl@pengutronix.de>

From: Stephane Grosjean <s.grosjean@peak-system.com>

This patch adds the support of the PCAN-Chip USB, a stamp module for
customer hardware designs, which communicates via USB 2.0 with the
hardware. The integrated CAN controller supports the protocols CAN 2.0 A/B
as well as CAN FD. The physical CAN connection is determined by external
wiring. The Stamp module with its single-sided mounting and plated
half-holes is suitable for automatic assembly.

Note that the chip is equipped with the same logic than the PCAN-USB FD.

Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/Kconfig                  |  1 +
 drivers/net/can/usb/peak_usb/pcan_usb_core.c |  2 +
 drivers/net/can/usb/peak_usb/pcan_usb_core.h |  2 +
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c   | 72 ++++++++++++++++++++++++++++
 4 files changed, 77 insertions(+)

diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index 8483a40e7e9e..3f8adc366af4 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -72,6 +72,7 @@ config CAN_PEAK_USB
 	  PCAN-USB Pro         dual CAN 2.0b channels USB adapter
 	  PCAN-USB FD          single CAN-FD channel USB adapter
 	  PCAN-USB Pro FD      dual CAN-FD channels USB adapter
+	  PCAN-Chip USB        CAN-FD to USB stamp module
 
 	  (see also http://www.peak-system.com).
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 0b0302af3bd2..57913dbbae0a 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -39,6 +39,7 @@ static struct usb_device_id peak_usb_table[] = {
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPRO_PRODUCT_ID)},
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBFD_PRODUCT_ID)},
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPROFD_PRODUCT_ID)},
+	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBCHIP_PRODUCT_ID)},
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBX6_PRODUCT_ID)},
 	{} /* Terminating entry */
 };
@@ -51,6 +52,7 @@ static const struct peak_usb_adapter *const peak_usb_adapters_list[] = {
 	&pcan_usb_pro,
 	&pcan_usb_fd,
 	&pcan_usb_pro_fd,
+	&pcan_usb_chip,
 	&pcan_usb_x6,
 };
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
index 3cbfb069893d..c01316cac354 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
@@ -27,6 +27,7 @@
 #define PCAN_USBPRO_PRODUCT_ID		0x000d
 #define PCAN_USBPROFD_PRODUCT_ID	0x0011
 #define PCAN_USBFD_PRODUCT_ID		0x0012
+#define PCAN_USBCHIP_PRODUCT_ID		0x0013
 #define PCAN_USBX6_PRODUCT_ID		0x0014
 
 #define PCAN_USB_DRIVER_NAME		"peak_usb"
@@ -90,6 +91,7 @@ struct peak_usb_adapter {
 extern const struct peak_usb_adapter pcan_usb;
 extern const struct peak_usb_adapter pcan_usb_pro;
 extern const struct peak_usb_adapter pcan_usb_fd;
+extern const struct peak_usb_adapter pcan_usb_chip;
 extern const struct peak_usb_adapter pcan_usb_pro_fd;
 extern const struct peak_usb_adapter pcan_usb_x6;
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 304732550f0a..528d3bb4917f 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -1061,6 +1061,78 @@ const struct peak_usb_adapter pcan_usb_fd = {
 	.do_get_berr_counter = pcan_usb_fd_get_berr_counter,
 };
 
+/* describes the PCAN-CHIP USB */
+static const struct can_bittiming_const pcan_usb_chip_const = {
+	.name = "pcan_chip_usb",
+	.tseg1_min = 1,
+	.tseg1_max = (1 << PUCAN_TSLOW_TSGEG1_BITS),
+	.tseg2_min = 1,
+	.tseg2_max = (1 << PUCAN_TSLOW_TSGEG2_BITS),
+	.sjw_max = (1 << PUCAN_TSLOW_SJW_BITS),
+	.brp_min = 1,
+	.brp_max = (1 << PUCAN_TSLOW_BRP_BITS),
+	.brp_inc = 1,
+};
+
+static const struct can_bittiming_const pcan_usb_chip_data_const = {
+	.name = "pcan_chip_usb",
+	.tseg1_min = 1,
+	.tseg1_max = (1 << PUCAN_TFAST_TSGEG1_BITS),
+	.tseg2_min = 1,
+	.tseg2_max = (1 << PUCAN_TFAST_TSGEG2_BITS),
+	.sjw_max = (1 << PUCAN_TFAST_SJW_BITS),
+	.brp_min = 1,
+	.brp_max = (1 << PUCAN_TFAST_BRP_BITS),
+	.brp_inc = 1,
+};
+
+const struct peak_usb_adapter pcan_usb_chip = {
+	.name = "PCAN-Chip USB",
+	.device_id = PCAN_USBCHIP_PRODUCT_ID,
+	.ctrl_count = PCAN_USBFD_CHANNEL_COUNT,
+	.ctrlmode_supported = CAN_CTRLMODE_FD |
+		CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY,
+	.clock = {
+		.freq = PCAN_UFD_CRYSTAL_HZ,
+	},
+	.bittiming_const = &pcan_usb_chip_const,
+	.data_bittiming_const = &pcan_usb_chip_data_const,
+
+	/* size of device private data */
+	.sizeof_dev_private = sizeof(struct pcan_usb_fd_device),
+
+	/* timestamps usage */
+	.ts_used_bits = 32,
+	.ts_period = 1000000, /* calibration period in ts. */
+	.us_per_ts_scale = 1, /* us = (ts * scale) >> shift */
+	.us_per_ts_shift = 0,
+
+	/* give here messages in/out endpoints */
+	.ep_msg_in = PCAN_USBPRO_EP_MSGIN,
+	.ep_msg_out = {PCAN_USBPRO_EP_MSGOUT_0},
+
+	/* size of rx/tx usb buffers */
+	.rx_buffer_size = PCAN_UFD_RX_BUFFER_SIZE,
+	.tx_buffer_size = PCAN_UFD_TX_BUFFER_SIZE,
+
+	/* device callbacks */
+	.intf_probe = pcan_usb_pro_probe,	/* same as PCAN-USB Pro */
+	.dev_init = pcan_usb_fd_init,
+
+	.dev_exit = pcan_usb_fd_exit,
+	.dev_free = pcan_usb_fd_free,
+	.dev_set_bus = pcan_usb_fd_set_bus,
+	.dev_set_bittiming = pcan_usb_fd_set_bittiming_slow,
+	.dev_set_data_bittiming = pcan_usb_fd_set_bittiming_fast,
+	.dev_decode_buf = pcan_usb_fd_decode_buf,
+	.dev_start = pcan_usb_fd_start,
+	.dev_stop = pcan_usb_fd_stop,
+	.dev_restart_async = pcan_usb_fd_restart_async,
+	.dev_encode_msg = pcan_usb_fd_encode_msg,
+
+	.do_get_berr_counter = pcan_usb_fd_get_berr_counter,
+};
+
 /* describes the PCAN-USB Pro FD adapter */
 static const struct can_bittiming_const pcan_usb_pro_fd_const = {
 	.name = "pcan_usb_pro_fd",
-- 
2.11.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox