Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH net-next 02/10] udp: add gso
From: Willem de Bruijn @ 2018-04-26 17:48 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: Netdev, David Miller, Dimitris Michailidis, Willem de Bruijn
In-Reply-To: <CAKgT0UfP7ztrtV7smQviAXZyaiPAwCSARuKnbKnc3SP_R1ogsQ@mail.gmail.com>

Sent a v2 with all but the below suggestion incorporated.

>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
>> index ff49e352deea..c647cfe114e0 100644
>> --- a/net/core/skbuff.c
>> +++ b/net/core/skbuff.c
>> @@ -4940,6 +4940,8 @@ static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
>>                 thlen = tcp_hdrlen(skb);
>>         } else if (unlikely(skb_is_gso_sctp(skb))) {
>>                 thlen = sizeof(struct sctphdr);
>> +       } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
>> +               thlen = sizeof(struct udphdr);
>>         }
>>         /* UFO sets gso_size to the size of the fragmentation
>>          * payload, i.e. the size of the L4 (UDP) header is already
>
> It might make more sense to look at converting this over to a switch
> statement based off of shinfo(skb)->gso_type & GSO_TRANSPORT_MASK,
> where the transport mask consists of the 4 bits that are supported.

I decided to skip this. The types SKB_GSO_{TCPV4, TCPV6, SCTP, UDP_L4}
are far apart in the enum namespace and the tests have to use & instead
of direct comparison. I did not see an obvious way to have the compiler
convert this into a jump table.

Doing so is also a bit out of scope of the feature, so even if feasible
without too much gymnastics I suggest doing so in a separate patch.

^ permalink raw reply

* Re: [PATCH net-next 02/10] udp: add gso
From: Willem de Bruijn @ 2018-04-26 17:49 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: Netdev, David Miller, Dimitris Michailidis, Willem de Bruijn
In-Reply-To: <CAKgT0UccfvfwLemZ5XcWoZPupQi6U2rfjbGjfjFfvJptMcNYeQ@mail.gmail.com>

>>> That way for things like GSO_PARTIAL we can update after segmentation
>>> since there are only going to be 2 segments most likely instead of
>>> multiple MSS sized segments.
>>
>> I don't quite follow. Which two segments?
>
> When we do GSO partial we end up with 2 segments. One really big one
> that is a multiple of MSS and the remainder assuming the frame is odd
> sized. The idea is we can just replicate all of the headers from the
> outer IP header to the inner transport header in hardware so we do all
> the updates based on that assumption and then we do the standard
> segmentation update on the tail skb.

Thanks for the explanation. That is a very cool feature. I clearly hadn't
read the GSO_PARTIAL code closely enough yet.

^ permalink raw reply

* [PATCH net-next 1/1] inet_diag: fetch cong algo info when socket is destroyed
From: Jamal Hadi Salim @ 2018-04-26 17:58 UTC (permalink / raw)
  To: davem
  Cc: kraig, netdev, eric.dumazet, kernel, Jamal Hadi Salim,
	Jamal Hadi Salim

From: Jamal Hadi Salim <hadi@mojatatu.com>

When a user dumps an existing established tcp socket state
via inet diag, it is possible to retrieve the congestion control
details.
When the sock is destroyed, the generated event has all the
details available in the dump sans congestion control info.
This patch fixes it.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 net/core/sock_diag.c |  3 +++
 net/ipv4/inet_diag.c | 48 ++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index c37b5be7c5e4..0bf64dd70aee 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -7,6 +7,7 @@
 #include <net/net_namespace.h>
 #include <linux/module.h>
 #include <net/sock.h>
+#include <net/tcp.h>
 #include <linux/kernel.h>
 #include <linux/tcp.h>
 #include <linux/workqueue.h>
@@ -112,6 +113,8 @@ static size_t sock_diag_nlmsg_size(void)
 {
 	return NLMSG_ALIGN(sizeof(struct inet_diag_msg)
 	       + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */
+	       + nla_total_size(TCP_CA_NAME_MAX) /* INET_DIAG_CONG */
+	       + nla_total_size(sizeof(union tcp_cc_info))
 	       + nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
 }
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 4e5bc4b2f14e..9722f31cc9c5 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -159,6 +159,35 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);
 
+static int inet_csk_cong_fill(struct sock *sk, struct sk_buff *skb, int ext)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	const struct tcp_congestion_ops *ca_ops;
+	union tcp_cc_info info;
+	int attr, err = 0;
+	size_t sz = 0;
+
+	rcu_read_lock();
+	ca_ops = READ_ONCE(icsk->icsk_ca_ops);
+	if (ca_ops) {
+		if (ca_ops->get_info)
+			sz = ca_ops->get_info(sk, ext, &attr, &info);
+		if (ext & (1 << (INET_DIAG_CONG - 1))) {
+			err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name);
+			if (err < 0) {
+				rcu_read_unlock();
+				return err;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	if (sz)
+		err = nla_put(skb, attr, sz, &info);
+
+	return err;
+}
+
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 		      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
 		      struct user_namespace *user_ns,
@@ -274,16 +303,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 			goto errout;
 
 	if (sk->sk_state < TCP_TIME_WAIT) {
-		union tcp_cc_info info;
-		size_t sz = 0;
-		int attr;
-
-		rcu_read_lock();
-		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
-		if (ca_ops && ca_ops->get_info)
-			sz = ca_ops->get_info(sk, ext, &attr, &info);
-		rcu_read_unlock();
-		if (sz && nla_put(skb, attr, sz, &info) < 0)
+		if (inet_csk_cong_fill(sk, skb, ext))
 			goto errout;
 	}
 
@@ -1215,6 +1235,14 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
 	if (attr)
 		info = nla_data(attr);
 
+#define EXT_MASK (1 << (INET_DIAG_VEGASINFO - 1) | 1 << (INET_DIAG_CONG - 1))
+	err = inet_csk_cong_fill(sk, skb, EXT_MASK);
+	if (err) {
+		inet_diag_unlock_handler(handler);
+		nlmsg_cancel(skb, nlh);
+		return err;
+	}
+
 	handler->idiag_get_info(sk, r, info);
 	inet_diag_unlock_handler(handler);
 
-- 
2.11.0

^ permalink raw reply related

* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Michael S. Tsirkin @ 2018-04-26 18:49 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: James Bottomley, Michal Hocko, David Rientjes, dm-devel,
	eric.dumazet, netdev, jasowang, Randy Dunlap, linux-kernel,
	Matthew Wilcox, linux-mm, edumazet, Andrew Morton, virtualization,
	David Miller, Vlastimil Babka
In-Reply-To: <alpine.LRH.2.02.1804261202350.24656@file01.intranet.prod.int.rdu2.redhat.com>

On Thu, Apr 26, 2018 at 12:07:25PM -0400, Mikulas Patocka wrote:
> > IIUC debug kernels mainly exist so people who experience e.g. memory
> > corruption can try and debug the failure. In this case, CONFIG_DEBUG_SG
> > will *already* catch a failure early. Nothing special needs to be done.
> 
> The patch helps people debug such memory corruptions (such as using DMA 
> API on the result of kvmalloc).

That's my point.  I don't think your patch helps debug any memory
corruptions.  With CONFIG_DEBUG_SG using DMA API already causes a
BUG_ON, that's before any memory can get corrupted.

-- 
MST

^ permalink raw reply

* [PULL] virtio: fixups
From: Michael S. Tsirkin @ 2018-04-26 18:50 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: kvm, mst, netdev, linux-kernel, stable, virtualization

The following changes since commit 6d08b06e67cd117f6992c46611dfb4ce267cd71e:

  Linux 4.17-rc2 (2018-04-22 19:20:09 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus

for you to fetch changes up to 5c60300d68da32ca77f7f978039dc72bfc78b06b:

  virtio_console: reset on out of memory (2018-04-25 20:41:29 +0300)

----------------------------------------------------------------
virtio: fixups

Latest header update will break QEMU (if it's rebuilt with the new
header) - and it seems that the code there is so fragile that any change
in this header will break it.  Add a better interface so users do not
need to change their code every time that header changes.

Fix virtio console for spec compliance.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

----------------------------------------------------------------
Michael S. Tsirkin (7):
      virtio_balloon: add array of stat names
      virtio_console: don't tie bufs to a vq
      virtio: add ability to iterate over vqs
      virtio_console: free buffers after reset
      virtio_console: drop custom control queue cleanup
      virtio_console: move removal code
      virtio_console: reset on out of memory

 drivers/char/virtio_console.c       | 157 ++++++++++++++++--------------------
 include/linux/virtio.h              |   3 +
 include/uapi/linux/virtio_balloon.h |  15 ++++
 3 files changed, 89 insertions(+), 86 deletions(-)

^ permalink raw reply

* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Mikulas Patocka @ 2018-04-26 18:54 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: eric.dumazet, netdev, Randy Dunlap, linux-kernel, Matthew Wilcox,
	Michal Hocko, James Bottomley, linux-mm, dm-devel,
	Vlastimil Babka, David Rientjes, Andrew Morton, virtualization,
	David Miller, edumazet
In-Reply-To: <20180426214011-mutt-send-email-mst@kernel.org>



On Thu, 26 Apr 2018, Michael S. Tsirkin wrote:

> On Thu, Apr 26, 2018 at 12:07:25PM -0400, Mikulas Patocka wrote:
> > > IIUC debug kernels mainly exist so people who experience e.g. memory
> > > corruption can try and debug the failure. In this case, CONFIG_DEBUG_SG
> > > will *already* catch a failure early. Nothing special needs to be done.
> > 
> > The patch helps people debug such memory corruptions (such as using DMA 
> > API on the result of kvmalloc).
> 
> That's my point.  I don't think your patch helps debug any memory
> corruptions.  With CONFIG_DEBUG_SG using DMA API already causes a
> BUG_ON, that's before any memory can get corrupted.

The patch turns a hard-to-reproduce bug into an easy-to-reproduce bug. 

Obviously we don't want this in production kernels, but in the debug 
kernels it should be done.

Mikulas

^ permalink raw reply

* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Mikulas Patocka @ 2018-04-26 18:58 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: James Bottomley, Michal Hocko, David Rientjes, dm-devel,
	eric.dumazet, netdev, jasowang, Randy Dunlap, linux-kernel,
	Matthew Wilcox, linux-mm, edumazet, Andrew Morton, virtualization,
	David Miller, Vlastimil Babka
In-Reply-To: <20180426184845-mutt-send-email-mst@kernel.org>



On Thu, 26 Apr 2018, Michael S. Tsirkin wrote:

> How do you make sure QA tests a specific corner case? Add it to
> the test plan :)

BTW. how many "lines of code" of corporate bureaucracy would that take? :-)

> I don't speak for Red Hat, etc.
> 
> -- 
> MST

Mikulas

^ permalink raw reply

* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: John Stoffel @ 2018-04-26 18:58 UTC (permalink / raw)
  To: James Bottomley
  Cc: Mikulas Patocka, Michal, eric.dumazet, mst, netdev, jasowang,
	Randy Dunlap, linux-kernel, Matthew Wilcox, Hocko, linux-mm,
	dm-devel, Vlastimil Babka, Andrew, David Rientjes, Morton,
	virtualization, David Miller, edumazet
In-Reply-To: <1524697697.4100.23.camel@HansenPartnership.com>

>>>>> "James" == James Bottomley <James.Bottomley@HansenPartnership.com> writes:

James> On Wed, 2018-04-25 at 19:00 -0400, Mikulas Patocka wrote:
>> 
>> On Wed, 25 Apr 2018, James Bottomley wrote:
>> 
>> > > > Do we really need the new config option?  This could just be
>> > > > manually  tunable via fault injection IIUC.
>> > > 
>> > > We do, because we want to enable it in RHEL and Fedora debugging
>> > > kernels, so that it will be tested by the users.
>> > > 
>> > > The users won't use some extra magic kernel options or debugfs
>> files.
>> > 
>> > If it can be enabled via a tunable, then the distro can turn it on
>> > without the user having to do anything.  If you want to present the
>> > user with a different boot option, you can (just have the tunable
>> set
>> > on the command line), but being tunable driven means that you don't
>> > have to choose that option, you could automatically enable it under
>> a
>> > range of circumstances.  I think most sane distributions would want
>> > that flexibility.
>> > 
>> > Kconfig proliferation, conversely, is a bit of a nightmare from
>> both
>> > the user and the tester's point of view, so we're trying to avoid
>> it
>> > unless absolutely necessary.
>> > 
>> > James
>> 
>> BTW. even developers who compile their own kernel should have this
>> enabled by a CONFIG option - because if the developer sees the option
>> when browsing through menuconfig, he may enable it. If he doesn't see
>> the option, he won't even know that such an option exists.

James> I may be an atypical developer but I'd rather have a root canal
James> than browse through menuconfig options.  The way to get people
James> to learn about new debugging options is to blog about it (or
James> write an lwn.net article) which google will find the next time
James> I ask it how I debug XXX.  Google (probably as a service to
James> humanity) rarely turns up Kconfig options in response to a
James> query.

I agree with James here.  Looking at the SLAB vs SLUB Kconfig entries
tells me *nothing* about why I should pick one or the other, as an
example.

John

^ permalink raw reply

* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Michael S. Tsirkin @ 2018-04-26 19:05 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: James Bottomley, Michal Hocko, David Rientjes, dm-devel,
	eric.dumazet, netdev, jasowang, Randy Dunlap, linux-kernel,
	Matthew Wilcox, linux-mm, edumazet, Andrew Morton, virtualization,
	David Miller, Vlastimil Babka
In-Reply-To: <alpine.LRH.2.02.1804261454380.23716@file01.intranet.prod.int.rdu2.redhat.com>

On Thu, Apr 26, 2018 at 02:58:08PM -0400, Mikulas Patocka wrote:
> 
> 
> On Thu, 26 Apr 2018, Michael S. Tsirkin wrote:
> 
> > How do you make sure QA tests a specific corner case? Add it to
> > the test plan :)
> 
> BTW. how many "lines of code" of corporate bureaucracy would that take? :-)

It's pretty easy at least here at Red Hat.

> > I don't speak for Red Hat, etc.
> > 
> > -- 
> > MST
> 
> Mikulas

^ permalink raw reply

* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Michael S. Tsirkin @ 2018-04-26 19:14 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: James Bottomley, Michal Hocko, David Rientjes, dm-devel,
	eric.dumazet, netdev, jasowang, Randy Dunlap, linux-kernel,
	Matthew Wilcox, linux-mm, edumazet, Andrew Morton, virtualization,
	David Miller, Vlastimil Babka
In-Reply-To: <alpine.LRH.2.02.1804261451120.23716@file01.intranet.prod.int.rdu2.redhat.com>

On Thu, Apr 26, 2018 at 02:54:26PM -0400, Mikulas Patocka wrote:
> 
> 
> On Thu, 26 Apr 2018, Michael S. Tsirkin wrote:
> 
> > On Thu, Apr 26, 2018 at 12:07:25PM -0400, Mikulas Patocka wrote:
> > > > IIUC debug kernels mainly exist so people who experience e.g. memory
> > > > corruption can try and debug the failure. In this case, CONFIG_DEBUG_SG
> > > > will *already* catch a failure early. Nothing special needs to be done.
> > > 
> > > The patch helps people debug such memory corruptions (such as using DMA 
> > > API on the result of kvmalloc).
> > 
> > That's my point.  I don't think your patch helps debug any memory
> > corruptions.  With CONFIG_DEBUG_SG using DMA API already causes a
> > BUG_ON, that's before any memory can get corrupted.
> 
> The patch turns a hard-to-reproduce bug into an easy-to-reproduce bug. 

It's still not a memory corruption. It's a BUG_ON the source of which -
should it trigger - can be typically found using grep.

> Obviously we don't want this in production kernels, but in the debug 
> kernels it should be done.
> 
> Mikulas

I'm not so sure. debug kernels should make debugging easier,
definitely.

Unfortunately they are already slower so some races don't trigger.

If they also start crashing more because we are injecting
memory allocation errors, people are even less likely to
be able to use them.

Just add a comment near the BUG_ON within DMA API telling people how
they can inject this error some more if the bug does not
reproduce, and leave it at that.

-- 
MST

^ permalink raw reply

* Re: [PATCH net-next v2] Add Common Applications Kept Enhanced (cake) qdisc
From: kbuild test robot @ 2018-04-26 19:16 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: kbuild-all, netdev, cake, Toke Høiland-Jørgensen,
	Dave Taht
In-Reply-To: <20180424114407.5939-1-toke@toke.dk>

[-- Attachment #1: Type: text/plain, Size: 3367 bytes --]

Hi Toke,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Toke-H-iland-J-rgensen/Add-Common-Applications-Kept-Enhanced-cake-qdisc/20180426-064653
config: parisc-allmodconfig (attached as .config)
compiler: hppa-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=parisc 

All warnings (new ones prefixed by >>):


vim +2589 net//sched/sch_cake.c

  2525	
  2526	static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
  2527	{
  2528		struct cake_sched_data *q = qdisc_priv(sch);
  2529		struct tc_cake_xstats *st;
  2530		size_t size = (sizeof(*st) +
  2531			       sizeof(struct tc_cake_tin_stats) * q->tin_cnt);
  2532		int i;
  2533	
  2534		st = cake_zalloc(size);
  2535	
  2536		if (!st)
  2537			return -1;
  2538	
  2539		st->version = 0x102; /* old userspace code discards versions > 0xFF */
  2540		st->tin_stats_size = sizeof(struct tc_cake_tin_stats);
  2541		st->tin_cnt = q->tin_cnt;
  2542	
  2543		st->avg_trnoff = (q->avg_trnoff + 0x8000) >> 16;
  2544		st->max_netlen = q->max_netlen;
  2545		st->max_adjlen = q->max_adjlen;
  2546		st->min_netlen = q->min_netlen;
  2547		st->min_adjlen = q->min_adjlen;
  2548	
  2549		for (i = 0; i < q->tin_cnt; i++) {
  2550			struct cake_tin_data *b = &q->tins[q->tin_order[i]];
  2551			struct tc_cake_tin_stats *tstat = &st->tin_stats[i];
  2552	
  2553			tstat->threshold_rate = b->tin_rate_bps;
  2554			tstat->target_us      = cobalt_time_to_us(b->cparams.target);
  2555			tstat->interval_us    = cobalt_time_to_us(b->cparams.interval);
  2556	
  2557			/* TODO FIXME: add missing aspects of these composite stats */
  2558			tstat->sent.packets       = b->packets;
  2559			tstat->sent.bytes	  = b->bytes;
  2560			tstat->dropped.packets    = b->tin_dropped;
  2561			tstat->ecn_marked.packets = b->tin_ecn_mark;
  2562			tstat->backlog.bytes      = b->tin_backlog;
  2563			tstat->ack_drops.packets  = b->ack_drops;
  2564	
  2565			tstat->peak_delay_us = cobalt_time_to_us(b->peak_delay);
  2566			tstat->avge_delay_us = cobalt_time_to_us(b->avge_delay);
  2567			tstat->base_delay_us = cobalt_time_to_us(b->base_delay);
  2568	
  2569			tstat->way_indirect_hits = b->way_hits;
  2570			tstat->way_misses	 = b->way_misses;
  2571			tstat->way_collisions    = b->way_collisions;
  2572	
  2573			tstat->sparse_flows      = b->sparse_flow_count +
  2574						   b->decaying_flow_count;
  2575			tstat->bulk_flows	 = b->bulk_flow_count;
  2576			tstat->unresponse_flows  = b->unresponsive_flow_count;
  2577			tstat->spare		 = 0;
  2578			tstat->max_skblen	 = b->max_skblen;
  2579	
  2580			tstat->flow_quantum	 = b->flow_quantum;
  2581		}
  2582		st->capacity_estimate = q->avg_peak_bandwidth;
  2583		st->memory_limit      = q->buffer_limit;
  2584		st->memory_used       = q->buffer_max_used;
  2585	
  2586		i = gnet_stats_copy_app(d, st, size);
  2587		cake_free(st);
  2588		return i;
> 2589	}
  2590	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 52468 bytes --]

^ permalink raw reply

* Re: [PATCH net-next v2 00/11] udp gso
From: David Miller @ 2018-04-26 19:23 UTC (permalink / raw)
  To: willemdebruijn.kernel; +Cc: netdev, alexander.duyck, willemb
In-Reply-To: <20180426174225.246388-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Date: Thu, 26 Apr 2018 13:42:14 -0400

> Segmentation offload reduces cycles/byte for large packets by
> amortizing the cost of protocol stack traversal.
> 
> This patchset implements GSO for UDP. A process can concatenate and
> submit multiple datagrams to the same destination in one send call
> by setting socket option SOL_UDP/UDP_SEGMENT with the segment size,
> or passing an analogous cmsg at send time.

Looks great.

Build testing revealed that with ipv6=m we have to export
__udp_gso_segment (patch #2) and udp_cmsg_send (patch #6).

I added the exports while applying this series.

Nice work, thanks Willem!

^ permalink raw reply

* Re: [PATCH stable v4.4+] r8152: add Linksys USB3GIGV1 id
From: Grant Grundler @ 2018-04-26 19:34 UTC (permalink / raw)
  To: Krzysztof Kozlowski
  Cc: Grant Grundler, Oliver Neukum, David S. Miller, linux-usb, netdev,
	LKML
In-Reply-To: <CAJKOXPcD2O8KhyrNj58fMRishdscVf9VoybWfMjezuZdETuibQ@mail.gmail.com>

On Thu, Apr 26, 2018 at 12:56 AM, Krzysztof Kozlowski <krzk@kernel.org> wrote:
> On Thu, Apr 26, 2018 at 2:40 AM, Grant Grundler <grundler@chromium.org> wrote:
>> On Wed, Apr 25, 2018 at 2:54 AM, Krzysztof Kozlowski <krzk@kernel.org>
>> wrote:
>>>
>>> commit 90841047a01b452cc8c3f9b990698b264143334a upstream
>>>
>>> This linksys dongle by default comes up in cdc_ether mode.
>>> This patch allows r8152 to claim the device:
>>>    Bus 002 Device 002: ID 13b1:0041 Linksys
>>>
>>> Signed-off-by: Grant Grundler <grundler@chromium.org>
>>> Reviewed-by: Douglas Anderson <dianders@chromium.org>
>>> Signed-off-by: David S. Miller <davem@davemloft.net>
>>> [krzk: Rebase on v4.4]'
>>
>>
>> thanks krzk!
>>
>> FTR, to support RTL8153B (HW ID 0x6010), the following patch series bring
>> the r8152 v1.09.9 driver from 4.14 kernel.org to 3 (of 5) older Chrome OS
>> kernels:
>>
>> 3.14:
>> https://chromium-review.googlesource.com/q/topic:%22update_r8152-3.14%22+(status:open%20OR%20status:merged)
>> 3.18:
>> https://chromium-review.googlesource.com/q/topic:%2522update-r8152-3.18%2522+(status:open+OR+status:merged)
>> 4.4:
>> https://chromium-review.googlesource.com/q/topic:%2522update_r8152-4.4%2522+(status:open+OR+status:merged)
>>
>> caveat: These series are not suitable directly for kernel.org submission
>> (extraneous stuff in the commit messages, order is different). Using the
>> original SHA1 (in each commit message), this can all be fixed up by
>> hand/simple scripts.
>
> Hi Grant,
>
> These are regular feature/patch backports so they do not fit into
> stable process. Only new quirks and IDs are accepted for stable.

Hi Krzysztof!
Sorry, I wasn't advocating for -stable inclusion. I shared in case
someone has unusually high USB ethernet requirements similar to Chrome
OS test lab which nearly all dongles I've tested can't provide.

Chrome OS test lab needs a USB ethernet dongle that reliably
negotiates a link (e.g. 10k iterations in a row). RTL8153 in general
are good (> 99.99% gets GigE link speed) but RTL8153B is the first
dongle that meets Chrome OS test lab requirements. The patch series
above is required to support RTL8153B.

cheers,
grant

^ permalink raw reply

* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Mikulas Patocka @ 2018-04-26 19:36 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: eric.dumazet, netdev, Randy Dunlap, linux-kernel, Matthew Wilcox,
	Michal Hocko, James Bottomley, linux-mm, dm-devel,
	Vlastimil Babka, David Rientjes, Andrew Morton, virtualization,
	David Miller, edumazet
In-Reply-To: <20180426220523-mutt-send-email-mst@kernel.org>



On Thu, 26 Apr 2018, Michael S. Tsirkin wrote:

> On Thu, Apr 26, 2018 at 02:54:26PM -0400, Mikulas Patocka wrote:
> > 
> > 
> > On Thu, 26 Apr 2018, Michael S. Tsirkin wrote:
> > 
> > > On Thu, Apr 26, 2018 at 12:07:25PM -0400, Mikulas Patocka wrote:
> > > > > IIUC debug kernels mainly exist so people who experience e.g. memory
> > > > > corruption can try and debug the failure. In this case, CONFIG_DEBUG_SG
> > > > > will *already* catch a failure early. Nothing special needs to be done.
> > > > 
> > > > The patch helps people debug such memory corruptions (such as using DMA 
> > > > API on the result of kvmalloc).
> > > 
> > > That's my point.  I don't think your patch helps debug any memory
> > > corruptions.  With CONFIG_DEBUG_SG using DMA API already causes a
> > > BUG_ON, that's before any memory can get corrupted.
> > 
> > The patch turns a hard-to-reproduce bug into an easy-to-reproduce bug. 
> 
> It's still not a memory corruption. It's a BUG_ON the source of which -
> should it trigger - can be typically found using grep.
> 
> > Obviously we don't want this in production kernels, but in the debug 
> > kernels it should be done.
> > 
> > Mikulas
> 
> I'm not so sure. debug kernels should make debugging easier,
> definitely.
> 
> Unfortunately they are already slower so some races don't trigger.
> 
> If they also start crashing more because we are injecting
> memory allocation errors, people are even less likely to
> be able to use them.

I've actually already pushed this patch to RHEL-7 (just before 7.5 was 
released) and it found out some powerpc issues. See the commit 
ea376cc55bc3 in the RHEL-7 git. It was reverted just before RHEL-7.5 was 
released with the intention that it will be reinstated just after RHEL-7.5 
release, so that these issues could be found and eliminated in the 
7.5->7.6 development cycle. Jeff Moyer asked me to put it upstream because 
they want to follow upstream and they don't like RHEL-specific patches. 
There's clear incentive to put this patch to RHEL-7, that's why I'm 
posting it here.

> Just add a comment near the BUG_ON within DMA API telling people how
> they can inject this error some more if the bug does not
> reproduce, and leave it at that.

But the problem is that the powerpc bug only triggers with this patch. It 
doesn't trigger without it. So, we have a potential random-crashing bug in 
the codebase (and perhaps more others) and we want to eliminate them - 
that's why we need the patch.

People on this list argue "this should be a kernel parameter". But the 
testers won't enable the kernel parameter, the crashes won't happen 
without the kernel parameter and the bugs will stay unreported and 
uncorrected. That's why it needs to be the default.

Mikulas

^ permalink raw reply

* [PATCH] DT: net: can: rcar_canfd: document R8A77970 bindings
From: Sergei Shtylyov @ 2018-04-26 19:41 UTC (permalink / raw)
  To: Marc Kleine-Budde, Rob Herring, linux-can, netdev, devicetree
  Cc: Wolfgang Grandegger, Mark Rutland, linux-renesas-soc

Document the R-Car V3M (R8A77970) SoC support in the R-Car CAN-FD bindings.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

---
The patch is against the 'linux-can-next.git' repo but I wouldn't object if
it's merged to the 'linux-can.git' repo instead. :-)

 Documentation/devicetree/bindings/net/can/rcar_canfd.txt |    1 +
 1 file changed, 1 insertion(+)

Index: linux-can-next/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
===================================================================
--- linux-can-next.orig/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
+++ linux-can-next/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
@@ -6,6 +6,7 @@ Required properties:
   - "renesas,rcar-gen3-canfd" for R-Car Gen3 compatible controller.
   - "renesas,r8a7795-canfd" for R8A7795 (R-Car H3) compatible controller.
   - "renesas,r8a7796-canfd" for R8A7796 (R-Car M3) compatible controller.
+  - "renesas,r8a77970-canfd" for R8A77970 (R-Car V3M) compatible controller.
 
   When compatible with the generic version, nodes must list the
   SoC-specific version corresponding to the platform first, followed by the

^ permalink raw reply

* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Michael S. Tsirkin @ 2018-04-26 19:45 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: eric.dumazet, netdev, Randy Dunlap, linux-kernel, Matthew Wilcox,
	Michal Hocko, James Bottomley, linux-mm, dm-devel,
	Vlastimil Babka, David Rientjes, Andrew Morton, virtualization,
	David Miller, edumazet
In-Reply-To: <alpine.LRH.2.02.1804261516250.26980@file01.intranet.prod.int.rdu2.redhat.com>

On Thu, Apr 26, 2018 at 03:36:14PM -0400, Mikulas Patocka wrote:
> People on this list argue "this should be a kernel parameter".

How about making it a writeable attribute, so it's easy to turn on/off
after boot. Then you can keep it deterministic, userspace can play with
the attribute at random if it wants to.

-- 
MST

^ permalink raw reply

* Re: [PATCH net-next v2 00/11] udp gso
From: Willem de Bruijn @ 2018-04-26 19:46 UTC (permalink / raw)
  To: David Miller; +Cc: Network Development, Alexander Duyck, Willem de Bruijn
In-Reply-To: <20180426.152341.192100471142622918.davem@davemloft.net>

On Thu, Apr 26, 2018 at 3:23 PM, David Miller <davem@davemloft.net> wrote:
> From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
> Date: Thu, 26 Apr 2018 13:42:14 -0400
>
>> Segmentation offload reduces cycles/byte for large packets by
>> amortizing the cost of protocol stack traversal.
>>
>> This patchset implements GSO for UDP. A process can concatenate and
>> submit multiple datagrams to the same destination in one send call
>> by setting socket option SOL_UDP/UDP_SEGMENT with the segment size,
>> or passing an analogous cmsg at send time.
>
> Looks great.
>
> Build testing revealed that with ipv6=m we have to export
> __udp_gso_segment (patch #2) and udp_cmsg_send (patch #6).

Oops :/

Thanks for fixing up this breakage, David.

^ permalink raw reply

* [net 1/7] net/mlx5e: Allow offloading ipv4 header re-write for icmp
From: Saeed Mahameed @ 2018-04-26 19:58 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Jianbo Liu, Saeed Mahameed
In-Reply-To: <20180426195842.29665-1-saeedm@mellanox.com>

From: Jianbo Liu <jianbol@mellanox.com>

For ICMPv4, the checksum is calculated from the ICMP headers and data.
Since the ICMPv4 checksum doesn't cover the IP header, we can allow to
do L3 header re-write for this protocol.

Fixes: bdd66ac0aeed ('net/mlx5e: Disallow TC offloading of unsupported match/action combinations')
Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 4197001f9801..3c534fc43400 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1864,7 +1864,8 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
 	}
 
 	ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
-	if (modify_ip_header && ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) {
+	if (modify_ip_header && ip_proto != IPPROTO_TCP &&
+	    ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
 		pr_info("can't offload re-write of ip proto %d\n", ip_proto);
 		return false;
 	}
-- 
2.14.3

^ permalink raw reply related

* [pull request][net 0/7] Mellanox, mlx5 fixes 2018-04-26
From: Saeed Mahameed @ 2018-04-26 19:58 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Saeed Mahameed

Hi Dave,

This pull request includes fixes for mlx5 core and netdev driver.

Please pull and let me know if there are any problems.

For -stable v4.12
    net/mlx5e: TX, Use correct counter in dma_map error flow
For -stable v4.13
    net/mlx5: Avoid cleaning flow steering table twice during error flow
For -stable v4.14
    net/mlx5e: Allow offloading ipv4 header re-write for icmp
For -stable v4.15
    net/mlx5e: DCBNL fix min inline header size for dscp
For -stable v4.16
    net/mlx5: Fix mlx5_get_vector_affinity function

Thanks,
Saeed.

---

The following changes since commit 25eb0ea7174c6e84f21fa59dccbddd0318b17b12:

  Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf (2018-04-25 22:55:33 -0400)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git tags/mlx5-fixes-2018-04-25

for you to fetch changes up to 202854e9f4df99df1f79962a9e8f94a7de602f7b:

  net/mlx5: Properly deal with flow counters when deleting rules (2018-04-26 12:43:21 -0700)

----------------------------------------------------------------
mlx5-fixes-2018-04-25

----------------------------------------------------------------
Chris Mi (1):
      net/mlx5: Properly deal with flow counters when deleting rules

Huy Nguyen (1):
      net/mlx5e: DCBNL fix min inline header size for dscp

Israel Rukshin (1):
      net/mlx5: Fix mlx5_get_vector_affinity function

Jianbo Liu (1):
      net/mlx5e: Allow offloading ipv4 header re-write for icmp

Shahar Klein (1):
      net/mlx5e: Fix traffic between VF and representor

Talat Batheesh (1):
      net/mlx5: Avoid cleaning flow steering table twice during error flow

Tariq Toukan (1):
      net/mlx5e: TX, Use correct counter in dma_map error flow

 drivers/infiniband/hw/mlx5/main.c                  |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c |  8 ++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  5 +++--
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    | 20 ++++++++---------
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  | 26 +++++++++++++---------
 include/linux/mlx5/driver.h                        | 12 +++-------
 7 files changed, 40 insertions(+), 36 deletions(-)

^ permalink raw reply

* [PATCH net-next 03/13] sctp: remove an if() that is always true
From: Marcelo Ricardo Leitner @ 2018-04-26 19:58 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Vlad Yasevich, Neil Horman, Xin Long
In-Reply-To: <cover.1524772453.git.marcelo.leitner@gmail.com>

As noticed by Xin Long, the if() here is always true as PMTU can never
be 0.

Reported-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 net/sctp/associola.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b3aa95222bd52113295cb246c503c903bdd5c353..c5ed09cfa8423b17546e3d45f6d06db03af66384 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1397,10 +1397,8 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
 			pmtu = t->pathmtu;
 	}
 
-	if (pmtu) {
-		asoc->pathmtu = pmtu;
-		asoc->frag_point = sctp_frag_point(asoc, pmtu);
-	}
+	asoc->pathmtu = pmtu;
+	asoc->frag_point = sctp_frag_point(asoc, pmtu);
 
 	pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc,
 		 asoc->pathmtu, asoc->frag_point);
-- 
2.14.3

^ permalink raw reply related

* [PATCH net-next 00/13] sctp: refactor MTU handling
From: Marcelo Ricardo Leitner @ 2018-04-26 19:58 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Vlad Yasevich, Neil Horman, Xin Long

Currently, MTU handling is spread over the SCTP stack. There are multiple
places doing same/similar calculations and updating them is error prone
as one spot can easily be left out.

This patchset converges it into a more concise and consistent code. In
general, it moves MTU handling from functions with bigger objectives,
such as sctp_assoc_add_peer(), to specific functions.

It's also a preparation for the next patchset, which removes the
duplication between sctp_make_op_error_space and
sctp_make_op_error_fixed and relies on sctp_mtu_payload introduced here.

More details on each patch.

Marcelo Ricardo Leitner (13):
  sctp: remove old and unused SCTP_MIN_PMTU
  sctp: move transport pathmtu calc away of sctp_assoc_add_peer
  sctp: remove an if() that is always true
  sctp: introduce sctp_assoc_set_pmtu
  sctp: introduce sctp_mtu_payload
  sctp: introduce sctp_assoc_update_frag_point
  sctp: remove sctp_assoc_pending_pmtu
  sctp: introduce sctp_dst_mtu
  sctp: remove sctp_transport_pmtu_check
  sctp: re-use sctp_transport_pmtu in sctp_transport_route
  sctp: honor PMTU_DISABLED when handling icmp
  sctp: consider idata chunks when setting SCTP_MAXSEG
  sctp: allow unsetting sockopt MAXSEG

 include/net/sctp/constants.h |  5 ++--
 include/net/sctp/sctp.h      | 52 ++++++++++++++------------------------
 include/net/sctp/structs.h   |  2 ++
 net/sctp/associola.c         | 60 +++++++++++++++++++++++---------------------
 net/sctp/chunk.c             | 12 +--------
 net/sctp/output.c            | 28 ++++++++-------------
 net/sctp/socket.c            | 43 ++++++++++++++-----------------
 net/sctp/transport.c         | 37 ++++++++++++++-------------
 8 files changed, 105 insertions(+), 134 deletions(-)

^ permalink raw reply

* [PATCH net-next v2 0/7] Microsemi Ocelot Ethernet switch support
From: Alexandre Belloni @ 2018-04-26 19:59 UTC (permalink / raw)
  To: David S . Miller
  Cc: Allan Nielsen, razvan.stefanescu, po.liu, Thomas Petazzoni,
	Andrew Lunn, Florian Fainelli, netdev, devicetree, linux-kernel,
	linux-mips, Alexandre Belloni, James Hogan

Hi,

This series adds initial support for the Microsemi Ethernet switch
present on Ocelot SoCs.

This only has bridging (and STP) support for now and it uses the
switchdev framework.
Coming features are VLAN filtering, link aggregation, IGMP snooping.

The switch can also be connected to an external CPU using PCIe.

Also, support for integration on other SoCs will be submitted.

The ocelot dts changes are here for reference and should probably go
through the MIPS tree once the bindings are accepted.

Changes in v2:
 - Dropped Microsemi Ocelot PHY support
 * MIIM driver:
   - Documented interrupts bindings
   - Moved the driver to drivers/net/phy/
   - Removed unused mutex
   - Removed MDIO bus scanning
 * Switchdev driver:
   - Changed compatible to mscc,vsc7514-switch
   - Removed unused header inclusion
   - Factorized MAC table selection in ocelot_mact_select()
   - Disable the port in ocelot_port_stop()
   - Fixed the smatch endianness warnings
   - int to unsigned int where necessary
   - Removed VID handling for the FDB; it has been reworked anyway and will be
     submitted with VLAN support
   - Fixed up unused cases in ocelot_port_attr_set()
   - Added a loop to register all the IO register spaces
   - the ports are now in an ethernet-ports node

I've tried switching to NAPI but this is not working well, mainly because the
only way to disable interrupts is to actually mask them in the interrupt
controller (it is not possible to tell the switch to stop generating
interrupts).

Cc: James Hogan <jhogan@kernel.org>

Alexandre Belloni (7):
  dt-bindings: net: add DT bindings for Microsemi MIIM
  net: mscc: Add MDIO driver
  dt-bindings: net: add DT bindings for Microsemi Ocelot Switch
  net: mscc: Add initial Ocelot switch support
  MIPS: mscc: Add switch to ocelot
  MIPS: mscc: connect phys to ports on ocelot_pcb123
  MAINTAINERS: Add entry for Microsemi Ethernet switches

 .../devicetree/bindings/net/mscc-miim.txt     |   26 +
 .../devicetree/bindings/net/mscc-ocelot.txt   |   82 +
 MAINTAINERS                                   |    6 +
 arch/mips/boot/dts/mscc/ocelot.dtsi           |   88 ++
 arch/mips/boot/dts/mscc/ocelot_pcb123.dts     |   20 +
 drivers/net/ethernet/Kconfig                  |    1 +
 drivers/net/ethernet/Makefile                 |    1 +
 drivers/net/ethernet/mscc/Kconfig             |   29 +
 drivers/net/ethernet/mscc/Makefile            |    5 +
 drivers/net/ethernet/mscc/ocelot.c            | 1316 +++++++++++++++++
 drivers/net/ethernet/mscc/ocelot.h            |  554 +++++++
 drivers/net/ethernet/mscc/ocelot_ana.h        |  625 ++++++++
 drivers/net/ethernet/mscc/ocelot_board.c      |  313 ++++
 drivers/net/ethernet/mscc/ocelot_dev.h        |  275 ++++
 drivers/net/ethernet/mscc/ocelot_dev_gmii.h   |  154 ++
 drivers/net/ethernet/mscc/ocelot_hsio.h       |  785 ++++++++++
 drivers/net/ethernet/mscc/ocelot_io.c         |  116 ++
 drivers/net/ethernet/mscc/ocelot_qs.h         |   78 +
 drivers/net/ethernet/mscc/ocelot_qsys.h       |  270 ++++
 drivers/net/ethernet/mscc/ocelot_regs.c       |  399 +++++
 drivers/net/ethernet/mscc/ocelot_rew.h        |   81 +
 drivers/net/ethernet/mscc/ocelot_sys.h        |  140 ++
 drivers/net/phy/Kconfig                       |    7 +
 drivers/net/phy/Makefile                      |    1 +
 drivers/net/phy/mdio-mscc-miim.c              |  197 +++
 25 files changed, 5569 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/mscc-miim.txt
 create mode 100644 Documentation/devicetree/bindings/net/mscc-ocelot.txt
 create mode 100644 drivers/net/ethernet/mscc/Kconfig
 create mode 100644 drivers/net/ethernet/mscc/Makefile
 create mode 100644 drivers/net/ethernet/mscc/ocelot.c
 create mode 100644 drivers/net/ethernet/mscc/ocelot.h
 create mode 100644 drivers/net/ethernet/mscc/ocelot_ana.h
 create mode 100644 drivers/net/ethernet/mscc/ocelot_board.c
 create mode 100644 drivers/net/ethernet/mscc/ocelot_dev.h
 create mode 100644 drivers/net/ethernet/mscc/ocelot_dev_gmii.h
 create mode 100644 drivers/net/ethernet/mscc/ocelot_hsio.h
 create mode 100644 drivers/net/ethernet/mscc/ocelot_io.c
 create mode 100644 drivers/net/ethernet/mscc/ocelot_qs.h
 create mode 100644 drivers/net/ethernet/mscc/ocelot_qsys.h
 create mode 100644 drivers/net/ethernet/mscc/ocelot_regs.c
 create mode 100644 drivers/net/ethernet/mscc/ocelot_rew.h
 create mode 100644 drivers/net/ethernet/mscc/ocelot_sys.h
 create mode 100644 drivers/net/phy/mdio-mscc-miim.c

-- 
2.17.0

^ permalink raw reply

* [PATCH net-next 01/13] sctp: remove old and unused SCTP_MIN_PMTU
From: Marcelo Ricardo Leitner @ 2018-04-26 19:58 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Vlad Yasevich, Neil Horman, Xin Long
In-Reply-To: <cover.1524772453.git.marcelo.leitner@gmail.com>

This value is not used anywhere in the code. In essence it is a
duplicate of SCTP_DEFAULT_MINSEGMENT, which is used by the stack.

SCTP_MIN_PMTU value makes more sense, but we should not change to it now
as it would risk breaking applications.

So this patch removes SCTP_MIN_PMTU and adjusts the comment above it.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 include/net/sctp/constants.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 20ff237c5eb2ff358589b30bbd89d636e41b2d92..86f034b524d46178e4d26e868be3a2bf87acac4a 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -254,11 +254,10 @@ enum { SCTP_ARBITRARY_COOKIE_ECHO_LEN = 200 };
 #define SCTP_TSN_MAP_SIZE 4096
 
 /* We will not record more than this many duplicate TSNs between two
- * SACKs.  The minimum PMTU is 576.  Remove all the headers and there
- * is enough room for 131 duplicate reports.  Round down to the
+ * SACKs.  The minimum PMTU is 512.  Remove all the headers and there
+ * is enough room for 117 duplicate reports.  Round down to the
  * nearest power of 2.
  */
-enum { SCTP_MIN_PMTU = 576 };
 enum { SCTP_MAX_DUP_TSNS = 16 };
 enum { SCTP_MAX_GABS = 16 };
 
-- 
2.14.3

^ permalink raw reply related

* [PATCH net-next 02/13] sctp: move transport pathmtu calc away of sctp_assoc_add_peer
From: Marcelo Ricardo Leitner @ 2018-04-26 19:58 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Vlad Yasevich, Neil Horman, Xin Long
In-Reply-To: <cover.1524772453.git.marcelo.leitner@gmail.com>

There was only one case that sctp_assoc_add_peer couldn't handle, which
is when SPP_PMTUD_DISABLE is set and pathmtu not initialized.
So add this situation to sctp_transport_route and reuse what was
already in there.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 net/sctp/associola.c | 9 +--------
 net/sctp/transport.c | 8 ++++++--
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index a8f3b088fcb2ac82f58b7d6fb8894fe712e2bc0a..b3aa95222bd52113295cb246c503c903bdd5c353 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -652,15 +652,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 	 */
 	peer->param_flags = asoc->param_flags;
 
-	sctp_transport_route(peer, NULL, sp);
-
 	/* Initialize the pmtu of the transport. */
-	if (peer->param_flags & SPP_PMTUD_DISABLE) {
-		if (asoc->pathmtu)
-			peer->pathmtu = asoc->pathmtu;
-		else
-			peer->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
-	}
+	sctp_transport_route(peer, NULL, sp);
 
 	/* If this is the first transport addr on this association,
 	 * initialize the association PMTU to the peer's PMTU.
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 47f82bd794d915188bad037463c2aa14175a55ef..c5fc3aed08a18658448b528b07dbb13900686313 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -307,11 +307,15 @@ void sctp_transport_route(struct sctp_transport *transport,
 		 * association's active path for getsockname().
 		 */
 		if (asoc && (!asoc->peer.primary_path ||
-				(transport == asoc->peer.active_path)))
+			     (transport == asoc->peer.active_path)))
 			opt->pf->to_sk_saddr(&transport->saddr,
 					     asoc->base.sk);
-	} else
+	} else if ((transport->param_flags & SPP_PMTUD_DISABLE) &&
+		   asoc && asoc->pathmtu) {
+		transport->pathmtu = asoc->pathmtu;
+	} else {
 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
+	}
 }
 
 /* Hold a reference to a transport.  */
-- 
2.14.3

^ permalink raw reply related

* [PATCH net-next 04/13] sctp: introduce sctp_assoc_set_pmtu
From: Marcelo Ricardo Leitner @ 2018-04-26 19:58 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Vlad Yasevich, Neil Horman, Xin Long
In-Reply-To: <cover.1524772453.git.marcelo.leitner@gmail.com>

All changes to asoc PMTU should now go through this wrapper, making it
easier to track them and to do other actions upon it.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 include/net/sctp/structs.h |  1 +
 net/sctp/associola.c       | 21 +++++++++++++--------
 net/sctp/socket.c          |  2 +-
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 05594b248e5277790cca6542c3680510739ce18b..c5e244be8f1ea99118cac9ada0cfce871cb15d3f 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -2097,6 +2097,7 @@ int sctp_assoc_update(struct sctp_association *old,
 
 __u32 sctp_association_get_next_tsn(struct sctp_association *);
 
+void sctp_assoc_set_pmtu(struct sctp_association *asoc, __u32 pmtu);
 void sctp_assoc_sync_pmtu(struct sctp_association *asoc);
 void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int);
 void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index c5ed09cfa8423b17546e3d45f6d06db03af66384..85b362324084e9ba139a39293a527ef06824e55b 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -660,13 +660,9 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 	 * If not and the current association PMTU is higher than the new
 	 * peer's PMTU, reset the association PMTU to the new peer's PMTU.
 	 */
-	if (asoc->pathmtu)
-		asoc->pathmtu = min_t(int, peer->pathmtu, asoc->pathmtu);
-	else
-		asoc->pathmtu = peer->pathmtu;
-
-	pr_debug("%s: association:%p PMTU set to %d\n", __func__, asoc,
-		 asoc->pathmtu);
+	sctp_assoc_set_pmtu(asoc, asoc->pathmtu ?
+				  min_t(int, peer->pathmtu, asoc->pathmtu) :
+				  peer->pathmtu);
 
 	peer->pmtu_pending = 0;
 
@@ -1374,6 +1370,15 @@ sctp_assoc_choose_alter_transport(struct sctp_association *asoc,
 	}
 }
 
+void sctp_assoc_set_pmtu(struct sctp_association *asoc, __u32 pmtu)
+{
+	if (asoc->pathmtu != pmtu)
+		asoc->pathmtu = pmtu;
+
+	pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc,
+		 asoc->pathmtu, asoc->frag_point);
+}
+
 /* Update the association's pmtu and frag_point by going through all the
  * transports. This routine is called when a transport's PMTU has changed.
  */
@@ -1397,7 +1402,7 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
 			pmtu = t->pathmtu;
 	}
 
-	asoc->pathmtu = pmtu;
+	sctp_assoc_set_pmtu(asoc, pmtu);
 	asoc->frag_point = sctp_frag_point(asoc, pmtu);
 
 	pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 80835ac26d2c3ce6559f75aaaa0b315fb77d9adf..eeec81d5c485b86076f4e3a4f5dc01fc28fbb91b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2539,7 +2539,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 			trans->pathmtu = params->spp_pathmtu;
 			sctp_assoc_sync_pmtu(asoc);
 		} else if (asoc) {
-			asoc->pathmtu = params->spp_pathmtu;
+			sctp_assoc_set_pmtu(asoc, params->spp_pathmtu);
 		} else {
 			sp->pathmtu = params->spp_pathmtu;
 		}
-- 
2.14.3

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox