Netdev List
 help / color / mirror / Atom feed
* [PATCH bpf v4 12/14] selftests/tls: add a bidirectional test
From: Jakub Kicinski @ 2019-07-19 17:29 UTC (permalink / raw)
  To: john.fastabend, alexei.starovoitov, daniel
  Cc: edumazet, netdev, bpf, Jakub Kicinski, Dirk van der Merwe
In-Reply-To: <20190719172927.18181-1-jakub.kicinski@netronome.com>

Add a simple test which installs the TLS state for both directions,
sends and receives data on both sockets.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
 tools/testing/selftests/net/tls.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 10df77326d34..6d78bd050813 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -684,6 +684,37 @@ TEST_F(tls, recv_lowat)
 	EXPECT_EQ(memcmp(send_mem, recv_mem + 10, 5), 0);
 }
 
+TEST_F(tls, bidir)
+{
+	struct tls12_crypto_info_aes_gcm_128 tls12;
+	char const *test_str = "test_read";
+	int send_len = 10;
+	char buf[10];
+	int ret;
+
+	memset(&tls12, 0, sizeof(tls12));
+	tls12.info.version = TLS_1_3_VERSION;
+	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+	ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12, sizeof(tls12));
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(self->cfd, SOL_TLS, TLS_TX, &tls12, sizeof(tls12));
+	ASSERT_EQ(ret, 0);
+
+	ASSERT_EQ(strlen(test_str) + 1, send_len);
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+
+	memset(buf, 0, sizeof(buf));
+
+	EXPECT_EQ(send(self->cfd, test_str, send_len, 0), send_len);
+	EXPECT_NE(recv(self->fd, buf, send_len, 0), -1);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+};
+
 TEST_F(tls, pollin)
 {
 	char const *test_str = "test_poll";
-- 
2.21.0


^ permalink raw reply related

* [PATCH bpf v4 13/14] selftests/tls: close the socket with open record
From: Jakub Kicinski @ 2019-07-19 17:29 UTC (permalink / raw)
  To: john.fastabend, alexei.starovoitov, daniel
  Cc: edumazet, netdev, bpf, Jakub Kicinski, Dirk van der Merwe
In-Reply-To: <20190719172927.18181-1-jakub.kicinski@netronome.com>

Add test which sends some data with MSG_MORE and then
closes the socket (never calling send without MSG_MORE).
This should make sure we clean up open records correctly.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
 tools/testing/selftests/net/tls.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 6d78bd050813..94a86ca882de 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -239,6 +239,16 @@ TEST_F(tls, msg_more)
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 }
 
+TEST_F(tls, msg_more_unsent)
+{
+	char const *test_str = "test_read";
+	int send_len = 10;
+	char buf[10];
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
+}
+
 TEST_F(tls, sendmsg_single)
 {
 	struct msghdr msg;
-- 
2.21.0


^ permalink raw reply related

* [PATCH bpf v4 14/14] selftests/tls: add shutdown tests
From: Jakub Kicinski @ 2019-07-19 17:29 UTC (permalink / raw)
  To: john.fastabend, alexei.starovoitov, daniel
  Cc: edumazet, netdev, bpf, Jakub Kicinski, Dirk van der Merwe
In-Reply-To: <20190719172927.18181-1-jakub.kicinski@netronome.com>

Add test for killing the connection via shutdown.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
 tools/testing/selftests/net/tls.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 94a86ca882de..630c5b884d43 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -952,6 +952,33 @@ TEST_F(tls, control_msg)
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 }
 
+TEST_F(tls, shutdown)
+{
+	char const *test_str = "test_read";
+	int send_len = 10;
+	char buf[10];
+
+	ASSERT_EQ(strlen(test_str) + 1, send_len);
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+
+	shutdown(self->fd, SHUT_RDWR);
+	shutdown(self->cfd, SHUT_RDWR);
+}
+
+TEST_F(tls, shutdown_unsent)
+{
+	char const *test_str = "test_read";
+	int send_len = 10;
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
+
+	shutdown(self->fd, SHUT_RDWR);
+	shutdown(self->cfd, SHUT_RDWR);
+}
+
 TEST(non_established) {
 	struct tls12_crypto_info_aes_gcm_256 tls12;
 	struct sockaddr_in addr;
-- 
2.21.0


^ permalink raw reply related

* [PATCH bpf v4 06/14] bpf: sockmap, sock_map_delete needs to use xchg
From: Jakub Kicinski @ 2019-07-19 17:29 UTC (permalink / raw)
  To: john.fastabend, alexei.starovoitov, daniel; +Cc: edumazet, netdev, bpf
In-Reply-To: <20190719172927.18181-1-jakub.kicinski@netronome.com>

From: John Fastabend <john.fastabend@gmail.com>

__sock_map_delete() may be called from a tcp event such as unhash or
close from the following trace,

  tcp_bpf_close()
    tcp_bpf_remove()
      sk_psock_unlink()
        sock_map_delete_from_link()
          __sock_map_delete()

In this case the sock lock is held but this only protects against
duplicate removals on the TCP side. If the map is free'd then we have
this trace,

  sock_map_free
    xchg()                  <- replaces map entry
    sock_map_unref()
      sk_psock_put()
        sock_map_del_link()

The __sock_map_delete() call however uses a read, test, null over the
map entry which can result in both paths trying to free the map
entry.

To fix use xchg in TCP paths as well so we avoid having two references
to the same map entry.

Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 net/core/sock_map.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 52d4faeee18b..28702f2e9a4a 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -276,16 +276,20 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
 			     struct sock **psk)
 {
 	struct sock *sk;
+	int err = 0;
 
 	raw_spin_lock_bh(&stab->lock);
 	sk = *psk;
 	if (!sk_test || sk_test == sk)
-		*psk = NULL;
+		sk = xchg(psk, NULL);
+
+	if (likely(sk))
+		sock_map_unref(sk, psk);
+	else
+		err = -EINVAL;
+
 	raw_spin_unlock_bh(&stab->lock);
-	if (unlikely(!sk))
-		return -EINVAL;
-	sock_map_unref(sk, psk);
-	return 0;
+	return err;
 }
 
 static void sock_map_delete_from_link(struct bpf_map *map, struct sock *sk,
-- 
2.21.0


^ permalink raw reply related

* [PATCH net] hv_netvsc: Fix extra rcu_read_unlock in netvsc_recv_callback()
From: Haiyang Zhang @ 2019-07-19 17:33 UTC (permalink / raw)
  To: sashal@kernel.org, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger, olaf@aepfle.de,
	vkuznets, davem@davemloft.net, linux-kernel@vger.kernel.org

There is an extra rcu_read_unlock left in netvsc_recv_callback(),
after a previous patch that removes RCU from this function.
This patch removes the extra RCU unlock.

Fixes: 345ac08990b8 ("hv_netvsc: pass netvsc_device to receive callback")
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 drivers/net/hyperv/netvsc_drv.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index afdcc56..3544e19 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -836,7 +836,6 @@ int netvsc_recv_callback(struct net_device *net,
 
 	if (unlikely(!skb)) {
 		++net_device_ctx->eth_stats.rx_no_memory;
-		rcu_read_unlock();
 		return NVSP_STAT_FAIL;
 	}
 
-- 
1.8.3.1


^ permalink raw reply related

* Re: [PATCH bpf v4 00/14] sockmap/tls fixes
From: Jakub Kicinski @ 2019-07-19 17:37 UTC (permalink / raw)
  To: john.fastabend, alexei.starovoitov, daniel
  Cc: edumazet, netdev, bpf, oss-drivers
In-Reply-To: <20190719172927.18181-1-jakub.kicinski@netronome.com>

On Fri, 19 Jul 2019 10:29:13 -0700, Jakub Kicinski wrote:
> John says:
> 
> Resolve a series of splats discovered by syzbot and an unhash
> TLS issue noted by Eric Dumazet.

Sorry for the delay, this code is quite tricky. According to my testing
TLS SW and HW should now work, I hope I didn't regress things on the
sockmap side.

This is not solving all the issues (ugh), apart from HW needing the
unhash/shutdown treatment, as discussed we may have a sender stuck in
wmem wait while we free context underneath. That's a "minor" UAF for
another day..

^ permalink raw reply

* Re: [PATCH] gve: replace kfree with kvfree
From: Catherine Sullivan @ 2019-07-19 17:44 UTC (permalink / raw)
  To: David Miller; +Cc: chuhongyuan, netdev, linux-kernel
In-Reply-To: <20190718.163207.289099133864098969.davem@davemloft.net>

On Thu, Jul 18, 2019 at 4:32 PM David Miller <davem@davemloft.net> wrote:
>
> From: Chuhong YUAN <chuhongyuan@outlook.com>
> Date: Wed, 17 Jul 2019 00:59:02 +0000
>
> > Variables allocated by kvzalloc should not be freed by kfree.
> > Because they may be allocated by vmalloc.
> > So we replace kfree with kvfree here.
> >
> > Signed-off-by: Chuhong Yuan <chuhongyuan@outlook.com>
>
> Applied, thanks Chuhong.
>
> GVE maintainers, you are upstream now and have to stay on top of review
> of changes like this.  Otherwise I'll just review it myself and apply
> it unless I find problems, and that may not be what you want :)

Will do, thanks for the review. And thanks for the fix Chuhong.

^ permalink raw reply

* Re: [PATCH iproute2-rc v1 0/7] Statistics counter support
From: Stephen Hemminger @ 2019-07-19 17:53 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Leon Romanovsky, netdev, David Ahern, Mark Zhang,
	RDMA mailing list
In-Reply-To: <20190717143157.27205-1-leon@kernel.org>

On Wed, 17 Jul 2019 17:31:49 +0300
Leon Romanovsky <leon@kernel.org> wrote:

> From: Leon Romanovsky <leonro@mellanox.com>
> 
> Changelog v0->v1:
>  * Fixed typo in manual page (Gal)
>  * Rebased on top of d035cc1b "ip tunnel: warn when changing IPv6 tunnel without tunnel name"
>  * Dropped update header file because it was already merged.
> 
> ---------------------------------------------------------------------------------------
> 
> Hi,
> 
> This is supplementary part of accepted to rdma-next kernel series,
> that kernel series provided an option to get various counters: global
> and per-objects.
> 
> Currently, all counters are printed in format similar to other
> device/link properties, while "-p" option will print them in table like
> format.
> 
> [leonro@server ~]$ rdma stat show
> link mlx5_0/1 rx_write_requests 0 rx_read_requests 0 rx_atomic_requests
> 0 out_of_buffer 0 duplicate_request 0 rnr_nak_retry_err 0 packet_seq_err
> 0 implied_nak_seq_err 0 local_ack_timeout_err 0 resp_local_length_error
> 0 resp_cqe_error 0 req_cqe_error 0 req_remote_invalid_request 0
> req_remote_access_errors 0 resp_remote_access_errors 0
> resp_cqe_flush_error 0 req_cqe_flush_error 0 rp_cnp_ignored 0
> rp_cnp_handled 0 np_ecn_marked_roce_packets 0 np_cnp_sent 0
> 
> [leonro@server ~]$ rdma stat show -p
> link mlx5_0/1
> 	rx_write_requests 0
> 	rx_read_requests 0
> 	rx_atomic_requests 0
> 	out_of_buffer 0
> 	duplicate_request 0
> 	rnr_nak_retry_err 0
> 	packet_seq_err 0
> 	implied_nak_seq_err 0
> 	local_ack_timeout_err 0
> 	resp_local_length_error 0
> 	resp_cqe_error 0
> 	req_cqe_error 0
> 	req_remote_invalid_request 0
> 	req_remote_access_errors 0
> 	resp_remote_access_errors 0
> 	resp_cqe_flush_error 0
> 	req_cqe_flush_error 0
> 	rp_cnp_ignored 0
> 	rp_cnp_handled 0
> 	np_ecn_marked_roce_packets 0
> 	np_cnp_sent 0
> 
> Thanks
> 
> Mark Zhang (7):
>   rdma: Add "stat qp show" support
>   rdma: Add get per-port counter mode support
>   rdma: Add rdma statistic counter per-port auto mode support
>   rdma: Make get_port_from_argv() returns valid port in strict port mode
>   rdma: Add stat manual mode support
>   rdma: Add default counter show support
>   rdma: Document counter statistic
> 
>  man/man8/rdma-dev.8       |   1 +
>  man/man8/rdma-link.8      |   1 +
>  man/man8/rdma-resource.8  |   1 +
>  man/man8/rdma-statistic.8 | 167 +++++++++
>  man/man8/rdma.8           |   7 +-
>  rdma/Makefile             |   2 +-
>  rdma/rdma.c               |   3 +-
>  rdma/rdma.h               |   1 +
>  rdma/stat.c               | 759 ++++++++++++++++++++++++++++++++++++++
>  rdma/utils.c              |  17 +-
>  10 files changed, 954 insertions(+), 5 deletions(-)
>  create mode 100644 man/man8/rdma-statistic.8
>  create mode 100644 rdma/stat.c
> 
> --
> 2.20.1
> 

Applied now, thanks

^ permalink raw reply

* Re: [PATCH iproute2] json: fix backslash escape typo in jsonw_puts
From: Stephen Hemminger @ 2019-07-19 17:54 UTC (permalink / raw)
  To: Ivan Delalande; +Cc: David Ahern, netdev
In-Reply-To: <20190718011531.GA29129@visor>

On Wed, 17 Jul 2019 18:15:31 -0700
Ivan Delalande <colona@arista.com> wrote:

> Fixes: fcc16c22 ("provide common json output formatter")
> Signed-off-by: Ivan Delalande <colona@arista.com>

Applied.

^ permalink raw reply

* Re: [PATCH bpf] libbpf: fix missing __WORDSIZE definition
From: Andrii Nakryiko @ 2019-07-19 17:54 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Andrii Nakryiko, bpf, Networking, Daniel Borkmann,
	Alexei Starovoitov, Kernel Team, Jiri Olsa, Namhyung Kim
In-Reply-To: <20190719011644.GN3624@kernel.org>

On Thu, Jul 18, 2019 at 6:16 PM Arnaldo Carvalho de Melo
<arnaldo.melo@gmail.com> wrote:
>
> Em Thu, Jul 18, 2019 at 02:16:29PM -0700, Andrii Nakryiko escreveu:
> > On Thu, Jul 18, 2019 at 12:14 PM Arnaldo Carvalho de Melo
> > <arnaldo.melo@gmail.com> wrote:
> > >
> > > Em Thu, Jul 18, 2019 at 03:56:19PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > > I'll stop and replace my patch with yours to see if it survives all the
> > > > test builds...
> > >
> > > So, Alpine:3.4, the first image for this distro I did when I started
> > > these builds, survives the 6 builds with gcc and clang with your patch:
> > >
> > >
> > > [perfbuilder@quaco linux-perf-tools-build]$ export PERF_TARBALL=http://192.168.124.1/perf/perf-5.2.0.tar.xz
> > > [perfbuilder@quaco linux-perf-tools-build]$ dm
> > >    1  alpine:3.4                    : Ok   gcc (Alpine 5.3.0) 5.3.0, clang version 3.8.0 (tags/RELEASE_380/final)
> > >
> > >
> > > [perfbuilder@quaco linux-perf-tools-build]$ grep "+ make" dm.log/alpine\:3.4
> > > + make ARCH= CROSS_COMPILE= EXTRA_CFLAGS= -C /git/linux/tools/perf O=/tmp/build/perf
> > > + make ARCH= CROSS_COMPILE= EXTRA_CFLAGS= NO_LIBELF=1 -C /git/linux/tools/perf O=/tmp/build/perf
> > > + make ARCH= CROSS_COMPILE= EXTRA_CFLAGS= -C /git/linux/tools/perf O=/tmp/build/perf CC=clang
> > > + make ARCH= CROSS_COMPILE= EXTRA_CFLAGS= NO_LIBELF=1 -C /git/linux/tools/perf O=/tmp/build/perf CC=clang
> > > + make ARCH= CROSS_COMPILE= EXTRA_CFLAGS= LIBCLANGLLVM=1 -C /git/linux/tools/perf O=/tmp/build/perf CC=clang
> > > + make ARCH= CROSS_COMPILE= EXTRA_CFLAGS= LIBCLANGLLVM=1 -C /git/linux/tools/perf O=/tmp/build/perf
> > > [perfbuilder@quaco linux-perf-tools-build]$
> > >
> > > Probably all the rest will go well, will let you know.
> > >
> > > Daniel, do you mind if I carry this one in my perf/core branch? Its
> > > small and shouldn't clash with other patches, I think. It should go
> > > upstream soon:
> > >
> > > Andrii, there are these others:
> >
> > I took a look at them, but I think it would be better, if you could
> > post them as proper patches to
> > bpf@vger.kernel.org/netdev@vger.kernel.org, so that others can check
> > and comment, if necessary.
> >
> > One nit for all three of them: we typically prefix subject with just
> > "libbpf: " instead of "tools lib libbpf".
>
> Sure, that was mechanic, I do it like that for the patches I upstream,
> and that was like that in the beginning:
>
> [acme@quaco perf]$ git log --oneline tools/lib/bpf | grep lib | tail
> 9d759a9b4ac2 tools lib bpf: Collect map definition in bpf_object
> d8ad6a15cc3a tools lib bpf: Don't do a feature check when cleaning
> 6371ca3b541c bpf tools: Improve libbpf error reporting
> 0c77c04aa9c2 tools lib bpf: Change FEATURE-DUMP to FEATURE-DUMP.libbpf
> 715f8db9102f tools lib bpf: Fix compiler warning on CentOS 6
> 7c422f557266 tools build: Build fixdep helper from perf and basic libs
> 65f041bee783 tools lib bpf: Use FEATURE_USER to allow building in the same dir as perf
> 20517cd9c593 tools lib bpf: Fix up FEATURE_{TESTS,DISPLAY} usage
> cc4228d57c4c bpf tools: Check endianness and make libbpf fail early
> 1b76c13e4b36 bpf tools: Introduce 'bpf' library and add bpf feature check
> [acme@quaco perf]$
>
> Anyway, I'll resubmit the patches that you acked to bpf@vger and will
> let my container tests fail for those cases, sticking a warning so that
> Ingo knows that this is being dealt with and those problems will get
> fixed soon when the bpf tree merges upstream.

Great, thanks!

>
> > >
> > > 8dfb6ed300bf tools lib bpf: Avoid designated initializers for unnamed union members
> >
> > + attr.sample_period = attr.wakeup_events = 1;
> >
> > let's instead
> >
> > + attr.sample_period = 1;
> > + attr.wakeup_events = 1;
> >
> > I don't like multi-assignments.
>
> Meh, what's wrong with it? :)

Nothing, objectively :) But I don't remember seeing multi-assignments
in libbpf code base, so nitpicking for consistency's sake....


>
> > Also, if we are doing explicit assignment, let's do it for all the
> > fields, not split initialization like that.
>
> If that is what takes to get it to build everywhere, no problem. In
> tools/perf I'm used to doing it, documents that this is an oddity to
> support more systems :)
>
> > > 80f7f8f21441 tools lib bpf: Avoid using 'link' as it shadows a global definition in some systems
> >
> > For this one I'm confused. What compiler/system you are getting it on?
>
> > I tried to reproduce it with this example (trying both global
> > variable, as well as function):
> >
> > #include <stdio.h>
> >
> > //int link = 1;
> > void link() {}
> >
> > int f(int link) {
> >         return link;
> > }
> > int main() {
> >         printf("%d\n", f(123));
> >         return 0;
> > }
> >
> > I haven't gotten any errors nor warnings. I'm certainly liking
> > existing naming better, but my main concern is that we'll probably add
> > more code like this, and we'll forget about this problem and will
> > re-introduce.
>
> yeah, this happens from time to time with centos:6 and IIRC
> amazonlinux:1, oraclelinux:6.
>
> I still remember when I got reports from the twitter guys when something
> broke on rhel:5, that was the main reason to get these container tests
> in place, you never know where people are using this, and since before
> upstreaming I do the tests, fixing those became second nature 8-)
>
> > So I'd rather figure out why it's happening and some rare system and
> > see if we can mitigate that without all the renames.

Ok, did some more googling. This warning (turned error in your setup)
is emitted when -Wshadow option is enabled for GCC/clang. It appears
to be disabled by default, so it must be enabled somewhere for perf
build or something.

Would it be possible to disable it at least for libbpf when building
from perf either everywhere or for those systems where you see this
warning? I don't think this warning is useful, to be honest, just
random name conflict between any local and global variables will cause
this.


> >
> >
> > > d93fe741e291 tools lib bpf: Fix endianness macro usage for some compilers
> >
> > This one looks good to me, thanks!
> >
> > Acked-by: Andrii Nakryiko <andriin@fb.com>
>
> Ok, will collect this, change the prefix to: "libbpf: ..." and send to
> bpf@vger
>
> >
> > >
> > > If you could take a look at them at my tmp.perf/core branch at:
> > >
> > > https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/log/?h=tmp.perf/core
> > >
> > > I'm force pushing it now to replace my __WORDSIZE patch with yours, the
> > > SHAs should be the 3 above and the one below.
> > >
> > > And to build cleanly I had to cherry pick this one:
> > >
> > > 3091dafc1884 (HEAD -> perf/core) libbpf: fix ptr to u64 conversion warning on 32-bit platforms
> > >
> > > Alpine:3.5 just finished:
> > >
> > >    2 alpine:3.5                    : Ok   gcc (Alpine 6.2.1) 6.2.1 20160822, clang version 3.8.1 (tags/RELEASE_381/final)
> > >
> > > - Arnaldo
>
> --
>
> - Arnaldo

^ permalink raw reply

* Re: [PATCH bpf] libbpf: fix missing __WORDSIZE definition
From: Arnaldo Carvalho de Melo @ 2019-07-19 18:14 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Arnaldo Carvalho de Melo, Andrii Nakryiko, bpf, Networking,
	Daniel Borkmann, Alexei Starovoitov, Kernel Team, Jiri Olsa,
	Namhyung Kim
In-Reply-To: <CAEf4BzaKDTnqe4QYebNSoCLfhcUJbhzgXC5sG+y+c4JLc9PFqg@mail.gmail.com>

Em Fri, Jul 19, 2019 at 10:54:44AM -0700, Andrii Nakryiko escreveu:
> On Thu, Jul 18, 2019 at 6:16 PM Arnaldo Carvalho de Melo
> <arnaldo.melo@gmail.com> wrote:
> >
> > Em Thu, Jul 18, 2019 at 02:16:29PM -0700, Andrii Nakryiko escreveu:
> > > On Thu, Jul 18, 2019 at 12:14 PM Arnaldo Carvalho de Melo
> > > <arnaldo.melo@gmail.com> wrote:
> > > >
> > > > Em Thu, Jul 18, 2019 at 03:56:19PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > > > I'll stop and replace my patch with yours to see if it survives all the
> > > > > test builds...
> > > >
> > > > So, Alpine:3.4, the first image for this distro I did when I started
> > > > these builds, survives the 6 builds with gcc and clang with your patch:
> > > >
> > > >
> > > > [perfbuilder@quaco linux-perf-tools-build]$ export PERF_TARBALL=http://192.168.124.1/perf/perf-5.2.0.tar.xz
> > > > [perfbuilder@quaco linux-perf-tools-build]$ dm
> > > >    1  alpine:3.4                    : Ok   gcc (Alpine 5.3.0) 5.3.0, clang version 3.8.0 (tags/RELEASE_380/final)
> > > >
> > > >
> > > > [perfbuilder@quaco linux-perf-tools-build]$ grep "+ make" dm.log/alpine\:3.4
> > > > + make ARCH= CROSS_COMPILE= EXTRA_CFLAGS= -C /git/linux/tools/perf O=/tmp/build/perf
> > > > + make ARCH= CROSS_COMPILE= EXTRA_CFLAGS= NO_LIBELF=1 -C /git/linux/tools/perf O=/tmp/build/perf
> > > > + make ARCH= CROSS_COMPILE= EXTRA_CFLAGS= -C /git/linux/tools/perf O=/tmp/build/perf CC=clang
> > > > + make ARCH= CROSS_COMPILE= EXTRA_CFLAGS= NO_LIBELF=1 -C /git/linux/tools/perf O=/tmp/build/perf CC=clang
> > > > + make ARCH= CROSS_COMPILE= EXTRA_CFLAGS= LIBCLANGLLVM=1 -C /git/linux/tools/perf O=/tmp/build/perf CC=clang
> > > > + make ARCH= CROSS_COMPILE= EXTRA_CFLAGS= LIBCLANGLLVM=1 -C /git/linux/tools/perf O=/tmp/build/perf
> > > > [perfbuilder@quaco linux-perf-tools-build]$
> > > >
> > > > Probably all the rest will go well, will let you know.
> > > >
> > > > Daniel, do you mind if I carry this one in my perf/core branch? Its
> > > > small and shouldn't clash with other patches, I think. It should go
> > > > upstream soon:
> > > >
> > > > Andrii, there are these others:
> > >
> > > I took a look at them, but I think it would be better, if you could
> > > post them as proper patches to
> > > bpf@vger.kernel.org/netdev@vger.kernel.org, so that others can check
> > > and comment, if necessary.
> > >
> > > One nit for all three of them: we typically prefix subject with just
> > > "libbpf: " instead of "tools lib libbpf".
> >
> > Sure, that was mechanic, I do it like that for the patches I upstream,
> > and that was like that in the beginning:
> >
> > [acme@quaco perf]$ git log --oneline tools/lib/bpf | grep lib | tail
> > 9d759a9b4ac2 tools lib bpf: Collect map definition in bpf_object
> > d8ad6a15cc3a tools lib bpf: Don't do a feature check when cleaning
> > 6371ca3b541c bpf tools: Improve libbpf error reporting
> > 0c77c04aa9c2 tools lib bpf: Change FEATURE-DUMP to FEATURE-DUMP.libbpf
> > 715f8db9102f tools lib bpf: Fix compiler warning on CentOS 6
> > 7c422f557266 tools build: Build fixdep helper from perf and basic libs
> > 65f041bee783 tools lib bpf: Use FEATURE_USER to allow building in the same dir as perf
> > 20517cd9c593 tools lib bpf: Fix up FEATURE_{TESTS,DISPLAY} usage
> > cc4228d57c4c bpf tools: Check endianness and make libbpf fail early
> > 1b76c13e4b36 bpf tools: Introduce 'bpf' library and add bpf feature check
> > [acme@quaco perf]$
> >
> > Anyway, I'll resubmit the patches that you acked to bpf@vger and will
> > let my container tests fail for those cases, sticking a warning so that
> > Ingo knows that this is being dealt with and those problems will get
> > fixed soon when the bpf tree merges upstream.
> 
> Great, thanks!
> 
> >
> > > >
> > > > 8dfb6ed300bf tools lib bpf: Avoid designated initializers for unnamed union members
> > >
> > > + attr.sample_period = attr.wakeup_events = 1;
> > >
> > > let's instead
> > >
> > > + attr.sample_period = 1;
> > > + attr.wakeup_events = 1;
> > >
> > > I don't like multi-assignments.
> >
> > Meh, what's wrong with it? :)
> 
> Nothing, objectively :) But I don't remember seeing multi-assignments
> in libbpf code base, so nitpicking for consistency's sake....
> 
> 
> >
> > > Also, if we are doing explicit assignment, let's do it for all the
> > > fields, not split initialization like that.
> >
> > If that is what takes to get it to build everywhere, no problem. In
> > tools/perf I'm used to doing it, documents that this is an oddity to
> > support more systems :)
> >
> > > > 80f7f8f21441 tools lib bpf: Avoid using 'link' as it shadows a global definition in some systems
> > >
> > > For this one I'm confused. What compiler/system you are getting it on?
> >
> > > I tried to reproduce it with this example (trying both global
> > > variable, as well as function):
> > >
> > > #include <stdio.h>
> > >
> > > //int link = 1;
> > > void link() {}
> > >
> > > int f(int link) {
> > >         return link;
> > > }
> > > int main() {
> > >         printf("%d\n", f(123));
> > >         return 0;
> > > }
> > >
> > > I haven't gotten any errors nor warnings. I'm certainly liking
> > > existing naming better, but my main concern is that we'll probably add
> > > more code like this, and we'll forget about this problem and will
> > > re-introduce.
> >
> > yeah, this happens from time to time with centos:6 and IIRC
> > amazonlinux:1, oraclelinux:6.
> >
> > I still remember when I got reports from the twitter guys when something
> > broke on rhel:5, that was the main reason to get these container tests
> > in place, you never know where people are using this, and since before
> > upstreaming I do the tests, fixing those became second nature 8-)
> >
> > > So I'd rather figure out why it's happening and some rare system and
> > > see if we can mitigate that without all the renames.
> 
> Ok, did some more googling. This warning (turned error in your setup)
> is emitted when -Wshadow option is enabled for GCC/clang. It appears
> to be disabled by default, so it must be enabled somewhere for perf
> build or something.

Right, I came to the exact same conclusion, doing tests here:

[perfbuilder@3a58896a648d tmp]$ gcc -Wshadow shadow_global_decl.c   -o shadow_global_decl
shadow_global_decl.c: In function 'main':
shadow_global_decl.c:9: warning: declaration of 'link' shadows a global declaration
shadow_global_decl.c:4: warning: shadowed declaration is here
[perfbuilder@3a58896a648d tmp]$ gcc --version |& head -1
gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23)
[perfbuilder@3a58896a648d tmp]$ gcc shadow_global_decl.c   -o shadow_global_decl
[perfbuilder@3a58896a648d tmp]$ 

So I'm going to remove this warning from the places where it causes
problems.

> Would it be possible to disable it at least for libbpf when building
> from perf either everywhere or for those systems where you see this
> warning? I don't think this warning is useful, to be honest, just
> random name conflict between any local and global variables will cause
> this.

Yeah, I might end up having this applied.

[acme@quaco perf]$ git diff
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 495066bafbe3..b6e902a2312f 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -32,7 +32,6 @@ EXTRA_WARNINGS += -Wno-system-headers
 EXTRA_WARNINGS += -Wold-style-definition
 EXTRA_WARNINGS += -Wpacked
 EXTRA_WARNINGS += -Wredundant-decls
-EXTRA_WARNINGS += -Wshadow
 EXTRA_WARNINGS += -Wstrict-prototypes
 EXTRA_WARNINGS += -Wswitch-default
 EXTRA_WARNINGS += -Wswitch-enum
[acme@quaco perf]$


Sorry for the noise...

- Arnaldo

^ permalink raw reply related

* Re: [PATCH bpf] tools/bpf: fix bpftool build with OUTPUT set
From: Jakub Kicinski @ 2019-07-19 18:17 UTC (permalink / raw)
  To: Ilya Leoshkevich
  Cc: bpf, netdev, lmb, gor, heiko.carstens, Arnaldo Carvalho de Melo
In-Reply-To: <43FB794B-6200-4560-BF10-BBF4B9247913@linux.ibm.com>

On Fri, 19 Jul 2019 15:12:24 +0200, Ilya Leoshkevich wrote:
> > Am 18.07.2019 um 20:51 schrieb Jakub Kicinski <jakub.kicinski@netronome.com>:
> > 
> > We should probably make a script with all the ways of calling make
> > should work. Otherwise we can lose track too easily.  
> 
> Thanks for the script!
> 
> I’m trying to make it all pass now, and hitting a weird issue in the
> Kbuild case. The build prints "No rule to make target
> 'scripts/Makefile.ubsan.o'" and proceeds with an empty BPFTOOL_VERSION,
> which causes problems later on.

Does it only break with UBSAN enabled?

> I've found that this is caused by sub_make_done=1 environment variable,
> and unsetting it indeed fixes the problem, since the root Makefile no
> longer uses the implicit %.o rule.
> 
> However, I wonder if that would be acceptable in the final version of
> the patch, and whether there is a cleaner way to achieve the same
> effect?

I'm not sure to be honest. Did you check how perf deals with that?

My goal was primarily to make sure we don't regress, so maybe if some
corner cases don't work that's not the end of the world. I think a good
rule of the thumb would be "if it works for perf it should work for
bpftool" ;) Perf gets a lot more build testing.

^ permalink raw reply

* Re: [PATCH bpf] libbpf: fix missing __WORDSIZE definition
From: Andrii Nakryiko @ 2019-07-19 18:26 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Andrii Nakryiko, bpf, Networking, Daniel Borkmann,
	Alexei Starovoitov, Kernel Team, Jiri Olsa, Namhyung Kim
In-Reply-To: <20190719181423.GO3624@kernel.org>

On Fri, Jul 19, 2019 at 11:14 AM Arnaldo Carvalho de Melo
<arnaldo.melo@gmail.com> wrote:
>
> Em Fri, Jul 19, 2019 at 10:54:44AM -0700, Andrii Nakryiko escreveu:
> > On Thu, Jul 18, 2019 at 6:16 PM Arnaldo Carvalho de Melo
> > <arnaldo.melo@gmail.com> wrote:
> > >
> > > Em Thu, Jul 18, 2019 at 02:16:29PM -0700, Andrii Nakryiko escreveu:
> > > > On Thu, Jul 18, 2019 at 12:14 PM Arnaldo Carvalho de Melo
> > > > <arnaldo.melo@gmail.com> wrote:
> > > > >
> > > > > Em Thu, Jul 18, 2019 at 03:56:19PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > > > > I'll stop and replace my patch with yours to see if it survives all the
> > > > > > test builds...
> > > > >
> > > > > So, Alpine:3.4, the first image for this distro I did when I started
> > > > > these builds, survives the 6 builds with gcc and clang with your patch:
> > > > >
> > > > >

[...]

> >
> > Ok, did some more googling. This warning (turned error in your setup)
> > is emitted when -Wshadow option is enabled for GCC/clang. It appears
> > to be disabled by default, so it must be enabled somewhere for perf
> > build or something.
>
> Right, I came to the exact same conclusion, doing tests here:
>
> [perfbuilder@3a58896a648d tmp]$ gcc -Wshadow shadow_global_decl.c   -o shadow_global_decl
> shadow_global_decl.c: In function 'main':
> shadow_global_decl.c:9: warning: declaration of 'link' shadows a global declaration
> shadow_global_decl.c:4: warning: shadowed declaration is here
> [perfbuilder@3a58896a648d tmp]$ gcc --version |& head -1
> gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23)
> [perfbuilder@3a58896a648d tmp]$ gcc shadow_global_decl.c   -o shadow_global_decl
> [perfbuilder@3a58896a648d tmp]$
>
> So I'm going to remove this warning from the places where it causes
> problems.
>
> > Would it be possible to disable it at least for libbpf when building
> > from perf either everywhere or for those systems where you see this
> > warning? I don't think this warning is useful, to be honest, just
> > random name conflict between any local and global variables will cause
> > this.
>
> Yeah, I might end up having this applied.

Thanks!

>
> [acme@quaco perf]$ git diff
> diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
> index 495066bafbe3..b6e902a2312f 100644
> --- a/tools/scripts/Makefile.include
> +++ b/tools/scripts/Makefile.include
> @@ -32,7 +32,6 @@ EXTRA_WARNINGS += -Wno-system-headers
>  EXTRA_WARNINGS += -Wold-style-definition
>  EXTRA_WARNINGS += -Wpacked
>  EXTRA_WARNINGS += -Wredundant-decls
> -EXTRA_WARNINGS += -Wshadow
>  EXTRA_WARNINGS += -Wstrict-prototypes
>  EXTRA_WARNINGS += -Wswitch-default
>  EXTRA_WARNINGS += -Wswitch-enum
> [acme@quaco perf]$
>
>
> Sorry for the noise...

No worries, I learned something new today :)

>
> - Arnaldo

^ permalink raw reply

* Re: [PATCH 1/2] libbpf: Fix endianness macro usage for some compilers
From: Arnaldo Carvalho de Melo @ 2019-07-19 18:30 UTC (permalink / raw)
  To: Y Song
  Cc: Daniel Borkmann, Alexei Starovoitov, Clark Williams, bpf, netdev,
	Arnaldo Carvalho de Melo, Andrii Nakryiko, Adrian Hunter,
	Jiri Olsa, Namhyung Kim
In-Reply-To: <CAH3MdRXDz+Yn104Y3U0u-q_zW0cwSaH0QAPA8aWK9hpBc4hukw@mail.gmail.com>

Em Fri, Jul 19, 2019 at 09:25:07AM -0700, Y Song escreveu:
> On Fri, Jul 19, 2019 at 7:35 AM Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
> > Using endian.h and its endianness macros makes this code build in a
> > wider range of compilers, as some don't have those macros
> > (__BYTE_ORDER__, __ORDER_LITTLE_ENDIAN__, __ORDER_BIG_ENDIAN__),
> > so use instead endian.h's macros (__BYTE_ORDER, __LITTLE_ENDIAN,
> > __BIG_ENDIAN) which makes this code even shorter :-)
> 
> gcc 4.6.0 starts to support __BYTE_ORDER__, __ORDER_LITTLE_ENDIAN__, etc.
> I guess those platforms with failing compilation have gcc < 4.6.0.
> Agree that for libbpf, which will be used outside kernel bpf selftest should
> try to compile with lower versions of gcc.

Yeah, I wouldn't mind at all if the answer to this patch was hey, the
minimal version for building libbpf is X.Y, no problem, its just that in
this case there is an alternative that works everywhere and the
resulting source code is even more compact :-)

- Arnaldo
 
> > Acked-by: Andrii Nakryiko <andriin@fb.com>
> > Cc: Adrian Hunter <adrian.hunter@intel.com>
> > Cc: Alexei Starovoitov <ast@kernel.org>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > Cc: Jiri Olsa <jolsa@kernel.org>
> > Cc: Namhyung Kim <namhyung@kernel.org>
> > Fixes: 12ef5634a855 ("libbpf: simplify endianness check")
> > Fixes: e6c64855fd7a ("libbpf: add btf__parse_elf API to load .BTF and .BTF.ext")
> > Link: https://lkml.kernel.org/n/tip-eep5n8vgwcdphw3uc058k03u@git.kernel.org
> > Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> > ---
> >  tools/lib/bpf/btf.c    | 5 +++--
> >  tools/lib/bpf/libbpf.c | 5 +++--
> >  2 files changed, 6 insertions(+), 4 deletions(-)
> >
> > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> > index 467224feb43b..d821107f55f9 100644
> > --- a/tools/lib/bpf/btf.c
> > +++ b/tools/lib/bpf/btf.c
> > @@ -1,6 +1,7 @@
> >  // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
> >  /* Copyright (c) 2018 Facebook */
> >
> > +#include <endian.h>
> >  #include <stdio.h>
> >  #include <stdlib.h>
> >  #include <string.h>
> > @@ -419,9 +420,9 @@ struct btf *btf__new(__u8 *data, __u32 size)
> >
> >  static bool btf_check_endianness(const GElf_Ehdr *ehdr)
> >  {
> > -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
> > +#if __BYTE_ORDER == __LITTLE_ENDIAN
> >         return ehdr->e_ident[EI_DATA] == ELFDATA2LSB;
> > -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
> > +#elif __BYTE_ORDER == __BIG_ENDIAN
> >         return ehdr->e_ident[EI_DATA] == ELFDATA2MSB;
> >  #else
> >  # error "Unrecognized __BYTE_ORDER__"
> > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> > index 794dd5064ae8..b1dec5b1de54 100644
> > --- a/tools/lib/bpf/libbpf.c
> > +++ b/tools/lib/bpf/libbpf.c
> > @@ -20,6 +20,7 @@
> >  #include <inttypes.h>
> >  #include <string.h>
> >  #include <unistd.h>
> > +#include <endian.h>
> >  #include <fcntl.h>
> >  #include <errno.h>
> >  #include <asm/unistd.h>
> > @@ -612,10 +613,10 @@ static int bpf_object__elf_init(struct bpf_object *obj)
> >
> >  static int bpf_object__check_endianness(struct bpf_object *obj)
> >  {
> > -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
> > +#if __BYTE_ORDER == __LITTLE_ENDIAN
> >         if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
> >                 return 0;
> > -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
> > +#elif __BYTE_ORDER == __BIG_ENDIAN
> >         if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
> >                 return 0;
> >  #else
> > --
> > 2.21.0
> >

-- 

- Arnaldo

^ permalink raw reply

* Re: [PATCH bpf] libbpf: fix missing __WORDSIZE definition
From: Arnaldo Carvalho de Melo @ 2019-07-19 18:34 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Arnaldo Carvalho de Melo, Andrii Nakryiko, bpf, Networking,
	Daniel Borkmann, Alexei Starovoitov, Kernel Team, Jiri Olsa,
	Namhyung Kim
In-Reply-To: <CAEf4BzZtYnVG3tnn25-TTJLOmeevv9fSZnAf7S2pG3VA+dMM+Q@mail.gmail.com>

Em Fri, Jul 19, 2019 at 11:26:50AM -0700, Andrii Nakryiko escreveu:
> On Fri, Jul 19, 2019 at 11:14 AM Arnaldo Carvalho de Melo <arnaldo.melo@gmail.com> wrote:
> > Em Fri, Jul 19, 2019 at 10:54:44AM -0700, Andrii Nakryiko escreveu:
> > > Ok, did some more googling. This warning (turned error in your setup)
> > > is emitted when -Wshadow option is enabled for GCC/clang. It appears
> > > to be disabled by default, so it must be enabled somewhere for perf
> > > build or something.

> > Right, I came to the exact same conclusion, doing tests here:

> > [perfbuilder@3a58896a648d tmp]$ gcc -Wshadow shadow_global_decl.c   -o shadow_global_decl
> > shadow_global_decl.c: In function 'main':
> > shadow_global_decl.c:9: warning: declaration of 'link' shadows a global declaration
> > shadow_global_decl.c:4: warning: shadowed declaration is here
> > [perfbuilder@3a58896a648d tmp]$ gcc --version |& head -1
> > gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23)
> > [perfbuilder@3a58896a648d tmp]$ gcc shadow_global_decl.c   -o shadow_global_decl
> > [perfbuilder@3a58896a648d tmp]$

> > So I'm going to remove this warning from the places where it causes
> > problems.

> > > Would it be possible to disable it at least for libbpf when building
> > > from perf either everywhere or for those systems where you see this
> > > warning? I don't think this warning is useful, to be honest, just
> > > random name conflict between any local and global variables will cause
> > > this.

> > Yeah, I might end up having this applied.

> Thanks!

So, I'm ending up with the patch below, there is some value after all in
Wshadow, that is, from gcc 4.8 onwards :-)

- Arnaldo

diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 495066bafbe3..ded7a950dc40 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -32,7 +32,6 @@ EXTRA_WARNINGS += -Wno-system-headers
 EXTRA_WARNINGS += -Wold-style-definition
 EXTRA_WARNINGS += -Wpacked
 EXTRA_WARNINGS += -Wredundant-decls
-EXTRA_WARNINGS += -Wshadow
 EXTRA_WARNINGS += -Wstrict-prototypes
 EXTRA_WARNINGS += -Wswitch-default
 EXTRA_WARNINGS += -Wswitch-enum
@@ -69,8 +68,16 @@ endif
 # will do for now and keep the above -Wstrict-aliasing=3 in place
 # in newer systems.
 # Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h
+#
+# See https://lkml.org/lkml/2006/11/28/253 and https://gcc.gnu.org/gcc-4.8/changes.html,
+# that takes into account Linus's comments (search for Wshadow) for the reasoning about
+# -Wshadow not being interesting before gcc 4.8.
+
 ifneq ($(filter 3.%,$(MAKE_VERSION)),)  # make-3
 EXTRA_WARNINGS += -fno-strict-aliasing
+EXTRA_WARNINGS += -Wno-shadow
+else
+EXTRA_WARNINGS += -Wshadow
 endif
 
 ifneq ($(findstring $(MAKEFLAGS), w),w)

^ permalink raw reply related

* Re: [PATCH v3 net-next 13/19] ionic: Add initial ethtool support
From: Shannon Nelson @ 2019-07-19 18:41 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: netdev
In-Reply-To: <20190719024013.GC24765@lunn.ch>

On 7/18/19 7:40 PM, Andrew Lunn wrote:
> On Thu, Jul 18, 2019 at 05:12:07PM -0700, Shannon Nelson wrote:
>> On 7/17/19 8:28 PM, Andrew Lunn wrote:
>>> On Fri, Jul 12, 2019 at 10:16:31PM -0700, Shannon Nelson wrote:
>>>> On 7/8/19 7:14 PM, Andrew Lunn wrote:
>>>>>> +static int ionic_set_pauseparam(struct net_device *netdev,
>>>>>> +				struct ethtool_pauseparam *pause)
>>>>>> +{
>>>>>> +	struct lif *lif = netdev_priv(netdev);
>>>>>> +	struct ionic *ionic = lif->ionic;
>>>>>> +	struct ionic_dev *idev = &lif->ionic->idev;
>>>>>> +
>>>>>> +	u32 requested_pause;
>>>>>> +	u32 cur_autoneg;
>>>>>> +	int err;
>>>>>> +
>>>>>> +	cur_autoneg = idev->port_info->config.an_enable ? AUTONEG_ENABLE :
>>>>>> +								AUTONEG_DISABLE;
>>>>>> +	if (pause->autoneg != cur_autoneg) {
>>>>>> +		netdev_info(netdev, "Please use 'ethtool -s ...' to change autoneg\n");
>>>>>> +		return -EOPNOTSUPP;
>>>>>> +	}
>>>>>> +
>>>>>> +	/* change both at the same time */
>>>>>> +	requested_pause = PORT_PAUSE_TYPE_LINK;
>>>>>> +	if (pause->rx_pause)
>>>>>> +		requested_pause |= IONIC_PAUSE_F_RX;
>>>>>> +	if (pause->tx_pause)
>>>>>> +		requested_pause |= IONIC_PAUSE_F_TX;
>>>>>> +
>>>>>> +	if (requested_pause == idev->port_info->config.pause_type)
>>>>>> +		return 0;
>>>>>> +
>>>>>> +	idev->port_info->config.pause_type = requested_pause;
>>>>>> +
>>>>>> +	mutex_lock(&ionic->dev_cmd_lock);
>>>>>> +	ionic_dev_cmd_port_pause(idev, requested_pause);
>>>>>> +	err = ionic_dev_cmd_wait(ionic, devcmd_timeout);
>>>>>> +	mutex_unlock(&ionic->dev_cmd_lock);
>>>>>> +	if (err)
>>>>>> +		return err;
>>>>> Hi Shannon
>>>>>
>>>>> I've no idea what the firmware black box is doing, but this looks
>>>>> wrong.
>>>>>
>>>>> pause->autoneg is about if the results of auto-neg should be used or
>>>>> not. If false, just configure the MAC with the pause settings and you
>>>>> are done. If the interface is being forced, so autoneg in general is
>>>>> disabled, just configure the MAC and you are done.
>>>>>
>>>>> If pause->autoneg is true and the interface is using auto-neg as a
>>>>> whole, you pass the pause values to the PHY for it to advertise and
>>>>> trigger an auto-neg. Once autoneg has completed, and the resolved
>>>>> settings are available, the MAC is configured with the resolved
>>>>> values.
>>>>>
>>>>> Looking at this code, i don't see any difference between configuring
>>>>> the MAC or configuring the PHY. I would expect pause->autoneg to be
>>>>> part of requested_pause somehow, so the firmware knows what is should
>>>>> do.
>>>>>
>>>>> 	Andrew
>>>> In this device there's actually very little the driver can do to directly
>>>> configure the mac or phy besides passing through to the firmware what the
>>>> user has requested - that happens here for the pause values, and in
>>>> ionic_set_link_ksettings() for autoneg.  The firmware is managing the port
>>>> based on these requests with the help of internally configured rules defined
>>>> in a customer setting.
>>> I get that. But the firmware needs to conform to what Linux
>>> expects. And what i see here does not conform. That is why i gave a
>>> bit of detail in my reply.
>>>
>>> What exactly does the firmware do? Once we know that, we can figure
>>> out when the driver should return -EOPNOTSUPP because of firmware
>>> limitations, and what it can configure and should return 0.
>> Because this is fairly smart FW, it handles this as expected.  I can add
>> this as another comment in the code.
> Hi Shannon
>
> Looking at the code, i don't see how it can handle this
> correctly. Please look at my comments, particularly the meaning of
> pause->autoneg and describe how the firmware does the right thing,
> given what information it is passed.
>

I guess I'm not sure how much better I can answer your question. Like 
several other devices, we don't support selecting pause->autoneg.  The 
firmware manages the PHY itself, the driver doesn't have direct access 
to the PHY.  The driver passes the tx and rx pause info down to the 
firmware in a command request.  The NIC firmware does an autoneg if 
autoneg is enabled on the port.

sln


^ permalink raw reply

* [PATCH net] tcp: be more careful in tcp_fragment()
From: Eric Dumazet @ 2019-07-19 18:52 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, Eric Dumazet, Eric Dumazet, Andrew Prout, Jonathan Lemon,
	Michal Kubecek, Neal Cardwell, Yuchung Cheng, Christoph Paasch,
	Jonathan Looney

Some applications set tiny SO_SNDBUF values and expect
TCP to just work. Recent patches to address CVE-2019-11478
broke them in case of losses, since retransmits might
be prevented.

We should allow these flows to make progress.

This patch allows the first and last skb in retransmit queue
to be split even if memory limits are hit.

It also adds the some room due to the fact that tcp_sendmsg()
and tcp_sendpage() might overshoot sk_wmem_queued by about one full
TSO skb (64KB size). Note this allowance was already present
in stable backports for kernels < 4.15

Note for < 4.15 backports :
 tcp_rtx_queue_tail() will probably look like :

static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk)
{
	struct sk_buff *skb = tcp_send_head(sk);

	return skb ? tcp_write_queue_prev(sk, skb) : tcp_write_queue_tail(sk);
}

Fixes: f070ef2ac667 ("tcp: tcp_fragment() should apply sane memory limits")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrew Prout <aprout@ll.mit.edu>
Tested-by: Andrew Prout <aprout@ll.mit.edu>
Tested-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Tested-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Christoph Paasch <cpaasch@apple.com>
Cc: Jonathan Looney <jtl@netflix.com>
---
 include/net/tcp.h     |  5 +++++
 net/ipv4/tcp_output.c | 13 +++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f42d300f0cfaa87520320dd287a7b4750adf7d8a..e5cf514ba118e688ce3b3da66f696abd47e1d10f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1709,6 +1709,11 @@ static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
 	return skb_rb_first(&sk->tcp_rtx_queue);
 }
 
+static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk)
+{
+	return skb_rb_last(&sk->tcp_rtx_queue);
+}
+
 static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
 {
 	return skb_peek(&sk->sk_write_queue);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4af1f5dae9d3e937ef39685355c9d3f19ff3ee3b..6e4afc48d7bba7cded4d3fe38f32ab02328f9e05 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1288,6 +1288,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
 	int nsize, old_factor;
+	long limit;
 	int nlen;
 	u8 flags;
 
@@ -1298,8 +1299,16 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	if (nsize < 0)
 		nsize = 0;
 
-	if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf &&
-		     tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) {
+	/* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb.
+	 * We need some allowance to not penalize applications setting small
+	 * SO_SNDBUF values.
+	 * Also allow first and last skb in retransmit queue to be split.
+	 */
+	limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+	if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
+		     tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
+		     skb != tcp_rtx_queue_head(sk) &&
+		     skb != tcp_rtx_queue_tail(sk))) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
 		return -ENOMEM;
 	}
-- 
2.22.0.657.g960e92d24f-goog


^ permalink raw reply related

* Re: [PATCH v3 net-next 13/19] ionic: Add initial ethtool support
From: Andrew Lunn @ 2019-07-19 19:07 UTC (permalink / raw)
  To: Shannon Nelson; +Cc: netdev
In-Reply-To: <15796ade-68dd-4350-e481-3a3a1e7ce3d5@pensando.io>

On Fri, Jul 19, 2019 at 11:41:28AM -0700, Shannon Nelson wrote:
> On 7/18/19 7:40 PM, Andrew Lunn wrote:
> >On Thu, Jul 18, 2019 at 05:12:07PM -0700, Shannon Nelson wrote:
> >>On 7/17/19 8:28 PM, Andrew Lunn wrote:
> >>>On Fri, Jul 12, 2019 at 10:16:31PM -0700, Shannon Nelson wrote:
> >>>>On 7/8/19 7:14 PM, Andrew Lunn wrote:
> >>>>>>+static int ionic_set_pauseparam(struct net_device *netdev,
> >>>>>>+				struct ethtool_pauseparam *pause)
> >>>>>>+{
> >>>>>>+	struct lif *lif = netdev_priv(netdev);
> >>>>>>+	struct ionic *ionic = lif->ionic;
> >>>>>>+	struct ionic_dev *idev = &lif->ionic->idev;
> >>>>>>+
> >>>>>>+	u32 requested_pause;
> >>>>>>+	u32 cur_autoneg;
> >>>>>>+	int err;
> >>>>>>+
> >>>>>>+	cur_autoneg = idev->port_info->config.an_enable ? AUTONEG_ENABLE :
> >>>>>>+								AUTONEG_DISABLE;
> >>>>>>+	if (pause->autoneg != cur_autoneg) {
> >>>>>>+		netdev_info(netdev, "Please use 'ethtool -s ...' to change autoneg\n");
> >>>>>>+		return -EOPNOTSUPP;
> >>>>>>+	}
> >>>>>>+
> >>>>>>+	/* change both at the same time */
> >>>>>>+	requested_pause = PORT_PAUSE_TYPE_LINK;
> >>>>>>+	if (pause->rx_pause)
> >>>>>>+		requested_pause |= IONIC_PAUSE_F_RX;
> >>>>>>+	if (pause->tx_pause)
> >>>>>>+		requested_pause |= IONIC_PAUSE_F_TX;
> >>>>>>+
> >>>>>>+	if (requested_pause == idev->port_info->config.pause_type)
> >>>>>>+		return 0;
> >>>>>>+
> >>>>>>+	idev->port_info->config.pause_type = requested_pause;
> >>>>>>+
> >>>>>>+	mutex_lock(&ionic->dev_cmd_lock);
> >>>>>>+	ionic_dev_cmd_port_pause(idev, requested_pause);
> >>>>>>+	err = ionic_dev_cmd_wait(ionic, devcmd_timeout);
> >>>>>>+	mutex_unlock(&ionic->dev_cmd_lock);
> >>>>>>+	if (err)
> >>>>>>+		return err;
> >>>>>Hi Shannon
> >>>>>
> >>>>>I've no idea what the firmware black box is doing, but this looks
> >>>>>wrong.
> >>>>>
> >>>>>pause->autoneg is about if the results of auto-neg should be used or
> >>>>>not. If false, just configure the MAC with the pause settings and you
> >>>>>are done. If the interface is being forced, so autoneg in general is
> >>>>>disabled, just configure the MAC and you are done.
> >>>>>
> >>>>>If pause->autoneg is true and the interface is using auto-neg as a
> >>>>>whole, you pass the pause values to the PHY for it to advertise and
> >>>>>trigger an auto-neg. Once autoneg has completed, and the resolved
> >>>>>settings are available, the MAC is configured with the resolved
> >>>>>values.
> >>>>>
> >>>>>Looking at this code, i don't see any difference between configuring
> >>>>>the MAC or configuring the PHY. I would expect pause->autoneg to be
> >>>>>part of requested_pause somehow, so the firmware knows what is should
> >>>>>do.
> >>>>>
> >>>>>	Andrew
> >>>>In this device there's actually very little the driver can do to directly
> >>>>configure the mac or phy besides passing through to the firmware what the
> >>>>user has requested - that happens here for the pause values, and in
> >>>>ionic_set_link_ksettings() for autoneg.  The firmware is managing the port
> >>>>based on these requests with the help of internally configured rules defined
> >>>>in a customer setting.
> >>>I get that. But the firmware needs to conform to what Linux
> >>>expects. And what i see here does not conform. That is why i gave a
> >>>bit of detail in my reply.
> >>>
> >>>What exactly does the firmware do? Once we know that, we can figure
> >>>out when the driver should return -EOPNOTSUPP because of firmware
> >>>limitations, and what it can configure and should return 0.
> >>Because this is fairly smart FW, it handles this as expected.  I can add
> >>this as another comment in the code.
> >Hi Shannon
> >
> >Looking at the code, i don't see how it can handle this
> >correctly. Please look at my comments, particularly the meaning of
> >pause->autoneg and describe how the firmware does the right thing,
> >given what information it is passed.
> >
> 
> I guess I'm not sure how much better I can answer your question. Like
> several other devices, we don't support selecting pause->autoneg.  The
> firmware manages the PHY itself, the driver doesn't have direct access to
> the PHY.  The driver passes the tx and rx pause info down to the firmware in
> a command request.  The NIC firmware does an autoneg if autoneg is enabled
> on the port.

Hi Shannon

Thanks. That was the information i was looking for.

Please return -EOPNOTSUPP if pause->autoneg indicates autoneg results
should not be used. Your firmware does not support it, so the driver
should error out. Also the get function should use a hard coded value
for pause->autoneg.

If you ever fix your firmware, you can add full support for pause
configuration.

Thanks
	Andrew

^ permalink raw reply

* Re: [patch net-next rfc 0/7] net: introduce alternative names for network interfaces
From: Jiri Pirko @ 2019-07-19 19:16 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: netdev, davem, jakub.kicinski, sthemmin, dsahern, dcbw, mkubecek,
	andrew, parav, saeedm, mlxsw
In-Reply-To: <20190719093135.5807f41c@hermes.lan>

Fri, Jul 19, 2019 at 06:31:35PM CEST, stephen@networkplumber.org wrote:
>On Fri, 19 Jul 2019 13:00:22 +0200
>Jiri Pirko <jiri@resnulli.us> wrote:
>
>> From: Jiri Pirko <jiri@mellanox.com>
>> 
>> In the past, there was repeatedly discussed the IFNAMSIZ (16) limit for
>> netdevice name length. Now when we have PF and VF representors
>> with port names like "pfXvfY", it became quite common to hit this limit:
>> 0123456789012345
>> enp131s0f1npf0vf6
>> enp131s0f1npf0vf22
>> 
>> Udev cannot rename these interfaces out-of-the-box and user needs to
>> create custom rules to handle them.
>> 
>> Also, udev has multiple schemes of netdev names. From udev code:
>>  * Type of names:
>>  *   b<number>                             - BCMA bus core number
>>  *   c<bus_id>                             - bus id of a grouped CCW or CCW device,
>>  *                                           with all leading zeros stripped [s390]
>>  *   o<index>[n<phys_port_name>|d<dev_port>]
>>  *                                         - on-board device index number
>>  *   s<slot>[f<function>][n<phys_port_name>|d<dev_port>]
>>  *                                         - hotplug slot index number
>>  *   x<MAC>                                - MAC address
>>  *   [P<domain>]p<bus>s<slot>[f<function>][n<phys_port_name>|d<dev_port>]
>>  *                                         - PCI geographical location
>>  *   [P<domain>]p<bus>s<slot>[f<function>][u<port>][..][c<config>][i<interface>]
>>  *                                         - USB port number chain
>>  *   v<slot>                               - VIO slot number (IBM PowerVM)
>>  *   a<vendor><model>i<instance>           - Platform bus ACPI instance id
>>  *   i<addr>n<phys_port_name>              - Netdevsim bus address and port name
>> 
>> One device can be often renamed by multiple patterns at the
>> same time (e.g. pci address/mac).
>> 
>> This patchset introduces alternative names for network interfaces.
>> Main goal is to:
>> 1) Overcome the IFNAMSIZ limitation
>> 2) Allow to have multiple names at the same time (multiple udev patterns)
>> 3) Allow to use alternative names as handle for commands
>> 
>> See following examples.
>> 
>> $ ip link
>> 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
>>     link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>> 3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
>>     link/ether 7e:a2:d4:b8:91:7a brd ff:ff:ff:ff:ff:ff
>> 
>> -> Add alternative names for dummy0:  
>> 
>> $ ip link altname add dummy0 name someothername
>> $ ip link altname add dummy0 name someotherveryveryveryverylongname
>> $ ip link
>> 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
>>     link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>> 3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
>>     link/ether 7e:a2:d4:b8:91:7a brd ff:ff:ff:ff:ff:ff
>>     altname someothername
>>     altname someotherveryveryveryverylongname
>>   
>> -> Add bridge brx, add it's alternative name and use alternative names to  
>>    do enslavement.
>> 
>> $ ip link add name brx type bridge
>> $ ip link altname add brx name mypersonalsuperspecialbridge
>> $ ip link set someotherveryveryveryverylongname master mypersonalsuperspecialbridge
>> $ ip link
>> 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
>>     link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>> 3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop master brx state DOWN mode DEFAULT group default qlen 1000
>>     link/ether 7e:a2:d4:b8:91:7a brd ff:ff:ff:ff:ff:ff
>>     altname someothername
>>     altname someotherveryveryveryverylongname
>> 4: brx: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
>>     link/ether 7e:a2:d4:b8:91:7a brd ff:ff:ff:ff:ff:ff
>>     altname mypersonalsuperspecialbridge
>> 
>> -> Add ipv4 address to the bridge using alternative name:  
>>     
>> $ ip addr add 192.168.0.1/24 dev mypersonalsuperspecialbridge
>> $ ip addr show mypersonalsuperspecialbridge     
>> 4: brx: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
>>     link/ether 7e:a2:d4:b8:91:7a brd ff:ff:ff:ff:ff:ff
>>     altname mypersonalsuperspecialbridge
>>     inet 192.168.0.1/24 scope global brx
>>        valid_lft forever preferred_lft forever
>> 
>> -> Delete one of dummy0 alternative names:  
>> 
>> $ ip link altname del someotherveryveryveryverylongname    
>> $ ip link
>> 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
>>     link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>> 3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop master brx state DOWN mode DEFAULT group default qlen 1000
>>     link/ether 7e:a2:d4:b8:91:7a brd ff:ff:ff:ff:ff:ff
>>     altname someothername
>> 4: brx: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
>>     link/ether 7e:a2:d4:b8:91:7a brd ff:ff:ff:ff:ff:ff
>>     altname mypersonalsuperspecialbridge
>> 
>> TODO:
>> - notifications for alternative names add/removal
>> - sanitization of add/del cmds (similar to get link)
>> - test more usecases and write selftests
>> - extend support for other netlink ifaces (ovs for example)
>> - add sysfs symlink altname->basename?
>> 
>> Jiri Pirko (7):
>>   net: procfs: use index hashlist instead of name hashlist
>>   net: introduce name_node struct to be used in hashlist
>>   net: rtnetlink: add commands to add and delete alternative ifnames
>>   net: rtnetlink: put alternative names to getlink message
>>   net: rtnetlink: unify the code in __rtnl_newlink get dev with the rest
>>   net: rtnetlink: introduce helper to get net_device instance by ifname
>>   net: rtnetlink: add possibility to use alternative names as message
>>     handle
>> 
>>  include/linux/netdevice.h      |  14 ++-
>>  include/uapi/linux/if.h        |   1 +
>>  include/uapi/linux/if_link.h   |   3 +
>>  include/uapi/linux/rtnetlink.h |   7 ++
>>  net/core/dev.c                 | 152 ++++++++++++++++++++++----
>>  net/core/net-procfs.c          |   4 +-
>>  net/core/rtnetlink.c           | 192 +++++++++++++++++++++++++++++----
>>  security/selinux/nlmsgtab.c    |   4 +-
>>  8 files changed, 334 insertions(+), 43 deletions(-)
>> 
>
>You might want to add altname/ object property in sysfs?
>I.e
>   /sys/class/net/eth0/altname/

Sysfs representation is one point on my TODO list.
I didn't look much into this, but yeah, that would work.
I also want to have symlinks altname->basename.

>

^ permalink raw reply

* Re: [patch net-next rfc 2/7] net: introduce name_node struct to be used in hashlist
From: Jiri Pirko @ 2019-07-19 19:17 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: netdev, davem, jakub.kicinski, sthemmin, dsahern, dcbw, mkubecek,
	andrew, parav, saeedm, mlxsw
In-Reply-To: <20190719092936.60c2d925@hermes.lan>

Fri, Jul 19, 2019 at 06:29:36PM CEST, stephen@networkplumber.org wrote:
>On Fri, 19 Jul 2019 13:00:24 +0200
>Jiri Pirko <jiri@resnulli.us> wrote:
>
>> From: Jiri Pirko <jiri@mellanox.com>
>> 
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>>  include/linux/netdevice.h | 10 +++-
>>  net/core/dev.c            | 96 +++++++++++++++++++++++++++++++--------
>>  2 files changed, 86 insertions(+), 20 deletions(-)
>> 
>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>> index 88292953aa6f..74f99f127b0e 100644
>> --- a/include/linux/netdevice.h
>> +++ b/include/linux/netdevice.h
>> @@ -918,6 +918,12 @@ struct dev_ifalias {
>>  struct devlink;
>>  struct tlsdev_ops;
>>  
>> +struct netdev_name_node {
>> +	struct hlist_node hlist;
>> +	struct net_device *dev;
>> +	char *name
>
>You probably can make this const char *

I'll try.

>
>Do you want to add __rcu to this list?

Which list?


^ permalink raw reply

* Re: [PATCH v2] vrf: make sure skb->data contains ip header to make routing
From: David Ahern @ 2019-07-19 19:17 UTC (permalink / raw)
  To: Peter Kosyh; +Cc: davem, Shrijeet Mukherjee, netdev, linux-kernel
In-Reply-To: <20190719081148.11512-1-p.kosyh@gmail.com>

On 7/19/19 2:11 AM, Peter Kosyh wrote:
> vrf_process_v4_outbound() and vrf_process_v6_outbound() do routing
> using ip/ipv6 addresses, but don't make sure the header is available
> in skb->data[] (skb_headlen() is less then header size).
> 
> Case:
> 
> 1) igb driver from intel.
> 2) Packet size is greater then 255.
> 3) MPLS forwards to VRF device.
> 
> So, patch adds pskb_may_pull() calls in vrf_process_v4/v6_outbound()
> functions.
> 
> Signed-off-by: Peter Kosyh <p.kosyh@gmail.com>
> ---
>  drivers/net/vrf.c | 58 +++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 35 insertions(+), 23 deletions(-)
> 

Reviewed-by: David Ahern <dsa@cumulusnetworks.com>

^ permalink raw reply

* [PATCH bpf] libbpf: fix SIGSEGV when BTF loading fails, but .BTF.ext exists
From: Andrii Nakryiko @ 2019-07-19 19:32 UTC (permalink / raw)
  To: bpf, netdev, ast, daniel, rdna
  Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko

In case when BTF loading fails despite sanitization, but BPF object has
.BTF.ext loaded as well, we free and null obj->btf, but not
obj->btf_ext. This leads to an attempt to relocate .BTF.ext later on
during bpf_object__load(), which assumes obj->btf is present. This leads
to SIGSEGV on null pointer access. Fix bug by freeing and nulling
obj->btf_ext as well.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/lib/bpf/libbpf.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 794dd5064ae8..87168f21ef43 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1500,6 +1500,12 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 			   BTF_ELF_SEC, err);
 		btf__free(obj->btf);
 		obj->btf = NULL;
+		/* btf_ext can't exist without btf, so free it as well */
+		if (obj->btf_ext) {
+			btf_ext__free(obj->btf_ext);
+			obj->btf_ext = NULL;
+		}
+
 		if (bpf_object__is_btf_mandatory(obj))
 			return err;
 	}
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH bpf] libbpf: fix SIGSEGV when BTF loading fails, but .BTF.ext exists
From: Alexei Starovoitov @ 2019-07-19 19:38 UTC (permalink / raw)
  To: Andrii Nakryiko, bpf@vger.kernel.org, netdev@vger.kernel.org,
	daniel@iogearbox.net, Andrey Ignatov
  Cc: andrii.nakryiko@gmail.com, Kernel Team
In-Reply-To: <20190719193242.2658962-1-andriin@fb.com>

On 7/19/19 12:32 PM, Andrii Nakryiko wrote:
> In case when BTF loading fails despite sanitization, but BPF object has
> .BTF.ext loaded as well, we free and null obj->btf, but not
> obj->btf_ext. This leads to an attempt to relocate .BTF.ext later on
> during bpf_object__load(), which assumes obj->btf is present. This leads
> to SIGSEGV on null pointer access. Fix bug by freeing and nulling
> obj->btf_ext as well.
> 
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>

Applied. Thanks

^ permalink raw reply

* Re: [GIT] Networking
From: pr-tracker-bot @ 2019-07-19 19:45 UTC (permalink / raw)
  To: David Miller; +Cc: torvalds, akpm, netdev, linux-kernel
In-Reply-To: <20190718.204420.2101649864834371997.davem@davemloft.net>

The pull request you sent on Thu, 18 Jul 2019 20:44:20 -0700 (PDT):

> git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git refs/heads/master

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/5f4fc6d440d77a2cf74fe4ea56955674ac7e35e7

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply

* [PATCH bpf] libbpf: sanitize VAR to conservative 1-byte INT
From: Andrii Nakryiko @ 2019-07-19 19:46 UTC (permalink / raw)
  To: bpf, netdev, ast, daniel, rdna
  Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko

If VAR in non-sanitized BTF was size less than 4, converting such VAR
into an INT with size=4 will cause BTF validation failure due to
violationg of STRUCT (into which DATASEC was converted) member size.
Fix by conservatively using size=1.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/lib/bpf/libbpf.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 87168f21ef43..d8833ff6c4a1 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1377,8 +1377,13 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj)
 		if (!has_datasec && kind == BTF_KIND_VAR) {
 			/* replace VAR with INT */
 			t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
-			t->size = sizeof(int);
-			*(int *)(t+1) = BTF_INT_ENC(0, 0, 32);
+			/*
+			 * using size = 1 is the safest choice, 4 will be too
+			 * big and cause kernel BTF validation failure if
+			 * original variable took less than 4 bytes
+			 */
+			t->size = 1;
+			*(int *)(t+1) = BTF_INT_ENC(0, 0, 8);
 		} else if (!has_datasec && kind == BTF_KIND_DATASEC) {
 			/* replace DATASEC with STRUCT */
 			struct btf_var_secinfo *v = (void *)(t + 1);
-- 
2.17.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox