Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net 04/10] rxrpc: Don't need to take the RCU read lock in the packet receiver
From: David Howells @ 2018-10-08 22:47 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, pabeni, eric.dumazet, linux-afs, linux-kernel
In-Reply-To: <153903883882.17944.17642727588248415623.stgit@warthog.procyon.org.uk>

We don't need to take the RCU read lock in the rxrpc packet receive
function because it's held further up the stack in the IP input routine
around the UDP receive routines.

Fix this by dropping the RCU read lock calls from rxrpc_input_packet().
This simplifies the code.

Fixes: 70790dbe3f66 ("rxrpc: Pass the last Tx packet marker in the annotation buffer")
Signed-off-by: David Howells <dhowells@redhat.com>
---

 net/rxrpc/input.c |   41 +++++++++++++----------------------------
 1 file changed, 13 insertions(+), 28 deletions(-)

diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 1866aeef2284..2dcef482acf2 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1120,6 +1120,8 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
  * The socket is locked by the caller and this prevents the socket from being
  * shut down and the local endpoint from going away, thus sk_user_data will not
  * be cleared until this function returns.
+ *
+ * Called with the RCU read lock held from the IP layer via UDP.
  */
 int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 {
@@ -1215,8 +1217,6 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 	if (sp->hdr.serviceId == 0)
 		goto bad_message;
 
-	rcu_read_lock();
-
 	if (rxrpc_to_server(sp)) {
 		/* Weed out packets to services we're not offering.  Packets
 		 * that would begin a call are explicitly rejected and the rest
@@ -1228,7 +1228,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 			if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
 			    sp->hdr.seq == 1)
 				goto unsupported_service;
-			goto discard_unlock;
+			goto discard;
 		}
 	}
 
@@ -1248,7 +1248,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 			/* Connection-level packet */
 			_debug("CONN %p {%d}", conn, conn->debug_id);
 			rxrpc_post_packet_to_conn(conn, skb);
-			goto out_unlock;
+			goto out;
 		}
 
 		/* Note the serial number skew here */
@@ -1267,19 +1267,19 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 
 		/* Ignore really old calls */
 		if (sp->hdr.callNumber < chan->last_call)
-			goto discard_unlock;
+			goto discard;
 
 		if (sp->hdr.callNumber == chan->last_call) {
 			if (chan->call ||
 			    sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
-				goto discard_unlock;
+				goto discard;
 
 			/* For the previous service call, if completed
 			 * successfully, we discard all further packets.
 			 */
 			if (rxrpc_conn_is_service(conn) &&
 			    chan->last_type == RXRPC_PACKET_TYPE_ACK)
-				goto discard_unlock;
+				goto discard;
 
 			/* But otherwise we need to retransmit the final packet
 			 * from data cached in the connection record.
@@ -1290,16 +1290,14 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 						    sp->hdr.serial,
 						    sp->hdr.flags, 0);
 			rxrpc_post_packet_to_conn(conn, skb);
-			goto out_unlock;
+			goto out;
 		}
 
 		call = rcu_dereference(chan->call);
 
 		if (sp->hdr.callNumber > chan->call_id) {
-			if (rxrpc_to_client(sp)) {
-				rcu_read_unlock();
+			if (rxrpc_to_client(sp))
 				goto reject_packet;
-			}
 			if (call)
 				rxrpc_input_implicit_end_call(conn, call);
 			call = NULL;
@@ -1318,55 +1316,42 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 	if (!call || atomic_read(&call->usage) == 0) {
 		if (rxrpc_to_client(sp) ||
 		    sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
-			goto bad_message_unlock;
+			goto bad_message;
 		if (sp->hdr.seq != 1)
-			goto discard_unlock;
+			goto discard;
 		call = rxrpc_new_incoming_call(local, rx, peer, conn, skb);
-		if (!call) {
-			rcu_read_unlock();
+		if (!call)
 			goto reject_packet;
-		}
 		rxrpc_send_ping(call, skb, skew);
 		mutex_unlock(&call->user_mutex);
 	}
 
 	rxrpc_input_call_packet(call, skb, skew);
-	goto discard_unlock;
+	goto discard;
 
-discard_unlock:
-	rcu_read_unlock();
 discard:
 	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
 out:
 	trace_rxrpc_rx_done(0, 0);
 	return 0;
 
-out_unlock:
-	rcu_read_unlock();
-	goto out;
-
 wrong_security:
-	rcu_read_unlock();
 	trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
 			  RXKADINCONSISTENCY, EBADMSG);
 	skb->priority = RXKADINCONSISTENCY;
 	goto post_abort;
 
 unsupported_service:
-	rcu_read_unlock();
 	trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
 			  RX_INVALID_OPERATION, EOPNOTSUPP);
 	skb->priority = RX_INVALID_OPERATION;
 	goto post_abort;
 
 reupgrade:
-	rcu_read_unlock();
 	trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
 			  RX_PROTOCOL_ERROR, EBADMSG);
 	goto protocol_error;
 
-bad_message_unlock:
-	rcu_read_unlock();
 bad_message:
 	trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
 			  RX_PROTOCOL_ERROR, EBADMSG);

^ permalink raw reply related

* [PATCH net 03/10] rxrpc: Use the UDP encap_rcv hook
From: David Howells @ 2018-10-08 22:47 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, pabeni, eric.dumazet, linux-afs, linux-kernel
In-Reply-To: <153903883882.17944.17642727588248415623.stgit@warthog.procyon.org.uk>

Use the UDP encap_rcv hook to cut the bit out of the rxrpc packet reception
in which a packet is placed onto the UDP receive queue and then immediately
removed again by rxrpc.  Going via the queue in this manner seems like it
should be unnecessary.

This does, however, require the invention of a value to place in encap_type
as that's one of the conditions to switch packets out to the encap_rcv
hook.  Possibly the value doesn't actually matter for anything other than
sockopts on the UDP socket, which aren't accessible outside of rxrpc
anyway.

This seems to cut a bit of time out of the time elapsed between each
sk_buff being timestamped and turning up in rxrpc (the final number in the
following trace excerpts).  I measured this by making the rxrpc_rx_packet
trace point print the time elapsed between the skb being timestamped and
the current time (in ns), e.g.:

	... 424.278721: rxrpc_rx_packet: ...  ACK 25026

So doing a 512MiB DIO read from my test server, with an unmodified kernel:

	N       min     max     sum		mean    stddev
	27605   2626    7581    7.83992e+07     2840.04 181.029

and with the patch applied:

	N       min     max     sum		mean    stddev
	27547   1895    12165   6.77461e+07     2459.29 255.02

Signed-off-by: David Howells <dhowells@redhat.com>
---

 include/uapi/linux/udp.h |    1 +
 net/rxrpc/ar-internal.h  |    2 +-
 net/rxrpc/input.c        |   50 +++++++++++-----------------------------------
 net/rxrpc/local_object.c |   27 ++++++++++++++++++++-----
 4 files changed, 36 insertions(+), 44 deletions(-)

diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index 09d00f8c442b..09502de447f5 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -40,5 +40,6 @@ struct udphdr {
 #define UDP_ENCAP_L2TPINUDP	3 /* rfc2661 */
 #define UDP_ENCAP_GTP0		4 /* GSM TS 09.60 */
 #define UDP_ENCAP_GTP1U		5 /* 3GPP TS 29.060 */
+#define UDP_ENCAP_RXRPC		6
 
 #endif /* _UAPI_LINUX_UDP_H */
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 63c43b3a2096..ab60c0313fd4 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -966,7 +966,7 @@ void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
 /*
  * input.c
  */
-void rxrpc_data_ready(struct sock *);
+int rxrpc_input_packet(struct sock *, struct sk_buff *);
 
 /*
  * insecure.c
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index c3114fa66c92..1866aeef2284 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1121,7 +1121,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
  * shut down and the local endpoint from going away, thus sk_user_data will not
  * be cleared until this function returns.
  */
-void rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
+int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 {
 	struct rxrpc_connection *conn;
 	struct rxrpc_channel *chan;
@@ -1135,6 +1135,13 @@ void rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 
 	_enter("%p", udp_sk);
 
+	if (skb->tstamp == 0)
+		skb->tstamp = ktime_get_real();
+
+	rxrpc_new_skb(skb, rxrpc_skb_rx_received);
+
+	skb_pull(skb, sizeof(struct udphdr));
+
 	/* The UDP protocol already released all skb resources;
 	 * we are free to add our own data there.
 	 */
@@ -1148,8 +1155,8 @@ void rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 		static int lose;
 		if ((lose++ & 7) == 7) {
 			trace_rxrpc_rx_lose(sp);
-			rxrpc_lose_skb(skb, rxrpc_skb_rx_lost);
-			return;
+			rxrpc_free_skb(skb, rxrpc_skb_rx_lost);
+			return 0;
 		}
 	}
 
@@ -1332,7 +1339,7 @@ void rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
 out:
 	trace_rxrpc_rx_done(0, 0);
-	return;
+	return 0;
 
 out_unlock:
 	rcu_read_unlock();
@@ -1371,38 +1378,5 @@ void rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 	trace_rxrpc_rx_done(skb->mark, skb->priority);
 	rxrpc_reject_packet(local, skb);
 	_leave(" [badmsg]");
-}
-
-void rxrpc_data_ready(struct sock *udp_sk)
-{
-	struct sk_buff *skb;
-	int ret;
-
-	for (;;) {
-		skb = skb_recv_udp(udp_sk, 0, 1, &ret);
-		if (!skb) {
-			if (ret == -EAGAIN)
-				return;
-
-			/* If there was a transmission failure, we get an error
-			 * here that we need to ignore.
-			 */
-			_debug("UDP socket error %d", ret);
-			continue;
-		}
-
-		rxrpc_new_skb(skb, rxrpc_skb_rx_received);
-
-		/* we'll probably need to checksum it (didn't call sock_recvmsg) */
-		if (skb_checksum_complete(skb)) {
-			rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
-			__UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INERRORS, 0);
-			_debug("csum failed");
-			continue;
-		}
-
-		__UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INDATAGRAMS, 0);
-
-		rxrpc_input_packet(udp_sk, skb);
-	}
+	return 0;
 }
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 30862f44c9f1..cad0691c2bb4 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -19,6 +19,7 @@
 #include <linux/ip.h>
 #include <linux/hashtable.h>
 #include <net/sock.h>
+#include <net/udp.h>
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
@@ -108,7 +109,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
  */
 static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 {
-	struct sock *sock;
+	struct sock *usk;
 	int ret, opt;
 
 	_enter("%p{%d,%d}",
@@ -123,10 +124,26 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 	}
 
 	/* set the socket up */
-	sock = local->socket->sk;
-	sock->sk_user_data	= local;
-	sock->sk_data_ready	= rxrpc_data_ready;
-	sock->sk_error_report	= rxrpc_error_report;
+	usk = local->socket->sk;
+	inet_sk(usk)->mc_loop = 0;
+
+	/* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */
+	inet_inc_convert_csum(usk);
+
+	rcu_assign_sk_user_data(usk, local);
+
+	udp_sk(usk)->encap_type = UDP_ENCAP_RXRPC;
+	udp_sk(usk)->encap_rcv = rxrpc_input_packet;
+	udp_sk(usk)->encap_destroy = NULL;
+	udp_sk(usk)->gro_receive = NULL;
+	udp_sk(usk)->gro_complete = NULL;
+
+	udp_encap_enable();
+#if IS_ENABLED(CONFIG_IPV6)
+	if (local->srx.transport.family == AF_INET6)
+		udpv6_encap_enable();
+#endif
+	usk->sk_error_report = rxrpc_error_report;
 
 	/* if a local address was supplied then bind it */
 	if (local->srx.transport_len > sizeof(sa_family_t)) {

^ permalink raw reply related

* [PATCH net 00/10] rxrpc: Fix packet reception code
From: David Howells @ 2018-10-08 22:47 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, pabeni, eric.dumazet, linux-afs, linux-kernel


Here are a set of patches that prepares for and fix problems in rxrpc's
package reception code.  There serious problems are:

 (A) There's a window between binding the socket and setting the data_ready
     hook in which packets can find their way into the UDP socket's receive
     queues.

 (B) The skb_recv_udp() will return an error (and clear the error state) if
     there was an error on the Tx side.  rxrpc doesn't handle this.

 (C) The rxrpc data_ready handler doesn't fully drain the UDP receive
     queue.

 (D) The rxrpc data_ready handler assumes it is called in a non-reentrant
 state.

The second patch fixes (A) - (C); the third patch renders (B) and (C)
non-issues by using the recap_rcv hook instead of data_ready - and the
final patch fixes (D).  That last is the most complex.

The preparatory patches are:

 (1) Fix some places that are doing things in the wrong net namespace.

 (2) Stop taking the rcu read lock as it's held by the IP input routine in
     the call chain.

 (3) Only end the Tx phase if *we* rotated the final packet out of the Tx
     buffer.

 (4) Don't assume that the call state won't change after dropping the
     call_state lock.

 (5) Only take receive window and MTU suze parameters from an ACK packet if
     it's the latest ACK packet.

 (6) Record connection-level abort information correctly.

 (7) Fix a trace line.

And then there are three main patches - note that these are mixed in with
the preparatory patches somewhat:

 (1) Fix the setup window (A), skb_recv_udp() error check (B) and packet
     drainage (C).

 (2) Switch to using the encap_rcv instead of data_ready to cut out the
     effects of the UDP read queues and get the packets delivered directly.

 (3) Add more locking into the various packet input paths to defend against
     re-entrance (D).

The patches are tagged here:

	git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git
	rxrpc-fixes-20181008

and can also be found on the following branch:

	http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/log/?h=rxrpc-fixes

David
---
David Howells (10):
      rxrpc: Fix some missed refs to init_net
      rxrpc: Fix the data_ready handler
      rxrpc: Use the UDP encap_rcv hook
      rxrpc: Don't need to take the RCU read lock in the packet receiver
      rxrpc: Don't check RXRPC_CALL_TX_LAST after calling rxrpc_rotate_tx_window()
      rxrpc: Carry call state out of locked section in rxrpc_rotate_tx_window()
      rxrpc: Only take the rwind and mtu values from latest ACK
      rxrpc: Fix connection-level abort handling
      rxrpc: Fix the rxrpc_tx_packet trace line
      rxrpc: Fix the packet reception routine


 include/trace/events/rxrpc.h |    1 
 include/uapi/linux/udp.h     |    1 
 net/rxrpc/ar-internal.h      |   23 ++--
 net/rxrpc/call_accept.c      |   27 +++-
 net/rxrpc/call_object.c      |    5 -
 net/rxrpc/conn_client.c      |   10 +-
 net/rxrpc/conn_event.c       |   26 ++--
 net/rxrpc/input.c            |  253 ++++++++++++++++++++++--------------------
 net/rxrpc/local_object.c     |   30 ++++-
 net/rxrpc/peer_event.c       |    5 +
 net/rxrpc/peer_object.c      |   29 +++--
 11 files changed, 236 insertions(+), 174 deletions(-)

^ permalink raw reply

* Re: [PATCH bpf-next v3 07/15] bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP
From: Eric Dumazet @ 2018-10-08 15:31 UTC (permalink / raw)
  To: Björn Töpel, magnus.karlsson, alexander.h.duyck,
	alexander.duyck, john.fastabend, ast, brouer,
	willemdebruijn.kernel, daniel, mst, netdev
  Cc: Björn Töpel, michael.lundkvist, jesse.brandeburg,
	anjali.singhai, qi.z.zhang
In-Reply-To: <20180502110136.3738-8-bjorn.topel@gmail.com>



On 05/02/2018 04:01 AM, Björn Töpel wrote:
> From: Björn Töpel <bjorn.topel@intel.com>
> 
> The xskmap is yet another BPF map, very much inspired by
> dev/cpu/sockmap, and is a holder of AF_XDP sockets. A user application
> adds AF_XDP sockets into the map, and by using the bpf_redirect_map
> helper, an XDP program can redirect XDP frames to an AF_XDP socket.
> 
> Note that a socket that is bound to certain ifindex/queue index will
> *only* accept XDP frames from that netdev/queue index. If an XDP
> program tries to redirect from a netdev/queue index other than what
> the socket is bound to, the frame will not be received on the socket.
> 
> A socket can reside in multiple maps.
> 
> v3: Fixed race and simplified code.
> v2: Removed one indirection in map lookup.
> 
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> ---
>  include/linux/bpf.h       |  25 +++++
>  include/linux/bpf_types.h |   3 +
>  include/net/xdp_sock.h    |   7 ++
>  include/uapi/linux/bpf.h  |   1 +
>  kernel/bpf/Makefile       |   3 +
>  kernel/bpf/verifier.c     |   8 +-
>  kernel/bpf/xskmap.c       | 239 ++++++++++++++++++++++++++++++++++++++++++++++
>  net/xdp/xsk.c             |   5 +
>  8 files changed, 289 insertions(+), 2 deletions(-)
>  create mode 100644 kernel/bpf/xskmap.c
> 

This function is called under rcu_read_lock() , from map_update_elem()

> +
> +static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
> +			       u64 map_flags)
> +{
> +	struct xsk_map *m = container_of(map, struct xsk_map, map);
> +	u32 i = *(u32 *)key, fd = *(u32 *)value;
> +	struct xdp_sock *xs, *old_xs;
> +	struct socket *sock;
> +	int err;
> +
> +	if (unlikely(map_flags > BPF_EXIST))
> +		return -EINVAL;
> +	if (unlikely(i >= m->map.max_entries))
> +		return -E2BIG;
> +	if (unlikely(map_flags == BPF_NOEXIST))
> +		return -EEXIST;
> +
> +	sock = sockfd_lookup(fd, &err);
> +	if (!sock)
> +		return err;
> +
> +	if (sock->sk->sk_family != PF_XDP) {
> +		sockfd_put(sock);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	xs = (struct xdp_sock *)sock->sk;
> +
> +	if (!xsk_is_setup_for_bpf_map(xs)) {
> +		sockfd_put(sock);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	sock_hold(sock->sk);
> +
> +	old_xs = xchg(&m->xsk_map[i], xs);
> +	if (old_xs) {
> +		/* Make sure we've flushed everything. */

So it is illegal to call synchronize_net(), since it is a reschedule point.

> +		synchronize_net();
> +		sock_put((struct sock *)old_xs);
> +	}
> +
> +	sockfd_put(sock);
> +	return 0;
> +}
> 

^ permalink raw reply

* Re: [PATCH V2 net-next 2/5] net: Introduce a new MII time stamping interface.
From: Richard Cochran @ 2018-10-08 15:28 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: netdev, devicetree, David Miller, Florian Fainelli, Jacob Keller,
	Mark Rutland, Miroslav Lichvar, Rob Herring, Willem de Bruijn
In-Reply-To: <20181008150722.GC6216@lunn.ch>

On Mon, Oct 08, 2018 at 05:07:22PM +0200, Andrew Lunn wrote:
> So as you said, the phylib API has not changed much, which is common
> for mature code.

I meant that phy-LINK hasn't changed much.

> But i think long term, it will become less important.
> It will share the space with phylink. And any code which wants to be
> generically usable, should not depend on phydev.

Thanks for your view of the big picture.

> Architecturally, it
> seems wrong for you to hang what should be a generic time stamping
> framework on phydev. It is not future proof. net_device is future
> proof.

You still haven't said how net_device is going to work.

Today there are exactly zero phylink devices needing time stamping
support, but there are new phylib devices.  We don't have a
net_device->phylink connection, and it isn't needed yet.  Adding that
is way out of scope for this series.

Let's stick to phylib for now.  We can cross the other bridge when we
come to it.  Maybe the net_device->phylink will emerge for purposes
other that time stamping.  Let's not guess about how it should look.

We are only talking about kernel interfaces here, and so nothing is
set in stone.

Thanks,
Richard

^ permalink raw reply

* Re: selftests/bpf: test_kmod.sh hangs on all devices
From: Shuah Khan @ 2018-10-08 15:16 UTC (permalink / raw)
  To: Naresh Kamboju, Daniel Borkmann
  Cc: open list:KERNEL SELFTEST FRAMEWORK, Shuah Khan, Anders Roxell,
	Rafael Tinoco, ast, netdev, songliubraving, Willem de Bruijn
In-Reply-To: <CA+G9fYuEFofW=L0cmmAAJGaO4ZvaPYUg=h6XgjAY=PiacXM3JA@mail.gmail.com>

Hi Naresh,

Please use shuah@kernel.org for faster responses. I updated MAINTAINERS
entry a while back removing shuahkh@osg.samsung.com address due to IT
infrastructure changes at Samsung.

On 10/08/2018 08:55 AM, Naresh Kamboju wrote:
> Daniel,
> 
> On Mon, 8 Oct 2018 at 18:58, Daniel Borkmann <daniel@iogearbox.net> wrote:
>>
>> On 10/08/2018 03:13 PM, Naresh Kamboju wrote:
>>> BPF test case test_kmod.sh hangs on all devices running linux next.
>>>
>>> + cd /opt/kselftests/default-in-kernel/bpf
>>> + ./test_kmod.sh
>>> sysctl: cannot stat /proc/sys/net/core/bpf_jit_enable: No such file or directory
>>> sysctl: cannot stat /proc/sys/net/core/bpf_jit_harden: No such file or directory
>>> sysctl: cannot stat /proc/sys/net/core/bpf_jit_enable: No such file or directory
>>> sysctl: cannot stat /proc/sys/net/core/bpf_jit_harden: No such file or directory
>>> [ JIT enabled:0 hardened:0 ]
>>>
>>> https://lkft.validation.linaro.org/scheduler/job/429726
>>>
>>> Test hangs started from 4.19.0-rc4-next-20180918.
>>> Linux version 4.19.0-rc4-next-20180918 (oe-user@oe-host) (gcc version
>>> 7.1.1 20170707 (Linaro GCC 7.1-2017.08)) #1 SMP Tue Sep 18 05:26:00
>>> UTC 2018
>>>
>>> History can be compared from this page.
>>> https://qa-reports.linaro.org/lkft/linux-next-oe/tests/kselftest/bpf_test_kmod.sh
>>>
>>> OTOH,
>>> There is a kernel BUG,
>>
>> This is quite an old linux-next kernel, should be fixed by 100811936f89 ("bpf: test_bpf:
>> add init_net to dev for flow_dissector"). Please make sure you have that commit included
>> in your testing:
> 
> I will re-validate on latest code base and let you know.
> 
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=100811936f89fd455eda1984810c09003550555b
> 
> Thanks for the quick reply.
> 

Great. Looks like this has been sorted. Thanks Daniel.

thanks,
-- Shuah

^ permalink raw reply

* Re: [PATCH] netfilter: xt_quota: Don't use aligned attribute in sizeof
From: Pablo Neira Ayuso @ 2018-10-08 22:20 UTC (permalink / raw)
  To: Nathan Chancellor
  Cc: Jozsef Kadlecsik, Florian Westphal, netfilter-devel, coreteam,
	netdev, linux-kernel
In-Reply-To: <20181006233331.13150-1-natechancellor@gmail.com>

On Sat, Oct 06, 2018 at 04:33:31PM -0700, Nathan Chancellor wrote:
> Clang warns:
> 
> net/netfilter/xt_quota.c:47:44: warning: 'aligned' attribute ignored
> when parsing type [-Wignored-attributes]
>         BUILD_BUG_ON(sizeof(atomic64_t) != sizeof(__aligned_u64));
>                                                   ^~~~~~~~~~~~~
> 
> Use 'sizeof(__u64)' instead, as the alignment doesn't affect the size
> of the type.

Applied, thanks.

^ permalink raw reply

* Re: [PATCH V2 net-next 2/5] net: Introduce a new MII time stamping interface.
From: Andrew Lunn @ 2018-10-08 15:07 UTC (permalink / raw)
  To: Richard Cochran
  Cc: netdev, devicetree, David Miller, Florian Fainelli, Jacob Keller,
	Mark Rutland, Miroslav Lichvar, Rob Herring, Willem de Bruijn
In-Reply-To: <20181008020439.m2cytnnv5ue7yc5l@localhost>

On Sun, Oct 07, 2018 at 07:04:39PM -0700, Richard Cochran wrote:
> On Sun, Oct 07, 2018 at 09:54:00PM +0200, Andrew Lunn wrote:
> > Sure, but things have moved on since then.
> 
> I was curious about this.  Based on your uses cases, I guess that you
> mean phylib?  But not much has changed AFAICT. (There is one new
> global function and two were removed, but that doesn't change the
> picture WRT time stamping.)
> 
> Phylink now has two or three new users, one of which is dsa.  Is that
> the big move?
> 
> The situation with MACs that handle their own PHYs without phylib is
> unchanged, AFAICT.
> 
> So what exactly do you mean?

Hi Richard

We are pushing phylink. I really do think anything using > 1Gbps links
should be using phylink, not phydev. And i think we have reached the
tipping point, that most new MACs will be > 1Gbps. 2.5G or maybe 5G
will be the new default. The MAC-PHY link is quiet messy when you get
above 1G. There are a number of options which you can use, and the MAC
and PHY need to negotiate a common set to use. phylink can do this,
phylib cannot easily do it. So i see phylib slowly becoming a legacy
API for MAC drivers.

We are also slowly seeing more SFPs, and Linux controlling them. SFP
are not new, they have been in top end switches for a long time. But
they are slowly becoming more popular in industrial settings, and such
embedded industrial systems tend to let Linux control them, not
firmware. And i think industry makes more use of PTP than other
fields, but i could be wrong. Since optical SFP modules are passive, a
bump-in-the-wire time stamper actually makes sense for these.

Also, fibre on the last mile is slowly becoming more of a thing, so
maybe we will start seeing CPE, consumer routers, with SFP ports?

As i said before, we are seeing more MACs which use firmware for
controlling the PHYs. I'm not sure why yet. Maybe it is coupled with
more MACs supporting > 1G, which is messy. Or the lack of good PHY
drivers for PHYs which support > 1G? Hopefully the drivers will
improve with time.

So as you said, the phylib API has not changed much, which is common
for mature code. But i think long term, it will become less important.
It will share the space with phylink. And any code which wants to be
generically usable, should not depend on phydev. Architecturally, it
seems wrong for you to hang what should be a generic time stamping
framework on phydev. It is not future proof. net_device is future
proof.

	  Andrew

^ permalink raw reply

* Re: selftests/bpf: test_kmod.sh hangs on all devices
From: Naresh Kamboju @ 2018-10-08 14:55 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: open list:KERNEL SELFTEST FRAMEWORK, Shuah Khan, Anders Roxell,
	Rafael Tinoco, ast, netdev, songliubraving, Willem de Bruijn
In-Reply-To: <febf5bc7-9e4e-1383-33bc-00416cc90e20@iogearbox.net>

Daniel,

On Mon, 8 Oct 2018 at 18:58, Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 10/08/2018 03:13 PM, Naresh Kamboju wrote:
> > BPF test case test_kmod.sh hangs on all devices running linux next.
> >
> > + cd /opt/kselftests/default-in-kernel/bpf
> > + ./test_kmod.sh
> > sysctl: cannot stat /proc/sys/net/core/bpf_jit_enable: No such file or directory
> > sysctl: cannot stat /proc/sys/net/core/bpf_jit_harden: No such file or directory
> > sysctl: cannot stat /proc/sys/net/core/bpf_jit_enable: No such file or directory
> > sysctl: cannot stat /proc/sys/net/core/bpf_jit_harden: No such file or directory
> > [ JIT enabled:0 hardened:0 ]
> >
> > https://lkft.validation.linaro.org/scheduler/job/429726
> >
> > Test hangs started from 4.19.0-rc4-next-20180918.
> > Linux version 4.19.0-rc4-next-20180918 (oe-user@oe-host) (gcc version
> > 7.1.1 20170707 (Linaro GCC 7.1-2017.08)) #1 SMP Tue Sep 18 05:26:00
> > UTC 2018
> >
> > History can be compared from this page.
> > https://qa-reports.linaro.org/lkft/linux-next-oe/tests/kselftest/bpf_test_kmod.sh
> >
> > OTOH,
> > There is a kernel BUG,
>
> This is quite an old linux-next kernel, should be fixed by 100811936f89 ("bpf: test_bpf:
> add init_net to dev for flow_dissector"). Please make sure you have that commit included
> in your testing:

I will re-validate on latest code base and let you know.

>
> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=100811936f89fd455eda1984810c09003550555b

Thanks for the quick reply.

- Naresh
>
> Thanks,
> Daniel

^ permalink raw reply

* Re: [PATCH net-next 00/19] Add support for Aquantia AQtion USB to 5/2.5GbE devices
From: Igor Russkikh @ 2018-10-08 14:52 UTC (permalink / raw)
  To: Oliver Neukum, David S . Miller
  Cc: linux-usb@vger.kernel.org, netdev@vger.kernel.org,
	Dmitry Bezrukov
In-Reply-To: <1539008496.10342.27.camel@suse.com>



On 08.10.2018 17:21, Oliver Neukum wrote:

>> The code of this driver is based on original ASIX sources and was extended
>> by Aquantia for 5G multigig support. 
> 
> Thank you for the driver. It is good to see drivers for cool hardware.
> Unfortunately there have been a few issues I have tried to point out
> in reviews. Please fix them and resubmit.
> 

Thank you Oliver for valuable input, working on this already.

Regards,
  Igor

^ permalink raw reply

* Re: [PATCH net-next 19/19] net: usb: aqc111: Add support for wake on LAN by MAGIC packet
From: Andrew Lunn @ 2018-10-08 14:47 UTC (permalink / raw)
  To: Igor Russkikh
  Cc: David S . Miller, linux-usb@vger.kernel.org,
	netdev@vger.kernel.org, Dmitry Bezrukov
In-Reply-To: <0c6bc63e-2bc8-2479-ed22-ba174493478e@aquantia.com>

On Mon, Oct 08, 2018 at 02:12:59PM +0000, Igor Russkikh wrote:
> Hi Andrew,
> 
> >> +		if (aqc111_data->dpa) {
> >> +			aqc111_set_phy_speed(dev, AUTONEG_DISABLE, SPEED_100);
> > 
> > I don't think that works. You should leave AUTONEG on, but only
> > advertise SPEED_100 and trigger auto-neg. If you force it to 100,
> > there is no guarantee the peer will figure out what the new link speed
> > is. I've often seen failed auto-net result in 10/Half. So you will
> > loose the link, making WoL pointless.
> 
> Phy does not support 10M, low power mode explicitly uses 100M
> for power safety reasons.
> 
> It is meaningless here to add Autoneg to 100M because thats the only
> speedmask bit anyway.

If you have AUTONEG_DISABLE, i would assume you PHY is not even trying
to auto_neg. So the speedmask is irrelevent, it is not sent to the
peer. And since the peer is not receiving any auto-neg information, it
will fail to auto-neg, and most likely default to 10/Half.

To do this right, please take a look at this commit

commit 2b9672ddb6f347467d7b33b86c5dfc4d5c0501a8
Author: Heiner Kallweit <hkallweit1@gmail.com>
Date:   Thu Jul 12 21:32:53 2018 +0200

    net: phy: add phy_speed_down and phy_speed_up
    
    Some network drivers include functionality to speed down the PHY when
    suspending and just waiting for a WoL packet because this saves energy.
    This functionality is quite generic, therefore let's factor it out to
    phylib.
    
> 
> >> +	aqc111_set_phy_speed(dev, aqc111_data->autoneg,
> >> +			     aqc111_data->advertised_speed);
> >> +
> > 
> > Should that be conditional on aqc111_data->dpa?
> 
> Actually no, because set_phy_speed internally checks this flag.

So you should probably remove the check above when forcing the speed
to 100. Make the code symmetrical. 

> 
> >> +	u8 rsvd[283];
> >> +};
> > 
> > Do you really need these 283 bytes??
> 
> >>  	struct aqc111_phy_options phy_ops;
> >> +	struct aqc111_wol_cfg wol_cfg;
> > 
> > Those 283 bytes make this whole structure bigger...
> 
> FW interface expects the WOL config request WOL_CFG_SIZE bytes.
> These reserved fields are just not used now by linux driver.
> They configure extra wol features like a sleep proxy.
> Thus, we anyway have to allocate this somewhere.

Well, i think your low level function for actually sending a command
does a dup before sending. You don't actually send this, you send a
copy. Maybe you can pad it out then?

      Andrew

^ permalink raw reply

* Re: [PATCH V1 net 0/5] minor bug fixes for ENA Ethernet driver
From: Machulsky, Zorik @ 2018-10-08 14:42 UTC (permalink / raw)
  To: Bshara, Nafea, Kiyanovski, Arthur, davem@davemloft.net,
	netdev@vger.kernel.org
  Cc: Woodhouse, David, Matushevsky, Alexander, Bshara, Saeed,
	Wilson, Matt, Liguori, Anthony, Tzalik, Guy, Belgazal, Netanel,
	Saidi, Ali
In-Reply-To: <7BF68A50-AD04-4FA7-91ED-F3F6412E2E2E@amazon.com>

Acked-by: Zorik Machulsky <zorik@amazon.com>
    
    On 10/8/18, 5:28 AM, "akiyano@amazon.com" <akiyano@amazon.com> wrote:
    
        From: Arthur Kiyanovski <akiyano@amazon.com>
        
        Arthur Kiyanovski (5):
          net: ena: fix indentations in ena_defs for better readability
          net: ena: fix warning in rmmod caused by double iounmap
          net: ena: fix rare bug when failed restart/resume is followed by
            driver removal
          net: ena: fix NULL dereference due to untimely napi initialization
          net: ena: fix auto casting to boolean
        
         drivers/net/ethernet/amazon/ena/ena_admin_defs.h  | 308 +++++++++-------------
         drivers/net/ethernet/amazon/ena/ena_eth_com.c     |   8 +-
         drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h | 219 ++++++++-------
         drivers/net/ethernet/amazon/ena/ena_netdev.c      |  23 +-
         drivers/net/ethernet/amazon/ena/ena_regs_defs.h   | 206 +++++++--------
         5 files changed, 341 insertions(+), 423 deletions(-)
        
        -- 
        2.7.4
        
        
    
    


^ permalink raw reply

* Re: [PATCH V1 net 0/5] minor bug fixes for ENA Ethernet driver
From: Machulsky, Zorik @ 2018-10-08 14:38 UTC (permalink / raw)
  To: Bshara, Nafea, Kiyanovski, Arthur, davem@davemloft.net,
	netdev@vger.kernel.org
  Cc: Woodhouse, David, Matushevsky, Alexander, Bshara, Saeed,
	Wilson, Matt, Liguori, Anthony, Tzalik, Guy, Belgazal, Netanel,
	Saidi, Ali
In-Reply-To: <7BF68A50-AD04-4FA7-91ED-F3F6412E2E2E@amazon.com>

Ship it

On 10/8/18, 5:47 AM, "Bshara, Nafea" <nafea@amazon.com> wrote:

    Ship it
    
    On 10/8/18, 5:28 AM, "akiyano@amazon.com" <akiyano@amazon.com> wrote:
    
        From: Arthur Kiyanovski <akiyano@amazon.com>
        
        Arthur Kiyanovski (5):
          net: ena: fix indentations in ena_defs for better readability
          net: ena: fix warning in rmmod caused by double iounmap
          net: ena: fix rare bug when failed restart/resume is followed by
            driver removal
          net: ena: fix NULL dereference due to untimely napi initialization
          net: ena: fix auto casting to boolean
        
         drivers/net/ethernet/amazon/ena/ena_admin_defs.h  | 308 +++++++++-------------
         drivers/net/ethernet/amazon/ena/ena_eth_com.c     |   8 +-
         drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h | 219 ++++++++-------
         drivers/net/ethernet/amazon/ena/ena_netdev.c      |  23 +-
         drivers/net/ethernet/amazon/ena/ena_regs_defs.h   | 206 +++++++--------
         5 files changed, 341 insertions(+), 423 deletions(-)
        
        -- 
        2.7.4
        
        
    
    


^ permalink raw reply

* Re: [PATCH net-next 05/19] net: usb: aqc111: Introduce PHY access
From: Oliver Neukum @ 2018-10-08 14:24 UTC (permalink / raw)
  To: Igor Russkikh, David S . Miller
  Cc: Dmitry Bezrukov, linux-usb@vger.kernel.org,
	netdev@vger.kernel.org
In-Reply-To: <747f70d8-303a-aeea-2359-26b03a5e0336@aquantia.com>

On Mo, 2018-10-08 at 17:10 +0300, Igor Russkikh wrote:
> Hi Oliver,
> 
> > > +	aqc111_read_cmd(dev, AQ_ACCESS_MAC, AQ_FW_VER_MAJOR,
> > > +			1, 1, &aqc111_data->fw_ver.major);
> > > +	aqc111_read_cmd(dev, AQ_ACCESS_MAC, AQ_FW_VER_MINOR,
> > > +			1, 1, &aqc111_data->fw_ver.minor);
> > > +	aqc111_read_cmd(dev, AQ_ACCESS_MAC, AQ_FW_VER_REV,
> > > +			1, 1, &aqc111_data->fw_ver.rev);
> > 
> > Why read the stuff you don't need?
> 
> fw_ver is used below to determine phy access mode.
> 
> fw_ver.rev is not used in this exact patch, thats true,
> but it gets reported in later patches in the set.

Hi,

OK that makes sense.

	Regards
		Oliver

^ permalink raw reply

* Re: [PATCH net-next 00/19] Add support for Aquantia AQtion USB to 5/2.5GbE devices
From: Oliver Neukum @ 2018-10-08 14:21 UTC (permalink / raw)
  To: Igor Russkikh, David S . Miller
  Cc: linux-usb@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <cover.1538734658.git.igor.russkikh@aquantia.com>

On Fr, 2018-10-05 at 10:24 +0000, Igor Russkikh wrote:
> This patchset introduces support for new multigig ethernet to USB dongle,
> developed jointly by Aquantia (Phy) and ASIX (USB MAC).
> 
> The driver has similar structure with other ASIX MAC drivers (AX88179), but
> with a number of important differences:
> - Driver supports both direct Phy and custom firmware interface for Phy
>   programming. This is due to different firmware modules available with this
>   product.
> - Driver handles new 2.5G/5G link speed configuration and reporting.
> - Device support all speeds from 100M up to 5G.
> - Device supports MTU up to 16K.
> 
> Device supports various standard networking features, like
> checksum offloads, vlan tagging/filtering, TSO.
> 
> The code of this driver is based on original ASIX sources and was extended
> by Aquantia for 5G multigig support. 

Thank you for the driver. It is good to see drivers for cool hardware.
Unfortunately there have been a few issues I have tried to point out
in reviews. Please fix them and resubmit.

	Regards
		Oliver

^ permalink raw reply

* Re: [PATCH v8 08/15] octeontx2-af: Add RVU block LF provisioning support
From: Arnd Bergmann @ 2018-10-08 14:29 UTC (permalink / raw)
  To: Sunil Kovvuri; +Cc: Networking, David Miller, linux-soc, sgoutham
In-Reply-To: <CA+sq2CcXCEY14d+dsFJvokFLykweeZUxPp911-oVyvW=Ye1N=A@mail.gmail.com>

On Mon, Oct 8, 2018 at 3:59 PM Sunil Kovvuri <sunil.kovvuri@gmail.com> wrote:
> On Mon, Oct 8, 2018 at 5:41 PM Arnd Bergmann <arnd@arndb.de> wrote:
> > On Sun, Oct 7, 2018 at 5:00 PM <sunil.kovvuri@gmail.com> wrote:
> > >
> > > +/* Structure for requesting resource provisioning.
> > > + * 'modify' flag to be used when either requesting more
> > > + * or to detach partial of a cetain resource type.
> > > + * Rest of the fields specify how many of what type to
> > > + * be attached.
> > > + */
> > > +struct rsrc_attach {
> > > +       struct mbox_msghdr hdr;
> > > +       u8   modify:1;
> > > +       u8   npalf:1;
> > > +       u8   nixlf:1;
> > > +       u16  sso;
> > > +       u16  ssow;
> > > +       u16  timlfs;
> > > +       u16  cptlfs;
> > > +};
> > > +
> > > +/* Structure for relinquishing resources.
> > > + * 'partial' flag to be used when relinquishing all resources
> > > + * but only of a certain type. If not set, all resources of all
> > > + * types provisioned to the RVU function will be detached.
> > > + */
> > > +struct rsrc_detach {
> > > +       struct mbox_msghdr hdr;
> > > +       u8 partial:1;
> > > +       u8 npalf:1;
> > > +       u8 nixlf:1;
> > > +       u8 sso:1;
> > > +       u8 ssow:1;
> > > +       u8 timlfs:1;
> > > +       u8 cptlfs:1;
> > > +};
> >
> > Are these bitfields part of the message that gets sent to the
> > underlying implementation? It seems there is still an endianess
> > issue then.
>
> No these structures are not used for kernel driver to firmware
> communication where
> register reads via readq are involved. These structures are used for
> mailbox communication
> between different PCI devices and this mailbox is a shared memory.

Ok, thanks for the clarification.

       Arnd

^ permalink raw reply

* Re: [PATCH v8 01/15] octeontx2-af: Add Marvell OcteonTX2 RVU AF driver
From: Arnd Bergmann @ 2018-10-08 14:29 UTC (permalink / raw)
  To: Sunil Kovvuri; +Cc: Networking, David Miller, linux-soc, sgoutham
In-Reply-To: <CA+sq2Cdi2VqoycE9Z37sToro=q-mrX5jqq+HyL_bFKn+-DHoyA@mail.gmail.com>

On Mon, Oct 8, 2018 at 3:50 PM Sunil Kovvuri <sunil.kovvuri@gmail.com> wrote:
>
> On Mon, Oct 8, 2018 at 5:52 PM Arnd Bergmann <arnd@arndb.de> wrote:
> >
> > On Sun, Oct 7, 2018 at 4:59 PM <sunil.kovvuri@gmail.com> wrote:
> >
> > > --- /dev/null
> > > +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig
> > > @@ -0,0 +1,12 @@
> > > +#
> > > +# Marvell OcteonTX2 drivers configuration
> > > +#
> > > +
> > > +config OCTEONTX2_AF
> > > +       tristate "Marvell OcteonTX2 RVU Admin Function driver"
> > > +       depends on ARM64 && PCI
> >
> > You should try to allow building it on x86 and other architectures, even though
> > the driver won't be used there this helps get reports from static
> > build infrastructure.
> > You could use e.g.
> >
> >       depends on (64BIT && COMPILE_TEST) || ARM64
> >       depends on PCI
> >
> >            Arnd
>
> Thanks for the suggestion.
> But going forward we will have few arm64 assembly instructions used in
> the driver.

What is those inline assembly for? Is there a chance they can
be abstracted through some other interface?

     Arnd

^ permalink raw reply

* Re: [PATCH net-next 18/19] net: usb: aqc111: Implement get/set_link_ksettings callbacks
From: Oliver Neukum @ 2018-10-08 14:18 UTC (permalink / raw)
  To: Igor Russkikh, David S . Miller
  Cc: Dmitry Bezrukov, linux-usb@vger.kernel.org,
	netdev@vger.kernel.org
In-Reply-To: <694b148fa9e79d42fe8a815821ae5b0bc12c4aea.1538734658.git.igor.russkikh@aquantia.com>

On Fr, 2018-10-05 at 10:25 +0000, Igor Russkikh wrote:
> From: Dmitry Bezrukov <dmitry.bezrukov@aquantia.com>
> 
> +static int aqc111_get_link_ksettings(struct net_device *net,
> +				     struct ethtool_link_ksettings *elk)
> +{
> +	struct usbnet *dev = netdev_priv(net);
> +	enum usb_device_speed usb_speed = dev->udev->speed;
> +	struct aqc111_data *aqc111_data = (struct aqc111_data *)dev->data[0];
> +	u32 speed = SPEED_UNKNOWN;
> +
> +	ethtool_link_ksettings_zero_link_mode(elk, supported);
> +	ethtool_link_ksettings_add_link_mode(elk, supported,
> +					     100baseT_Full);
> +	ethtool_link_ksettings_add_link_mode(elk, supported,
> +					     1000baseT_Full);
> +	if (usb_speed == USB_SPEED_SUPER) {

And SUPER_PLUS?

	Regards
		Oliver

^ permalink raw reply

* Re: [PATCH net-next 15/19] net: usb: aqc111: Add support for VLAN_CTAG_TX/RX offload
From: Oliver Neukum @ 2018-10-08 14:14 UTC (permalink / raw)
  To: Igor Russkikh, David S . Miller
  Cc: Dmitry Bezrukov, linux-usb@vger.kernel.org,
	netdev@vger.kernel.org
In-Reply-To: <7d3c26e57edd4cc4f0995e66ec71be9d1c8f53d9.1538734658.git.igor.russkikh@aquantia.com>

On Fr, 2018-10-05 at 10:25 +0000, Igor Russkikh wrote:
> From: Dmitry Bezrukov <dmitry.bezrukov@aquantia.com>
> 
> Signed-off-by: Dmitry Bezrukov <dmitry.bezrukov@aquantia.com>
> Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
> ---
>  drivers/net/usb/aqc111.c | 14 ++++++++++++++
>  drivers/net/usb/aqc111.h |  7 ++++++-
>  2 files changed, 20 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c
> index cc23c39beab3..a9051dd7c5bd 100644
> --- a/drivers/net/usb/aqc111.c
> +++ b/drivers/net/usb/aqc111.c
> @@ -524,6 +524,7 @@ static int aqc111_bind(struct usbnet *dev, struct usb_interface *intf)
>  
>  	dev->net->hw_features |= AQ_SUPPORT_HW_FEATURE;
>  	dev->net->features |= AQ_SUPPORT_FEATURE;
> +	dev->net->vlan_features |= AQ_SUPPORT_VLAN_FEATURE;
>  
>  	aqc111_read_fw_version(dev, aqc111_data);
>  	aqc111_data->autoneg = AUTONEG_ENABLE;
> @@ -817,6 +818,7 @@ static int aqc111_reset(struct usbnet *dev)
>  
>  	dev->net->hw_features |= AQ_SUPPORT_HW_FEATURE;
>  	dev->net->features |= AQ_SUPPORT_FEATURE;
> +	dev->net->vlan_features |= AQ_SUPPORT_VLAN_FEATURE;
>  
>  	/* Power up ethernet PHY */
>  	aqc111_data->phy_ops.advertising = 0;
> @@ -992,6 +994,11 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
>  		new_skb->truesize = new_skb->len + sizeof(struct sk_buff);
>  		if (aqc111_data->rx_checksum)
>  			aqc111_rx_checksum(new_skb, &pkt_desc);
> +		if (pkt_desc->vlan_ind)
> +			__vlan_hwaccel_put_tag(new_skb,
> +					       htons(ETH_P_8021Q),
> +					       pkt_desc->vlan_tag &
> +					       VLAN_VID_MASK);
>  
>  		usbnet_skb_return(dev, new_skb);
>  		if (pkt_count == 0)
> @@ -1020,6 +1027,7 @@ static struct sk_buff *aqc111_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
>  	int tailroom = 0;
>  	int padding_size = 0;
>  	struct sk_buff *new_skb = NULL;
> +	u16 tci = 0;
>  
>  	memset(&tx_hdr, 0x00, sizeof(tx_hdr));
>  
> @@ -1038,6 +1046,12 @@ static struct sk_buff *aqc111_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
>  		tx_hdr.drop_padding = 1;
>  	}
>  
> +	/* Vlan Tag */
> +	if (vlan_get_tag(skb, &tci) >= 0) {
> +		tx_hdr.vlan_tag = 1;
> +		tx_hdr.vlan_info = tci;

Endianness

	Regards
		Oliver

^ permalink raw reply

* Re: [PATCH net-next 13/19] net: usb: aqc111: Add support for TSO
From: Oliver Neukum @ 2018-10-08 14:12 UTC (permalink / raw)
  To: Igor Russkikh, David S . Miller
  Cc: Dmitry Bezrukov, linux-usb@vger.kernel.org,
	netdev@vger.kernel.org
In-Reply-To: <b4e69e6a2b05929bbba681f1bdd115eb32cb64b0.1538734658.git.igor.russkikh@aquantia.com>

On Fr, 2018-10-05 at 10:25 +0000, Igor Russkikh wrote:
> From: Dmitry Bezrukov <dmitry.bezrukov@aquantia.com>
> 
> Signed-off-by: Dmitry Bezrukov <dmitry.bezrukov@aquantia.com>
> Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
> ---
>  drivers/net/usb/aqc111.c | 3 +++
>  drivers/net/usb/aqc111.h | 6 ++++--
>  2 files changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c
> index 6efd9a9ad44e..f61fa7446b72 100644
> --- a/drivers/net/usb/aqc111.c
> +++ b/drivers/net/usb/aqc111.c
> @@ -964,6 +964,9 @@ static struct sk_buff *aqc111_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
>  	/*Length of actual data*/
>  	tx_hdr.length = (skb->len & 0x1FFFFF);
>  
> +	/* TSO MSS */
> +	tx_hdr.max_seg_size = skb_shinfo(skb)->gso_size;

Endianness
> +
>  	headroom = (skb->len + AQ_TX_HEADER_SIZE) % 8;
>  	if (headroom != 0)
>  		padding_size = 8 - headroom;

	Regards
		Oliver

^ permalink raw reply

* Re: [PATCH net-next 05/19] net: usb: aqc111: Introduce PHY access
From: Igor Russkikh @ 2018-10-08 14:10 UTC (permalink / raw)
  To: Oliver Neukum, David S . Miller
  Cc: Dmitry Bezrukov, linux-usb@vger.kernel.org,
	netdev@vger.kernel.org
In-Reply-To: <1539006774.10342.16.camel@suse.com>

Hi Oliver,

>> +	aqc111_read_cmd(dev, AQ_ACCESS_MAC, AQ_FW_VER_MAJOR,
>> +			1, 1, &aqc111_data->fw_ver.major);
>> +	aqc111_read_cmd(dev, AQ_ACCESS_MAC, AQ_FW_VER_MINOR,
>> +			1, 1, &aqc111_data->fw_ver.minor);
>> +	aqc111_read_cmd(dev, AQ_ACCESS_MAC, AQ_FW_VER_REV,
>> +			1, 1, &aqc111_data->fw_ver.rev);
> 
> Why read the stuff you don't need?

fw_ver is used below to determine phy access mode.

fw_ver.rev is not used in this exact patch, thats true,
but it gets reported in later patches in the set.

Regards,
  Igor

^ permalink raw reply

* Re: [PATCH net-next 05/19] net: usb: aqc111: Introduce PHY access
From: Igor Russkikh @ 2018-10-08 14:07 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: David S . Miller, linux-usb@vger.kernel.org,
	netdev@vger.kernel.org, Dmitry Bezrukov
In-Reply-To: <20181008121707.GB6216@lunn.ch>


Hi Andrew,

>>>> +	struct aqc111_data *aqc111_data = (struct aqc111_data *)dev->data[0];
>>>
>>> Having to do this cast all the time is quiet ugly. It seems like some
>>> other usb_net drivers use netdev_priv().
>>
>> As I see most of usb usbnet based devices use the same theme with accessing
>> private data via dev->data.
> 
> It is just ugly. It would of been better if dev->data[] was a void
> pointer. This is the first usbnet driver i've reviewed, so i don't
> know the history behind this. I wonder if adding a void *priv would be
> accepted?

I can't comment on history of this, but...

net-next/drivers/net/usb$ grep "dev->data" * | wc -l
173

Regards,
  Igor

^ permalink raw reply

* [PATCH] qed: fix memory leak of pent on error exit paths
From: Colin King @ 2018-10-08 14:17 UTC (permalink / raw)
  To: Ariel Elior, everest-linux-l2, David S . Miller; +Cc: kernel-janitors, netdev

From: Colin Ian King <colin.king@canonical.com>

Currently, calls to qed_sp_init_request can leak pent on -EINVAL errors.
Fix this by adding the allocated pent back to the p_hwfn free_pool
on these error exits so it can be re-used later and hence fixes the
leak.

Detected by CoverityScan, CID#1411643 ("Resource leak")

Fixes: fe56b9e6a8d9 ("qed: Add module with basic common support")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 .../net/ethernet/qlogic/qed/qed_sp_commands.c    | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index 77b6248ad3b9..810475a6522a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -79,8 +79,10 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 		break;
 
 	case QED_SPQ_MODE_BLOCK:
-		if (!p_data->p_comp_data)
-			return -EINVAL;
+		if (!p_data->p_comp_data) {
+			rc = -EINVAL;
+			goto err;
+		}
 
 		p_ent->comp_cb.cookie = p_data->p_comp_data->cookie;
 		break;
@@ -95,7 +97,8 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 	default:
 		DP_NOTICE(p_hwfn, "Unknown SPQE completion mode %d\n",
 			  p_ent->comp_mode);
-		return -EINVAL;
+		rc = -EINVAL;
+		goto err;
 	}
 
 	DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
@@ -109,6 +112,13 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 	memset(&p_ent->ramrod, 0, sizeof(p_ent->ramrod));
 
 	return 0;
+
+err:
+	qed_spq_return_entry(p_hwfn, *pp_ent);
+	*pp_ent = NULL;
+
+	return rc;
+
 }
 
 static enum tunnel_clss qed_tunn_clss_to_fw_clss(u8 type)
-- 
2.17.1

^ permalink raw reply related

* Re: [PATCH net-next 08/19] net: usb: aqc111: Implement TX data path
From: Oliver Neukum @ 2018-10-08 14:07 UTC (permalink / raw)
  To: Igor Russkikh, Andrew Lunn
  Cc: Dmitry Bezrukov, David S . Miller, linux-usb@vger.kernel.org,
	netdev@vger.kernel.org
In-Reply-To: <3d55fcaf-3297-f315-b258-2887d5a64cf6@aquantia.com>

On Mo, 2018-10-08 at 13:43 +0000, Igor Russkikh wrote:
> > > +	skb_push(skb, AQ_TX_HEADER_SIZE);
> > > +	cpu_to_le64s(&tx_hdr);
> > 
> > Is that portable? tx_hdr is a structure of 2x u32 bitfields.  What
> > endian have you tested that one?
> > 
> 
> You are right, this is wrong for BE hardware.
> 
> We don't have such a hardware to check unfortunately.
> Think its better to drop endianess conversions and declare
> the driver as little endian only.
> 
> Do you think that'll be acceptable?

No. If worse comes to worse define it u64 and set the values
manually.

	Regards
		Oliver

^ permalink raw reply

* [PATCH net-next] dpaa2-eth: Don't account Tx confirmation frames on NAPI poll
From: Ioana Ciocoi Radulescu @ 2018-10-08 14:16 UTC (permalink / raw)
  To: netdev@vger.kernel.org, davem@davemloft.net; +Cc: Ioana Ciornei

Until now, both Rx and Tx confirmation frames handled during
NAPI poll were counted toward the NAPI budget. However, Tx
confirmations are lighter to process than Rx frames, which can
skew the amount of work actually done inside one NAPI cycle.

Update the code to only count Rx frames toward the NAPI budget
and set a separate threshold on how many Tx conf frames can be
processed in one poll cycle.

The NAPI poll routine stops when either the budget is consumed
by Rx frames or when Tx confirmation frames reach this threshold.

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 68 +++++++++++++++---------
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h |  5 ++
 2 files changed, 47 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 108c137..156080d 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -289,10 +289,11 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
  *
  * Observance of NAPI budget is not our concern, leaving that to the caller.
  */
-static int consume_frames(struct dpaa2_eth_channel *ch)
+static int consume_frames(struct dpaa2_eth_channel *ch,
+			  enum dpaa2_eth_fq_type *type)
 {
 	struct dpaa2_eth_priv *priv = ch->priv;
-	struct dpaa2_eth_fq *fq;
+	struct dpaa2_eth_fq *fq = NULL;
 	struct dpaa2_dq *dq;
 	const struct dpaa2_fd *fd;
 	int cleaned = 0;
@@ -311,12 +312,23 @@ static int consume_frames(struct dpaa2_eth_channel *ch)
 
 		fd = dpaa2_dq_fd(dq);
 		fq = (struct dpaa2_eth_fq *)(uintptr_t)dpaa2_dq_fqd_ctx(dq);
-		fq->stats.frames++;
 
 		fq->consume(priv, ch, fd, &ch->napi, fq->flowid);
 		cleaned++;
 	} while (!is_last);
 
+	if (!cleaned)
+		return 0;
+
+	fq->stats.frames += cleaned;
+	ch->stats.frames += cleaned;
+
+	/* A dequeue operation only pulls frames from a single queue
+	 * into the store. Return the frame queue type as an out param.
+	 */
+	if (type)
+		*type = fq->type;
+
 	return cleaned;
 }
 
@@ -921,14 +933,16 @@ static int pull_channel(struct dpaa2_eth_channel *ch)
 static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
 {
 	struct dpaa2_eth_channel *ch;
-	int cleaned = 0, store_cleaned;
 	struct dpaa2_eth_priv *priv;
+	int rx_cleaned = 0, txconf_cleaned = 0;
+	enum dpaa2_eth_fq_type type;
+	int store_cleaned;
 	int err;
 
 	ch = container_of(napi, struct dpaa2_eth_channel, napi);
 	priv = ch->priv;
 
-	while (cleaned < budget) {
+	do {
 		err = pull_channel(ch);
 		if (unlikely(err))
 			break;
@@ -936,30 +950,32 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
 		/* Refill pool if appropriate */
 		refill_pool(priv, ch, priv->bpid);
 
-		store_cleaned = consume_frames(ch);
-		cleaned += store_cleaned;
+		store_cleaned = consume_frames(ch, &type);
+		if (type == DPAA2_RX_FQ)
+			rx_cleaned += store_cleaned;
+		else
+			txconf_cleaned += store_cleaned;
 
-		/* If we have enough budget left for a full store,
-		 * try a new pull dequeue, otherwise we're done here
+		/* If we either consumed the whole NAPI budget with Rx frames
+		 * or we reached the Tx confirmations threshold, we're done.
 		 */
-		if (store_cleaned == 0 ||
-		    cleaned > budget - DPAA2_ETH_STORE_SIZE)
-			break;
-	}
-
-	if (cleaned < budget && napi_complete_done(napi, cleaned)) {
-		/* Re-enable data available notifications */
-		do {
-			err = dpaa2_io_service_rearm(ch->dpio, &ch->nctx);
-			cpu_relax();
-		} while (err == -EBUSY);
-		WARN_ONCE(err, "CDAN notifications rearm failed on core %d",
-			  ch->nctx.desired_cpu);
-	}
+		if (rx_cleaned >= budget ||
+		    txconf_cleaned >= DPAA2_ETH_TXCONF_PER_NAPI)
+			return budget;
+	} while (store_cleaned);
 
-	ch->stats.frames += cleaned;
+	/* We didn't consume the entire budget, so finish napi and
+	 * re-enable data availability notifications
+	 */
+	napi_complete_done(napi, rx_cleaned);
+	do {
+		err = dpaa2_io_service_rearm(ch->dpio, &ch->nctx);
+		cpu_relax();
+	} while (err == -EBUSY);
+	WARN_ONCE(err, "CDAN notifications rearm failed on core %d",
+		  ch->nctx.desired_cpu);
 
-	return cleaned;
+	return max(rx_cleaned, 1);
 }
 
 static void enable_ch_napi(struct dpaa2_eth_priv *priv)
@@ -1076,7 +1092,7 @@ static u32 drain_channel(struct dpaa2_eth_priv *priv,
 
 	do {
 		pull_channel(ch);
-		drained = consume_frames(ch);
+		drained = consume_frames(ch, NULL);
 		total += drained;
 	} while (drained);
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 7a7a3e7..452a8e9 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -40,6 +40,11 @@
  */
 #define DPAA2_ETH_TAILDROP_THRESH	(64 * 1024)
 
+/* Maximum number of Tx confirmation frames to be processed
+ * in a single NAPI call
+ */
+#define DPAA2_ETH_TXCONF_PER_NAPI	256
+
 /* Buffer quota per queue. Must be large enough such that for minimum sized
  * frames taildrop kicks in before the bpool gets depleted, so we compute
  * how many 64B frames fit inside the taildrop threshold and add a margin
-- 
2.7.4

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox