Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCHv2 net-next 3/5] sctp: clean up __sctp_connect
From: Xin Long @ 2019-07-30 12:38 UTC (permalink / raw)
  To: network dev, linux-sctp; +Cc: Marcelo Ricardo Leitner, Neil Horman, davem
In-Reply-To: <cover.1564490276.git.lucien.xin@gmail.com>

__sctp_connect is doing quit similar things as sctp_sendmsg_new_asoc.
To factor out common functions, this patch is to clean up their code
to make them look more similar:

  1. create the asoc and add a peer with the 1st addr.
  2. add peers with the other addrs into this asoc one by one.

while at it, also remove the unused 'addrcnt'.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 net/sctp/socket.c | 209 +++++++++++++++++++-----------------------------------
 1 file changed, 73 insertions(+), 136 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e9c5b39..b9804e5 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1049,153 +1049,105 @@ static int sctp_setsockopt_bindx(struct sock *sk,
  * Common routine for handling connect() and sctp_connectx().
  * Connect will come in with just a single address.
  */
-static int __sctp_connect(struct sock *sk,
-			  struct sockaddr *kaddrs,
-			  int addrs_size, int flags,
-			  sctp_assoc_t *assoc_id)
+static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs,
+			  int addrs_size, int flags, sctp_assoc_t *assoc_id)
 {
-	struct net *net = sock_net(sk);
-	struct sctp_sock *sp;
-	struct sctp_endpoint *ep;
-	struct sctp_association *asoc = NULL;
-	struct sctp_association *asoc2;
+	struct sctp_association *old, *asoc;
+	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_endpoint *ep = sp->ep;
 	struct sctp_transport *transport;
-	union sctp_addr to;
+	struct net *net = sock_net(sk);
+	void *addr_buf = kaddrs;
+	union sctp_addr *daddr;
 	enum sctp_scope scope;
+	struct sctp_af *af;
+	int walk_size, err;
 	long timeo;
-	int err = 0;
-	int addrcnt = 0;
-	int walk_size = 0;
-	union sctp_addr *sa_addr = NULL;
-	void *addr_buf;
-	unsigned short port;
 
-	sp = sctp_sk(sk);
-	ep = sp->ep;
-
-	/* connect() cannot be done on a socket that is already in ESTABLISHED
-	 * state - UDP-style peeled off socket or a TCP-style socket that
-	 * is already connected.
-	 * It cannot be done even on a TCP-style listening socket.
-	 */
 	if (sctp_sstate(sk, ESTABLISHED) || sctp_sstate(sk, CLOSING) ||
-	    (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) {
-		err = -EISCONN;
-		goto out_free;
+	    (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)))
+		return -EISCONN;
+
+	daddr = addr_buf;
+	af = sctp_get_af_specific(daddr->sa.sa_family);
+	if (!af || af->sockaddr_len > addrs_size)
+		return -EINVAL;
+
+	err = sctp_verify_addr(sk, daddr, af->sockaddr_len);
+	if (err)
+		return err;
+
+	asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+	if (asoc)
+		return asoc->state >= SCTP_STATE_ESTABLISHED ? -EISCONN
+							     : -EALREADY;
+
+	if (sctp_endpoint_is_peeled_off(ep, daddr))
+		return -EADDRNOTAVAIL;
+
+	if (!ep->base.bind_addr.port) {
+		if (sctp_autobind(sk))
+			return -EAGAIN;
+	} else {
+		if (ep->base.bind_addr.port < inet_prot_sock(net) &&
+		    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
+			return -EACCES;
 	}
 
-	/* Walk through the addrs buffer and count the number of addresses. */
-	addr_buf = kaddrs;
-	while (walk_size < addrs_size) {
-		struct sctp_af *af;
+	scope = sctp_scope(daddr);
+	asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
+	if (!asoc)
+		return -ENOMEM;
 
-		if (walk_size + sizeof(sa_family_t) > addrs_size) {
-			err = -EINVAL;
-			goto out_free;
-		}
+	err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL);
+	if (err < 0)
+		goto out_free;
 
-		sa_addr = addr_buf;
-		af = sctp_get_af_specific(sa_addr->sa.sa_family);
+	transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+	if (!transport) {
+		err = -ENOMEM;
+		goto out_free;
+	}
 
-		/* If the address family is not supported or if this address
-		 * causes the address buffer to overflow return EINVAL.
-		 */
-		if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
-			err = -EINVAL;
+	addr_buf += af->sockaddr_len;
+	walk_size = af->sockaddr_len;
+	while (walk_size < addrs_size) {
+		err = -EINVAL;
+		if (walk_size + sizeof(sa_family_t) > addrs_size)
 			goto out_free;
-		}
 
-		port = ntohs(sa_addr->v4.sin_port);
-
-		/* Save current address so we can work with it */
-		memcpy(&to, sa_addr, af->sockaddr_len);
+		daddr = addr_buf;
+		af = sctp_get_af_specific(daddr->sa.sa_family);
+		if (!af || af->sockaddr_len + walk_size > addrs_size)
+			goto out_free;
 
-		err = sctp_verify_addr(sk, &to, af->sockaddr_len);
-		if (err)
+		if (asoc->peer.port != ntohs(daddr->v4.sin_port))
 			goto out_free;
 
-		/* Make sure the destination port is correctly set
-		 * in all addresses.
-		 */
-		if (asoc && asoc->peer.port && asoc->peer.port != port) {
-			err = -EINVAL;
+		err = sctp_verify_addr(sk, daddr, af->sockaddr_len);
+		if (err)
 			goto out_free;
-		}
 
-		/* Check if there already is a matching association on the
-		 * endpoint (other than the one created here).
-		 */
-		asoc2 = sctp_endpoint_lookup_assoc(ep, &to, &transport);
-		if (asoc2 && asoc2 != asoc) {
-			if (asoc2->state >= SCTP_STATE_ESTABLISHED)
-				err = -EISCONN;
-			else
-				err = -EALREADY;
+		old = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+		if (old && old != asoc) {
+			err = old->state >= SCTP_STATE_ESTABLISHED ? -EISCONN
+								   : -EALREADY;
 			goto out_free;
 		}
 
-		/* If we could not find a matching association on the endpoint,
-		 * make sure that there is no peeled-off association matching
-		 * the peer address even on another socket.
-		 */
-		if (sctp_endpoint_is_peeled_off(ep, &to)) {
+		if (sctp_endpoint_is_peeled_off(ep, daddr)) {
 			err = -EADDRNOTAVAIL;
 			goto out_free;
 		}
 
-		if (!asoc) {
-			/* If a bind() or sctp_bindx() is not called prior to
-			 * an sctp_connectx() call, the system picks an
-			 * ephemeral port and will choose an address set
-			 * equivalent to binding with a wildcard address.
-			 */
-			if (!ep->base.bind_addr.port) {
-				if (sctp_autobind(sk)) {
-					err = -EAGAIN;
-					goto out_free;
-				}
-			} else {
-				/*
-				 * If an unprivileged user inherits a 1-many
-				 * style socket with open associations on a
-				 * privileged port, it MAY be permitted to
-				 * accept new associations, but it SHOULD NOT
-				 * be permitted to open new associations.
-				 */
-				if (ep->base.bind_addr.port <
-				    inet_prot_sock(net) &&
-				    !ns_capable(net->user_ns,
-				    CAP_NET_BIND_SERVICE)) {
-					err = -EACCES;
-					goto out_free;
-				}
-			}
-
-			scope = sctp_scope(&to);
-			asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
-			if (!asoc) {
-				err = -ENOMEM;
-				goto out_free;
-			}
-
-			err = sctp_assoc_set_bind_addr_from_ep(asoc, scope,
-							      GFP_KERNEL);
-			if (err < 0) {
-				goto out_free;
-			}
-
-		}
-
-		/* Prime the peer's transport structures.  */
-		transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL,
+		transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL,
 						SCTP_UNKNOWN);
 		if (!transport) {
 			err = -ENOMEM;
 			goto out_free;
 		}
 
-		addrcnt++;
-		addr_buf += af->sockaddr_len;
+		addr_buf  += af->sockaddr_len;
 		walk_size += af->sockaddr_len;
 	}
 
@@ -1209,39 +1161,24 @@ static int __sctp_connect(struct sock *sk,
 	}
 
 	err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
-	if (err < 0) {
+	if (err < 0)
 		goto out_free;
-	}
 
 	/* Initialize sk's dport and daddr for getpeername() */
 	inet_sk(sk)->inet_dport = htons(asoc->peer.port);
-	sp->pf->to_sk_daddr(sa_addr, sk);
+	sp->pf->to_sk_daddr(daddr, sk);
 	sk->sk_err = 0;
 
-	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
-
 	if (assoc_id)
 		*assoc_id = asoc->assoc_id;
 
-	err = sctp_wait_for_connect(asoc, &timeo);
-	/* Note: the asoc may be freed after the return of
-	 * sctp_wait_for_connect.
-	 */
-
-	/* Don't free association on exit. */
-	asoc = NULL;
+	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
+	return sctp_wait_for_connect(asoc, &timeo);
 
 out_free:
 	pr_debug("%s: took out_free path with asoc:%p kaddrs:%p err:%d\n",
 		 __func__, asoc, kaddrs, err);
-
-	if (asoc) {
-		/* sctp_primitive_ASSOCIATE may have added this association
-		 * To the hash table, try to unhash it, just in case, its a noop
-		 * if it wasn't hashed so we're safe
-		 */
-		sctp_association_free(asoc);
-	}
+	sctp_association_free(asoc);
 	return err;
 }
 
-- 
2.1.0


^ permalink raw reply related

* [PATCHv2 net-next 4/5] sctp: factor out sctp_connect_new_asoc
From: Xin Long @ 2019-07-30 12:38 UTC (permalink / raw)
  To: network dev, linux-sctp; +Cc: Marcelo Ricardo Leitner, Neil Horman, davem
In-Reply-To: <cover.1564490276.git.lucien.xin@gmail.com>

In this function factored out from sctp_sendmsg_new_asoc() and
__sctp_connect(), it creates the asoc and adds a peer with the
1st addr.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 net/sctp/socket.c | 160 ++++++++++++++++++++++++++----------------------------
 1 file changed, 76 insertions(+), 84 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b9804e5..6f77853 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1044,6 +1044,73 @@ static int sctp_setsockopt_bindx(struct sock *sk,
 	return err;
 }
 
+static int sctp_connect_new_asoc(struct sctp_endpoint *ep,
+				 const union sctp_addr *daddr,
+				 const struct sctp_initmsg *init,
+				 struct sctp_transport **tp)
+{
+	struct sctp_association *asoc;
+	struct sock *sk = ep->base.sk;
+	struct net *net = sock_net(sk);
+	enum sctp_scope scope;
+	int err;
+
+	if (sctp_endpoint_is_peeled_off(ep, daddr))
+		return -EADDRNOTAVAIL;
+
+	if (!ep->base.bind_addr.port) {
+		if (sctp_autobind(sk))
+			return -EAGAIN;
+	} else {
+		if (ep->base.bind_addr.port < inet_prot_sock(net) &&
+		    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
+			return -EACCES;
+	}
+
+	scope = sctp_scope(daddr);
+	asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
+	if (!asoc)
+		return -ENOMEM;
+
+	err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL);
+	if (err < 0)
+		goto free;
+
+	*tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+	if (!*tp) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	if (!init)
+		return 0;
+
+	if (init->sinit_num_ostreams) {
+		__u16 outcnt = init->sinit_num_ostreams;
+
+		asoc->c.sinit_num_ostreams = outcnt;
+		/* outcnt has been changed, need to re-init stream */
+		err = sctp_stream_init(&asoc->stream, outcnt, 0, GFP_KERNEL);
+		if (err)
+			goto free;
+	}
+
+	if (init->sinit_max_instreams)
+		asoc->c.sinit_max_instreams = init->sinit_max_instreams;
+
+	if (init->sinit_max_attempts)
+		asoc->max_init_attempts = init->sinit_max_attempts;
+
+	if (init->sinit_max_init_timeo)
+		asoc->max_init_timeo =
+			msecs_to_jiffies(init->sinit_max_init_timeo);
+
+	return 0;
+free:
+	sctp_association_free(asoc);
+	return err;
+}
+
 /* __sctp_connect(struct sock* sk, struct sockaddr *kaddrs, int addrs_size)
  *
  * Common routine for handling connect() and sctp_connectx().
@@ -1056,10 +1123,8 @@ static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs,
 	struct sctp_sock *sp = sctp_sk(sk);
 	struct sctp_endpoint *ep = sp->ep;
 	struct sctp_transport *transport;
-	struct net *net = sock_net(sk);
 	void *addr_buf = kaddrs;
 	union sctp_addr *daddr;
-	enum sctp_scope scope;
 	struct sctp_af *af;
 	int walk_size, err;
 	long timeo;
@@ -1082,32 +1147,10 @@ static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs,
 		return asoc->state >= SCTP_STATE_ESTABLISHED ? -EISCONN
 							     : -EALREADY;
 
-	if (sctp_endpoint_is_peeled_off(ep, daddr))
-		return -EADDRNOTAVAIL;
-
-	if (!ep->base.bind_addr.port) {
-		if (sctp_autobind(sk))
-			return -EAGAIN;
-	} else {
-		if (ep->base.bind_addr.port < inet_prot_sock(net) &&
-		    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
-			return -EACCES;
-	}
-
-	scope = sctp_scope(daddr);
-	asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
-	if (!asoc)
-		return -ENOMEM;
-
-	err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL);
-	if (err < 0)
-		goto out_free;
-
-	transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
-	if (!transport) {
-		err = -ENOMEM;
-		goto out_free;
-	}
+	err = sctp_connect_new_asoc(ep, daddr, NULL, &transport);
+	if (err)
+		return err;
+	asoc = transport->asoc;
 
 	addr_buf += af->sockaddr_len;
 	walk_size = af->sockaddr_len;
@@ -1160,7 +1203,7 @@ static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs,
 			goto out_free;
 	}
 
-	err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
+	err = sctp_primitive_ASSOCIATE(sock_net(sk), asoc, NULL);
 	if (err < 0)
 		goto out_free;
 
@@ -1597,9 +1640,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
 				 struct sctp_transport **tp)
 {
 	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
-	struct net *net = sock_net(sk);
 	struct sctp_association *asoc;
-	enum sctp_scope scope;
 	struct cmsghdr *cmsg;
 	__be32 flowinfo = 0;
 	struct sctp_af *af;
@@ -1614,20 +1655,6 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
 				    sctp_sstate(sk, CLOSING)))
 		return -EADDRNOTAVAIL;
 
-	if (sctp_endpoint_is_peeled_off(ep, daddr))
-		return -EADDRNOTAVAIL;
-
-	if (!ep->base.bind_addr.port) {
-		if (sctp_autobind(sk))
-			return -EAGAIN;
-	} else {
-		if (ep->base.bind_addr.port < inet_prot_sock(net) &&
-		    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
-			return -EACCES;
-	}
-
-	scope = sctp_scope(daddr);
-
 	/* Label connection socket for first association 1-to-many
 	 * style for client sequence socket()->sendmsg(). This
 	 * needs to be done before sctp_assoc_add_peer() as that will
@@ -1643,45 +1670,10 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
 	if (err < 0)
 		return err;
 
-	asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
-	if (!asoc)
-		return -ENOMEM;
-
-	if (sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL) < 0) {
-		err = -ENOMEM;
-		goto free;
-	}
-
-	if (cmsgs->init) {
-		struct sctp_initmsg *init = cmsgs->init;
-
-		if (init->sinit_num_ostreams) {
-			__u16 outcnt = init->sinit_num_ostreams;
-
-			asoc->c.sinit_num_ostreams = outcnt;
-			/* outcnt has been changed, need to re-init stream */
-			err = sctp_stream_init(&asoc->stream, outcnt, 0,
-					       GFP_KERNEL);
-			if (err)
-				goto free;
-		}
-
-		if (init->sinit_max_instreams)
-			asoc->c.sinit_max_instreams = init->sinit_max_instreams;
-
-		if (init->sinit_max_attempts)
-			asoc->max_init_attempts = init->sinit_max_attempts;
-
-		if (init->sinit_max_init_timeo)
-			asoc->max_init_timeo =
-				msecs_to_jiffies(init->sinit_max_init_timeo);
-	}
-
-	*tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
-	if (!*tp) {
-		err = -ENOMEM;
-		goto free;
-	}
+	err = sctp_connect_new_asoc(ep, daddr, cmsgs->init, tp);
+	if (err)
+		return err;
+	asoc = (*tp)->asoc;
 
 	if (!cmsgs->addrs_msg)
 		return 0;
-- 
2.1.0


^ permalink raw reply related

* [PATCHv2 net-next 5/5] sctp: factor out sctp_connect_add_peer
From: Xin Long @ 2019-07-30 12:38 UTC (permalink / raw)
  To: network dev, linux-sctp; +Cc: Marcelo Ricardo Leitner, Neil Horman, davem
In-Reply-To: <cover.1564490276.git.lucien.xin@gmail.com>

In this function factored out from sctp_sendmsg_new_asoc() and
__sctp_connect(), it adds a peer with the other addr into the
asoc after this asoc is created with the 1st addr.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 net/sctp/socket.c | 76 +++++++++++++++++++++++--------------------------------
 1 file changed, 31 insertions(+), 45 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6f77853..2f7e88c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1111,6 +1111,33 @@ static int sctp_connect_new_asoc(struct sctp_endpoint *ep,
 	return err;
 }
 
+static int sctp_connect_add_peer(struct sctp_association *asoc,
+				 union sctp_addr *daddr, int addr_len)
+{
+	struct sctp_endpoint *ep = asoc->ep;
+	struct sctp_association *old;
+	struct sctp_transport *t;
+	int err;
+
+	err = sctp_verify_addr(ep->base.sk, daddr, addr_len);
+	if (err)
+		return err;
+
+	old = sctp_endpoint_lookup_assoc(ep, daddr, &t);
+	if (old && old != asoc)
+		return old->state >= SCTP_STATE_ESTABLISHED ? -EISCONN
+							    : -EALREADY;
+
+	if (sctp_endpoint_is_peeled_off(ep, daddr))
+		return -EADDRNOTAVAIL;
+
+	t = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+	if (!t)
+		return -ENOMEM;
+
+	return 0;
+}
+
 /* __sctp_connect(struct sock* sk, struct sockaddr *kaddrs, int addrs_size)
  *
  * Common routine for handling connect() and sctp_connectx().
@@ -1119,10 +1146,10 @@ static int sctp_connect_new_asoc(struct sctp_endpoint *ep,
 static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs,
 			  int addrs_size, int flags, sctp_assoc_t *assoc_id)
 {
-	struct sctp_association *old, *asoc;
 	struct sctp_sock *sp = sctp_sk(sk);
 	struct sctp_endpoint *ep = sp->ep;
 	struct sctp_transport *transport;
+	struct sctp_association *asoc;
 	void *addr_buf = kaddrs;
 	union sctp_addr *daddr;
 	struct sctp_af *af;
@@ -1167,29 +1194,10 @@ static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs,
 		if (asoc->peer.port != ntohs(daddr->v4.sin_port))
 			goto out_free;
 
-		err = sctp_verify_addr(sk, daddr, af->sockaddr_len);
+		err = sctp_connect_add_peer(asoc, daddr, af->sockaddr_len);
 		if (err)
 			goto out_free;
 
-		old = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
-		if (old && old != asoc) {
-			err = old->state >= SCTP_STATE_ESTABLISHED ? -EISCONN
-								   : -EALREADY;
-			goto out_free;
-		}
-
-		if (sctp_endpoint_is_peeled_off(ep, daddr)) {
-			err = -EADDRNOTAVAIL;
-			goto out_free;
-		}
-
-		transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL,
-						SCTP_UNKNOWN);
-		if (!transport) {
-			err = -ENOMEM;
-			goto out_free;
-		}
-
 		addr_buf  += af->sockaddr_len;
 		walk_size += af->sockaddr_len;
 	}
@@ -1683,8 +1691,6 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
 
 	/* sendv addr list parse */
 	for_each_cmsghdr(cmsg, cmsgs->addrs_msg) {
-		struct sctp_transport *transport;
-		struct sctp_association *old;
 		union sctp_addr _daddr;
 		int dlen;
 
@@ -1718,30 +1724,10 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
 			daddr->v6.sin6_port = htons(asoc->peer.port);
 			memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen);
 		}
-		err = sctp_verify_addr(sk, daddr, sizeof(*daddr));
-		if (err)
-			goto free;
-
-		old = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
-		if (old && old != asoc) {
-			if (old->state >= SCTP_STATE_ESTABLISHED)
-				err = -EISCONN;
-			else
-				err = -EALREADY;
-			goto free;
-		}
 
-		if (sctp_endpoint_is_peeled_off(ep, daddr)) {
-			err = -EADDRNOTAVAIL;
-			goto free;
-		}
-
-		transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL,
-						SCTP_UNKNOWN);
-		if (!transport) {
-			err = -ENOMEM;
+		err = sctp_connect_add_peer(asoc, daddr, sizeof(*daddr));
+		if (err)
 			goto free;
-		}
 	}
 
 	return 0;
-- 
2.1.0


^ permalink raw reply related

* Re: [PATCH] bridge:fragmented packets dropped by bridge
From: Nikolay Aleksandrov @ 2019-07-30 12:41 UTC (permalink / raw)
  To: Rundong Ge, davem
  Cc: kuznet, yoshfuji, netdev, pablo, kadlec, fw, roopa,
	netfilter-devel, coreteam, bridge, linux-kernel
In-Reply-To: <20190730122534.30687-1-rdong.ge@gmail.com>

On 30/07/2019 15:25, Rundong Ge wrote:
> Given following setup:
> -modprobe br_netfilter
> -echo '1' > /proc/sys/net/bridge/bridge-nf-call-iptables
> -brctl addbr br0
> -brctl addif br0 enp2s0
> -brctl addif br0 enp3s0
> -brctl addif br0 enp6s0
> -ifconfig enp2s0 mtu 1300
> -ifconfig enp3s0 mtu 1500
> -ifconfig enp6s0 mtu 1500
> -ifconfig br0 up
> 
>                  multi-port
> mtu1500 - mtu1500|bridge|1500 - mtu1500
>   A                  |            B
>                    mtu1300
> 
> With netfilter defragmentation/conntrack enabled, fragmented
> packets from A will be defragmented in prerouting, and refragmented
> at postrouting.
> But in this scenario the bridge found the frag_max_size(1500) is
> larger than the dst mtu stored in the fake_rtable whitch is
> always equal to the bridge's mtu 1300, then packets will be dopped.
> 
> This modifies ip_skb_dst_mtu to use the out dev's mtu instead
> of bridge's mtu in bridge refragment.
> 
> Signed-off-by: Rundong Ge <rdong.ge@gmail.com>
> ---
>  include/net/ip.h | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/include/net/ip.h b/include/net/ip.h
> index 29d89de..0512de3 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -450,6 +450,8 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
>  static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
>  					  const struct sk_buff *skb)
>  {
> +	if ((skb_dst(skb)->flags & DST_FAKE_RTABLE) && skb->dev)
> +		return min(skb->dev->mtu, IP_MAX_MTU);
>  	if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) {
>  		bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
>  
> 

I don't think this is correct, there's a reason why the bridge chooses the smallest
possible MTU out of its members and this is simply a hack to circumvent it.
If you really like to do so just set the bridge MTU manually, we've added support
so it won't change automatically to the smallest, but then how do you pass packets
1500 -> 1300 in this setup ?

You're talking about the frag_size check in br_nf_ip_fragment(), right ?


^ permalink raw reply

* [PATCH] net: usb: pegasus: fix improper read if get_registers() fail
From: Denis Kirjanov @ 2019-07-30 12:45 UTC (permalink / raw)
  To: petkan, davem; +Cc: netdev, Denis Kirjanov

get_registers() may fail with -ENOMEM and in this
case we can read a garbage from the status variable tmp.

Reported-by: syzbot+3499a83b2d062ae409d4@syzkaller.appspotmail.com
Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
---
 drivers/net/usb/pegasus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 6d25dea5ad4b..f7d117d80cfb 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -282,7 +282,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int loc, int val)
 static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata)
 {
 	int i;
-	__u8 tmp;
+	__u8 tmp = 0;
 	__le16 retdatai;
 	int ret;
 
-- 
2.12.3


^ permalink raw reply related

* [PATCH] net: usb: pegasus: fix improper read if get_registers() fail
From: Denis Kirjanov @ 2019-07-30 13:13 UTC (permalink / raw)
  To: petkan, davem; +Cc: netdev, Denis Kirjanov

get_registers() may fail with -ENOMEM and in this
case we can read a garbage from the status variable tmp.

Reported-by: syzbot+3499a83b2d062ae409d4@syzkaller.appspotmail.com
Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
---
 drivers/net/usb/pegasus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 6d25dea5ad4b..f7d117d80cfb 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -282,7 +282,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int loc, int val)
 static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata)
 {
 	int i;
-	__u8 tmp;
+	__u8 tmp = 0;
 	__le16 retdatai;
 	int ret;
 
-- 
2.12.3


^ permalink raw reply related

* Re: [PATCH net-next 2/2] net: phy: broadcom: add 1000Base-X support for BCM54616S
From: Andrew Lunn @ 2019-07-30 13:17 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: Tao Ren, Florian Fainelli, Heiner Kallweit, David S . Miller,
	Arun Parameswaran, Justin Chen, netdev, lkml, Andrew Jeffery,
	openbmc@lists.ozlabs.org
In-Reply-To: <CA+h21ho1KOGS3WsNBHzfHkpSyE4k5HTE1tV9wUtnkZhjUZGeUw@mail.gmail.com>

> Again, I don't think Linux has generic support for overwriting (or
> even describing) the operating mode of a PHY, although maybe that's a
> direction we would want to push the discussion towards. RGMII to
> copper, RGMII to fiber, SGMII to copper, copper to fiber (media
> converter), even RGMII to SGMII (RTL8211FS supports this) - lots of
> modes, and this is only for gigabit PHYs...

This is something Russell King has PHYLINK patches for, which have not
yet been merged. There are some boards which use a PHY as a media
converter, placed between the MAC and an SFP.

	   Andrew

^ permalink raw reply

* Re: [PATCH] net/socket: fix GCC8+ Wpacked-not-aligned warnings
From: Qian Cai @ 2019-07-30 13:18 UTC (permalink / raw)
  To: David Laight, davem@davemloft.net
  Cc: vyasevich@gmail.com, nhorman@tuxdriver.com,
	marcelo.leitner@gmail.com, linux-sctp@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <91237fd501de4ab895305c4d5666d822@AcuMS.aculab.com>

On Tue, 2019-07-30 at 09:01 +0000, David Laight wrote:
> From: Qian Cai
> > Sent: 29 July 2019 21:24
> 
> ..
> > To fix this, "struct sockaddr_storage" needs to be aligned to 4-byte as
> > it is only used in those packed sctp structure which is part of UAPI,
> > and "struct __kernel_sockaddr_storage" is used in some other
> > places of UAPI that need not to change alignments in order to not
> > breaking userspace.
> > 
> > One option is use typedef between "sockaddr_storage" and
> > "__kernel_sockaddr_storage" but it needs to change 35 or 370 lines of
> > codes. The other option is to duplicate this simple 2-field structure to
> > have a separate "struct sockaddr_storage" so it can use a different
> > alignment than "__kernel_sockaddr_storage". Also the structure seems
> > stable enough, so it will be low-maintenance to update two structures in
> > the future in case of changes.
> 
> Does it all work if the 8 byte alignment is implicit, not explicit?
> For instance if unnamed union and struct are used eg:
> 
> struct sockaddr_storage {
> 	union {
> 		void * __ptr;  /* Force alignment */
> 		struct {
> 			__kernel_sa_family_t	ss_family;		/*
> address family */
> 			/* Following field(s) are implementation specific */
> 			char	__data[_K_SS_MAXSIZE - sizeof(unsigned
> short)];
> 					/* space to achieve desired size, */
> 					/* _SS_MAXSIZE value minus size of
> ss_family */
> 		};
> 	};
> };
> 
> I suspect unnamed unions and structs have to be allowed by the compiler.

I believe this will suffer the same problem in that will break UAPI,

https://lore.kernel.org/lkml/20190726213045.GL6204@localhost.localdomain/

^ permalink raw reply

* Re: [PATCH net-next v4 2/4] enetc: Add mdio bus driver for the PCIe MDIO endpoint
From: Andrew Lunn @ 2019-07-30 13:22 UTC (permalink / raw)
  To: Claudiu Manoil
  Cc: David S . Miller, Rob Herring, Li Yang, alexandru.marginean,
	netdev, devicetree, linux-arm-kernel, linux-kernel
In-Reply-To: <1564479919-18835-3-git-send-email-claudiu.manoil@nxp.com>

On Tue, Jul 30, 2019 at 12:45:17PM +0300, Claudiu Manoil wrote:
> ENETC ports can manage the MDIO bus via local register
> interface.  However there's also a centralized way
> to manage the MDIO bus, via the MDIO PCIe endpoint
> device integrated by the same root complex that also
> integrates the ENETC ports (eth controllers).
> 
> Depending on board design and use case, centralized
> access to MDIO may be better than using local ENETC
> port registers.  For instance, on the LS1028A QDS board
> where MDIO muxing is required.  Also, the LS1028A on-chip
> switch doesn't have a local MDIO register interface.
> 
> The current patch registers the above PCIe endpoint as a
> separate MDIO bus and provides a driver for it by re-using
> the code used for local MDIO access.  It also allows the
> ENETC port PHYs to be managed by this driver if the local
> "mdio" node is missing from the ENETC port node.
> 
> Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* RE: [PATCH] net/socket: fix GCC8+ Wpacked-not-aligned warnings
From: David Laight @ 2019-07-30 13:23 UTC (permalink / raw)
  To: 'Qian Cai', davem@davemloft.net
  Cc: vyasevich@gmail.com, nhorman@tuxdriver.com,
	marcelo.leitner@gmail.com, linux-sctp@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <1564492704.11067.28.camel@lca.pw>

From: Qian Cai 
> Sent: 30 July 2019 14:18
> On Tue, 2019-07-30 at 09:01 +0000, David Laight wrote:
> > From: Qian Cai
> > > Sent: 29 July 2019 21:24
> >
> > ..
> > > To fix this, "struct sockaddr_storage" needs to be aligned to 4-byte as
> > > it is only used in those packed sctp structure which is part of UAPI,
> > > and "struct __kernel_sockaddr_storage" is used in some other
> > > places of UAPI that need not to change alignments in order to not
> > > breaking userspace.
> > >
> > > One option is use typedef between "sockaddr_storage" and
> > > "__kernel_sockaddr_storage" but it needs to change 35 or 370 lines of
> > > codes. The other option is to duplicate this simple 2-field structure to
> > > have a separate "struct sockaddr_storage" so it can use a different
> > > alignment than "__kernel_sockaddr_storage". Also the structure seems
> > > stable enough, so it will be low-maintenance to update two structures in
> > > the future in case of changes.
> >
> > Does it all work if the 8 byte alignment is implicit, not explicit?
> > For instance if unnamed union and struct are used eg:
> >
> > struct sockaddr_storage {
> > 	union {
> > 		void * __ptr;  /* Force alignment */
> > 		struct {
> > 			__kernel_sa_family_t	ss_family;		/* address family */
> > 			/* Following field(s) are implementation specific */
> > 			char	__data[_K_SS_MAXSIZE - sizeof(unsigned short)];
> > 					/* space to achieve desired size, */
> > 					/* _SS_MAXSIZE value minus size of ss_family */
> > 		};
> > 	};
> > };
> >
> > I suspect unnamed unions and structs have to be allowed by the compiler.
> 
> I believe this will suffer the same problem in that will break UAPI,
> 
> https://lore.kernel.org/lkml/20190726213045.GL6204@localhost.localdomain/

You are missing the bit where the UAPI structure is packed.
If the compiler won't let you 'pack' a structure that contains structures
(rather than just integers) then the compiler is broken!

The hope here was that it would be ok is the alignment was implicit not explicit.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

^ permalink raw reply

* Re: [PATCH] net: phy: fixed_phy: print gpio error only if gpio node is present
From: Andrew Lunn @ 2019-07-30 13:30 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, linux-kernel, Florian Fainelli, Heiner Kallweit,
	David S. Miller
In-Reply-To: <20190730094623.31640-1-h.feurstein@gmail.com>

On Tue, Jul 30, 2019 at 11:46:23AM +0200, Hubert Feurstein wrote:
> It is perfectly ok to not have an gpio attached to the fixed-link node. So
> the driver should not throw an error message when the gpio is missing.
> 
> Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>

Fixes: 5468e82f7034 ("net: phy: fixed-phy: Drop GPIO from fixed_phy_add()")
Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* Re: DSA Rate Limiting in 88E6390
From: Andrew Lunn @ 2019-07-30 13:31 UTC (permalink / raw)
  To: Benjamin Beckmeyer; +Cc: netdev
In-Reply-To: <fd08b6b3-3170-bf44-2f05-a0dd92ea868d@eks-engel.de>

> Hi Andrew,
> I've searched the netdev mailing list for DSA and traffic, but can't find anything
> about rate limiting till 2016. Do you have a hint, how I can find it?

I will try to find it later today.
 
> Do you know if the patchset was for Marvell or maybe for another company?

It was another switch vendor.

   Andrew

^ permalink raw reply

* Re: [PATCH 1/4] net: dsa: mv88e6xxx: add support for MV88E6220
From: Andrew Lunn @ 2019-07-30 13:36 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	David S. Miller, Rasmus Villemoes
In-Reply-To: <20190730100429.32479-2-h.feurstein@gmail.com>

On Tue, Jul 30, 2019 at 12:04:26PM +0200, Hubert Feurstein wrote:
> The MV88E6220 is almost the same as MV88E6250 except that the ports 2-4 are
> not routed to pins. So the usable ports are 0, 1, 5 and 6.
> 
> Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>
> ---
>  drivers/net/dsa/mv88e6xxx/chip.c | 25 +++++++++++++++++++++++++
>  drivers/net/dsa/mv88e6xxx/chip.h |  3 ++-
>  drivers/net/dsa/mv88e6xxx/port.h |  1 +
>  3 files changed, 28 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
> index 6b17cd961d06..c4982ced908e 100644
> --- a/drivers/net/dsa/mv88e6xxx/chip.c
> +++ b/drivers/net/dsa/mv88e6xxx/chip.c
> @@ -4283,6 +4283,31 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
>  		.ops = &mv88e6240_ops,
>  	},

Hi Hubert

We try to keep all these lists in strict numerical order. Please can
you add 6220 before 6240, in all the places you have added it.

    Thanks
	Andrew

^ permalink raw reply

* Re: [PATCH net-next 3/3] net: stmmac: Introducing support for Page Pool
From: Jon Hunter @ 2019-07-30 13:36 UTC (permalink / raw)
  To: Jose Abreu, Robin Murphy, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, linux-stm32@st-md-mailman.stormreply.com,
	linux-arm-kernel@lists.infradead.org, Catalin Marinas,
	Will Deacon
  Cc: Joao Pinto, Alexandre Torgue, Maxime Ripard, Chen-Yu Tsai,
	Maxime Coquelin, linux-tegra, Giuseppe Cavallaro,
	David S . Miller
In-Reply-To: <BN8PR12MB32664E23137805984F6FB2DAD3DC0@BN8PR12MB3266.namprd12.prod.outlook.com>


On 30/07/2019 10:39, Jose Abreu wrote:

...

> I looked at netsec implementation and I noticed that we are syncing the 
> old buffer for device instead of the new one. netsec syncs the buffer 
> for device immediately after the allocation which may be what we have to 
> do. Maybe the attached patch can make things work for you ?

Great! This one works. I have booted this several times and I am no
longer seeing any issues. Thanks for figuring this out!

Feel free to add my ...

Tested-by: Jon Hunter <jonathanh@nvidia.com>

Cheers
Jon

-- 
nvpublic

^ permalink raw reply

* Re: [PATCH 2/4] dt-bindings: net: dsa: marvell: add 6220 model to the 6250 family
From: Andrew Lunn @ 2019-07-30 13:37 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	David S. Miller, Rasmus Villemoes
In-Reply-To: <20190730100429.32479-3-h.feurstein@gmail.com>

On Tue, Jul 30, 2019 at 12:04:27PM +0200, Hubert Feurstein wrote:
> The MV88E6220 is part of the MV88E6250 family.
> 
> Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* Re: [PATCH 3/4] net: dsa: mv88e6xxx: setup message port is not supported in the 6250 family
From: Andrew Lunn @ 2019-07-30 13:40 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	David S. Miller, Rasmus Villemoes
In-Reply-To: <20190730100429.32479-4-h.feurstein@gmail.com>

On Tue, Jul 30, 2019 at 12:04:28PM +0200, Hubert Feurstein wrote:
> The MV88E6250 family doesn't support the MV88E6XXX_PORT_CTL1_MESSAGE_PORT
> bit.
> 
> Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* Re: [PATCH net v2] net: ipv6: Fix a bug in ndisc_send_ns when netdev only has a global address
From: David Ahern @ 2019-07-30 13:45 UTC (permalink / raw)
  To: Su Yanjun, davem, kuznet, yoshfuji; +Cc: netdev, linux-kernel
In-Reply-To: <1564454115-66184-1-git-send-email-suyj.fnst@cn.fujitsu.com>

On 7/29/19 8:35 PM, Su Yanjun wrote:
> diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
> index 083cc1c..156c027 100644
> --- a/net/ipv6/ndisc.c
> +++ b/net/ipv6/ndisc.c
> @@ -603,11 +603,14 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
>  	int inc_opt = dev->addr_len;
>  	int optlen = 0;
>  	struct nd_msg *msg;
> +	u32 banned_flags = IFA_F_TENTATIVE | IFA_F_OPTIMISTIC;
>  
>  	if (!saddr) {

banned_flags should be declared here, under !saddr since that is the
scope of its use.


> -		if (ipv6_get_lladdr(dev, &addr_buf,
> -				   (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
> -			return;
> +		if (ipv6_get_lladdr(dev, &addr_buf, banned_flags)) {
> +			/* try global address */
> +			if (ipv6_get_addr(dev, &addr_buf, banned_flags))
> +				return;
> +		}
>  		saddr = &addr_buf;
>  	}
>  
> 


^ permalink raw reply

* Re: [PATCH 1/4] net: dsa: mv88e6xxx: add support for MV88E6220
From: Andrew Lunn @ 2019-07-30 13:49 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	David S. Miller, Rasmus Villemoes
In-Reply-To: <20190730100429.32479-2-h.feurstein@gmail.com>

On Tue, Jul 30, 2019 at 12:04:26PM +0200, Hubert Feurstein wrote:
> The MV88E6220 is almost the same as MV88E6250 except that the ports 2-4 are
> not routed to pins. So the usable ports are 0, 1, 5 and 6.

Hi Hubert

Do the registers for the ports exist?

> +	[MV88E6220] = {
> +		.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6220,
> +		.family = MV88E6XXX_FAMILY_6250,
> +		.name = "Marvell 88E6220",
> +		.num_databases = 64,
> +
> +		/* Ports 2-4 are not routed to pins
> +		 * => usable ports 0, 1, 5, 6
> +		 */
> +		.num_ports = 7,

I'm wondering if we should add something like

		.invalid_port_mask = BIT(2) | BIT(3) | BIT(4)


and

        /* Setup Switch Port Registers */
        for (i = 0; i < mv88e6xxx_num_ports(chip); i++) {
+		if (chip->info->invalid_port_mask & BIT(i) &&
+		    !dsa_is_unused_port(ds, i))
+		    return -EINVAL;
                if (dsa_is_unused_port(ds, i)) {
                        err = mv88e6xxx_port_set_state(chip, i,
                                                       BR_STATE_DISABLED);
  
	Andrew

^ permalink raw reply

* Re: [PATCH] bridge:fragmented packets dropped by bridge
From: Rundong Ge @ 2019-07-30 13:50 UTC (permalink / raw)
  To: Florian Westphal
  Cc: davem, kuznet, yoshfuji, netdev, Pablo Neira Ayuso, kadlec,
	Roopa Prabhu, netfilter-devel, coreteam, bridge, nikolay,
	linux-kernel
In-Reply-To: <20190730123542.zrsrfvcy7t2n3d4g@breakpoint.cc>

> How can a bridge forward a frame from A/B to mtu1300?
It is free for user to set different MTU for bridge ports. In our case
only tcp traffic between A/B and mtu 1300, and mss negotiation can
make packets less than 1300.

> What happens without netfilter or non-fragmented packets?
Without br_netfilter it works fine, there is no defragmentation and
refragmentation, fragmented packets will egress directly.

Florian Westphal <fw@strlen.de> 于2019年7月30日周二 下午8:35写道：
>
> Rundong Ge <rdong.ge@gmail.com> wrote:
> > Given following setup:
> > -modprobe br_netfilter
> > -echo '1' > /proc/sys/net/bridge/bridge-nf-call-iptables
> > -brctl addbr br0
> > -brctl addif br0 enp2s0
> > -brctl addif br0 enp3s0
> > -brctl addif br0 enp6s0
> > -ifconfig enp2s0 mtu 1300
> > -ifconfig enp3s0 mtu 1500
> > -ifconfig enp6s0 mtu 1500
> > -ifconfig br0 up
> >
> >                  multi-port
> > mtu1500 - mtu1500|bridge|1500 - mtu1500
> >   A                  |            B
> >                    mtu1300
>
> How can a bridge forward a frame from A/B to mtu1300?
>
> > With netfilter defragmentation/conntrack enabled, fragmented
> > packets from A will be defragmented in prerouting, and refragmented
> > at postrouting.
>
> Yes, but I don't see how that relates to the problem at hand.
>
> > But in this scenario the bridge found the frag_max_size(1500) is
> > larger than the dst mtu stored in the fake_rtable whitch is
> > always equal to the bridge's mtu 1300, then packets will be dopped.
>
> What happens without netfilter or non-fragmented packets?
>
> > This modifies ip_skb_dst_mtu to use the out dev's mtu instead
> > of bridge's mtu in bridge refragment.
>
> It seems quite a hack?  The above setup should use a router, not a bridge.

^ permalink raw reply

* [PATCH net] net: stmmac: Sync RX Buffer upon allocation
From: Jose Abreu @ 2019-07-30 13:57 UTC (permalink / raw)
  To: netdev
  Cc: Joao Pinto, Jose Abreu, Giuseppe Cavallaro, Alexandre Torgue,
	David S. Miller, Maxime Coquelin, linux-stm32, linux-arm-kernel,
	linux-kernel, Jon Hunter

With recent changes that introduced support for Page Pool in stmmac, Jon
reported that NFS boot was no longer working on an ARM64 based platform
that had the IP behind an IOMMU.

As Page Pool API does not guarantee DMA syncing because of the use of
DMA_ATTR_SKIP_CPU_SYNC flag, we have to explicit sync the whole buffer upon
re-allocation because we are always re-using same pages.

In fact, ARM64 code invalidates the DMA area upon two situations [1]:
	- sync_single_for_cpu(): Invalidates if direction != DMA_TO_DEVICE
	- sync_single_for_device(): Invalidates if direction == DMA_FROM_DEVICE

So, as we must invalidate both the current RX buffer and the newly allocated
buffer we propose this fix.

[1] arch/arm64/mm/cache.S

Reported-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Fixes: 2af6106ae949 ("net: stmmac: Introducing support for Page Pool")
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Cc: Jon Hunter <jonathanh@nvidia.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 98b1a5c6d537..9a4a56ad35cd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3271,9 +3271,11 @@ static inline int stmmac_rx_threshold_count(struct stmmac_rx_queue *rx_q)
 static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 {
 	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-	int dirty = stmmac_rx_dirty(priv, queue);
+	int len, dirty = stmmac_rx_dirty(priv, queue);
 	unsigned int entry = rx_q->dirty_rx;
 
+	len = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE) * PAGE_SIZE;
+
 	while (dirty-- > 0) {
 		struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry];
 		struct dma_desc *p;
@@ -3291,6 +3293,13 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 		}
 
 		buf->addr = page_pool_get_dma_addr(buf->page);
+
+		/* Sync whole allocation to device. This will invalidate old
+		 * data.
+		 */
+		dma_sync_single_for_device(priv->device, buf->addr, len,
+					   DMA_FROM_DEVICE);
+
 		stmmac_set_desc_addr(priv, p, buf->addr);
 		stmmac_refill_desc3(priv, rx_q, p);
 
@@ -3425,8 +3434,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 			skb_copy_to_linear_data(skb, page_address(buf->page),
 						frame_len);
 			skb_put(skb, frame_len);
-			dma_sync_single_for_device(priv->device, buf->addr,
-						   frame_len, DMA_FROM_DEVICE);
 
 			if (netif_msg_pktdata(priv)) {
 				netdev_dbg(priv->dev, "frame received (%dbytes)",
-- 
2.7.4


^ permalink raw reply related

* Re: [PATCH net] net: bridge: mcast: don't delete permanent entries when fast leave is enabled
From: David Ahern @ 2019-07-30 13:58 UTC (permalink / raw)
  To: Nikolay Aleksandrov, netdev; +Cc: davem, roopa, bridge
In-Reply-To: <20190730112100.18156-1-nikolay@cumulusnetworks.com>

On 7/30/19 5:21 AM, Nikolay Aleksandrov wrote:
> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
> index 3d8deac2353d..f8cac3702712 100644
> --- a/net/bridge/br_multicast.c
> +++ b/net/bridge/br_multicast.c
> @@ -1388,6 +1388,9 @@ br_multicast_leave_group(struct net_bridge *br,
>  			if (!br_port_group_equal(p, port, src))
>  				continue;
>  
> +			if (p->flags & MDB_PG_FLAGS_PERMANENT)
> +				break;
> +
>  			rcu_assign_pointer(*pp, p->next);
>  			hlist_del_init(&p->mglist);
>  			del_timer(&p->timer);

Why 'break' and not 'continue' like you have with
	if (!br_port_group_equal(p, port, src))

^ permalink raw reply

* Re: [PATCH 4/4] net: dsa: mv88e6xxx: add PTP support for MV88E6250 family
From: Andrew Lunn @ 2019-07-30 13:58 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	David S. Miller, Rasmus Villemoes
In-Reply-To: <20190730100429.32479-5-h.feurstein@gmail.com>

> diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
> index 720cace3db4e..64872251e479 100644
> --- a/drivers/net/dsa/mv88e6xxx/chip.h
> +++ b/drivers/net/dsa/mv88e6xxx/chip.h
> @@ -273,6 +273,10 @@ struct mv88e6xxx_chip {
>  	u16 trig_config;
>  	u16 evcap_config;
>  	u16 enable_count;
> +	u32 ptp_cc_shift;
> +	u32 ptp_cc_mult;
> +	u32 ptp_cc_mult_num;
> +	u32 ptp_cc_mult_dem;

Hi Hubert

Please add these to mv88e6xxx_ptp_ops. You can create a new one of
6250 which has the different values.

>  
>  	/* Per-port timestamping resources. */
>  	struct mv88e6xxx_port_hwtstamp port_hwtstamp[DSA_MAX_PORTS];
> diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
> index 768d256f7c9f..51cdf4712517 100644
> --- a/drivers/net/dsa/mv88e6xxx/ptp.c
> +++ b/drivers/net/dsa/mv88e6xxx/ptp.c
> @@ -15,11 +15,38 @@
>  #include "hwtstamp.h"
>  #include "ptp.h"
>  
> -/* Raw timestamps are in units of 8-ns clock periods. */
> -#define CC_SHIFT	28
> -#define CC_MULT		(8 << CC_SHIFT)
> -#define CC_MULT_NUM	(1 << 9)
> -#define CC_MULT_DEM	15625ULL
> +/* The adjfine API clamps ppb between [-32,768,000, 32,768,000], and
> + * therefore scaled_ppm between [-2,147,483,648, 2,147,483,647].
> + * Set the maximum supported ppb to a round value smaller than the maximum.
> + *
> + * Percentually speaking, this is a +/- 0.032x adjustment of the
> + * free-running counter (0.968x to 1.032x).
> + */
> +#define MV88E6XXX_MAX_ADJ_PPB	32000000
> +
> +/* Family MV88E6250:
> + * Raw timestamps are in units of 10-ns clock periods.
> + *
> + * clkadj = scaled_ppm * 10*2^28 / (10^6 * 2^16)
> + * simplifies to
> + * clkadj = scaled_ppm * 2^7 / 5^5
> + */
> +#define MV88E6250_CC_SHIFT	28
> +#define MV88E6250_CC_MULT	(10 << MV88E6250_CC_SHIFT)
> +#define MV88E6250_CC_MULT_NUM	(1 << 7)
> +#define MV88E6250_CC_MULT_DEM	3125ULL
> +
> +/* Other families:
> + * Raw timestamps are in units of 8-ns clock periods.
> + *
> + * clkadj = scaled_ppm * 8*2^28 / (10^6 * 2^16)
> + * simplifies to
> + * clkadj = scaled_ppm * 2^9 / 5^6
> + */
> +#define MV88E6XXX_CC_SHIFT	28
> +#define MV88E6XXX_CC_MULT	(8 << MV88E6XXX_CC_SHIFT)
> +#define MV88E6XXX_CC_MULT_NUM	(1 << 9)
> +#define MV88E6XXX_CC_MULT_DEM	15625ULL

Nice comments :-)

     Andrew

^ permalink raw reply

* RE: [PATCH net-next 3/3] net: stmmac: Introducing support for Page Pool
From: Jose Abreu @ 2019-07-30 13:58 UTC (permalink / raw)
  To: Jon Hunter, Jose Abreu, Robin Murphy,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	linux-stm32@st-md-mailman.stormreply.com,
	linux-arm-kernel@lists.infradead.org, Catalin Marinas,
	Will Deacon
  Cc: Joao Pinto, Alexandre Torgue, Maxime Ripard, Chen-Yu Tsai,
	Maxime Coquelin, linux-tegra, Giuseppe Cavallaro,
	David S . Miller
In-Reply-To: <8cb2ac13-3009-2117-d55f-6f1126b690e4@nvidia.com>

From: Jon Hunter <jonathanh@nvidia.com>
Date: Jul/30/2019, 14:36:39 (UTC+00:00)

> 
> On 30/07/2019 10:39, Jose Abreu wrote:
> 
> ...
> 
> > I looked at netsec implementation and I noticed that we are syncing the 
> > old buffer for device instead of the new one. netsec syncs the buffer 
> > for device immediately after the allocation which may be what we have to 
> > do. Maybe the attached patch can make things work for you ?
> 
> Great! This one works. I have booted this several times and I am no
> longer seeing any issues. Thanks for figuring this out!
> 
> Feel free to add my ...
> 
> Tested-by: Jon Hunter <jonathanh@nvidia.com>

This one was hard to find :) Thank you for your patience in testing 
this!

---
Thanks,
Jose Miguel Abreu

^ permalink raw reply

* Re: [PATCH] bridge:fragmented packets dropped by bridge
From: Rundong Ge @ 2019-07-30 13:58 UTC (permalink / raw)
  To: Nikolay Aleksandrov
  Cc: davem, kuznet, yoshfuji, netdev, Pablo Neira Ayuso, kadlec,
	Florian Westphal, Roopa Prabhu, netfilter-devel, coreteam, bridge,
	linux-kernel
In-Reply-To: <1dc87e69-628b-fd04-619a-8dbe5bdfa108@cumulusnetworks.com>

Yes it is about the frag_size check in br_nf_ip_fragment(). As i said
without br_netfilter the packets forwarding is fine.
And I feel it is weird that br_nf_dev_queue_xmit() use out dev's mtu
to decide whether to do the fragmentation, but
then br_nf_ip_fragment() use bridge's mtu to do the actual fragmentation.

And in this case fragmented packets fit the out dev mtu but were
dropped, I think it is not right.

Nikolay Aleksandrov <nikolay@cumulusnetworks.com> 于2019年7月30日周二 下午8:41写道：
>
> On 30/07/2019 15:25, Rundong Ge wrote:
> > Given following setup:
> > -modprobe br_netfilter
> > -echo '1' > /proc/sys/net/bridge/bridge-nf-call-iptables
> > -brctl addbr br0
> > -brctl addif br0 enp2s0
> > -brctl addif br0 enp3s0
> > -brctl addif br0 enp6s0
> > -ifconfig enp2s0 mtu 1300
> > -ifconfig enp3s0 mtu 1500
> > -ifconfig enp6s0 mtu 1500
> > -ifconfig br0 up
> >
> >                  multi-port
> > mtu1500 - mtu1500|bridge|1500 - mtu1500
> >   A                  |            B
> >                    mtu1300
> >
> > With netfilter defragmentation/conntrack enabled, fragmented
> > packets from A will be defragmented in prerouting, and refragmented
> > at postrouting.
> > But in this scenario the bridge found the frag_max_size(1500) is
> > larger than the dst mtu stored in the fake_rtable whitch is
> > always equal to the bridge's mtu 1300, then packets will be dopped.
> >
> > This modifies ip_skb_dst_mtu to use the out dev's mtu instead
> > of bridge's mtu in bridge refragment.
> >
> > Signed-off-by: Rundong Ge <rdong.ge@gmail.com>
> > ---
> >  include/net/ip.h | 2 ++
> >  1 file changed, 2 insertions(+)
> >
> > diff --git a/include/net/ip.h b/include/net/ip.h
> > index 29d89de..0512de3 100644
> > --- a/include/net/ip.h
> > +++ b/include/net/ip.h
> > @@ -450,6 +450,8 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
> >  static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
> >                                         const struct sk_buff *skb)
> >  {
> > +     if ((skb_dst(skb)->flags & DST_FAKE_RTABLE) && skb->dev)
> > +             return min(skb->dev->mtu, IP_MAX_MTU);
> >       if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) {
> >               bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
> >
> >
>
> I don't think this is correct, there's a reason why the bridge chooses the smallest
> possible MTU out of its members and this is simply a hack to circumvent it.
> If you really like to do so just set the bridge MTU manually, we've added support
> so it won't change automatically to the smallest, but then how do you pass packets
> 1500 -> 1300 in this setup ?
>
> You're talking about the frag_size check in br_nf_ip_fragment(), right ?
>

^ permalink raw reply

* Re: [PATCH] net: dsa: mv88e6xxx: extend PTP gettime function to read system clock
From: Andrew Lunn @ 2019-07-30 14:00 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	David S. Miller, richardcochran
In-Reply-To: <20190730101007.344-1-h.feurstein@gmail.com>

On Tue, Jul 30, 2019 at 12:10:07PM +0200, Hubert Feurstein wrote:
> This adds support for the PTP_SYS_OFFSET_EXTENDED ioctl.
> 
> Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>

Please include the PTP maintainer for patches like this. Richard also
wrote this PTP code.

      Andrew

> ---
>  drivers/net/dsa/mv88e6xxx/ptp.c | 11 +++++++----
>  1 file changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
> index 51cdf4712517..1ff983376f95 100644
> --- a/drivers/net/dsa/mv88e6xxx/ptp.c
> +++ b/drivers/net/dsa/mv88e6xxx/ptp.c
> @@ -230,14 +230,17 @@ static int mv88e6xxx_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
>  	return 0;
>  }
>  
> -static int mv88e6xxx_ptp_gettime(struct ptp_clock_info *ptp,
> -				 struct timespec64 *ts)
> +static int mv88e6xxx_ptp_gettimex(struct ptp_clock_info *ptp,
> +				  struct timespec64 *ts,
> +				  struct ptp_system_timestamp *sts)
>  {
>  	struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
>  	u64 ns;
>  
>  	mv88e6xxx_reg_lock(chip);
> +	ptp_read_system_prets(sts);
>  	ns = timecounter_read(&chip->tstamp_tc);
> +	ptp_read_system_postts(sts);
>  	mv88e6xxx_reg_unlock(chip);
>  
>  	*ts = ns_to_timespec64(ns);
> @@ -386,7 +389,7 @@ static void mv88e6xxx_ptp_overflow_check(struct work_struct *work)
>  	struct mv88e6xxx_chip *chip = dw_overflow_to_chip(dw);
>  	struct timespec64 ts;
>  
> -	mv88e6xxx_ptp_gettime(&chip->ptp_clock_info, &ts);
> +	mv88e6xxx_ptp_gettimex(&chip->ptp_clock_info, &ts, NULL);
>  
>  	schedule_delayed_work(&chip->overflow_work,
>  			      MV88E6XXX_TAI_OVERFLOW_PERIOD);
> @@ -444,7 +447,7 @@ int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
>  	chip->ptp_clock_info.max_adj    = MV88E6XXX_MAX_ADJ_PPB;
>  	chip->ptp_clock_info.adjfine	= mv88e6xxx_ptp_adjfine;
>  	chip->ptp_clock_info.adjtime	= mv88e6xxx_ptp_adjtime;
> -	chip->ptp_clock_info.gettime64	= mv88e6xxx_ptp_gettime;
> +	chip->ptp_clock_info.gettimex64	= mv88e6xxx_ptp_gettimex;
>  	chip->ptp_clock_info.settime64	= mv88e6xxx_ptp_settime;
>  	chip->ptp_clock_info.enable	= ptp_ops->ptp_enable;
>  	chip->ptp_clock_info.verify	= ptp_ops->ptp_verify;
> -- 
> 2.22.0
> 

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox