Netdev List
 help / color / mirror / Atom feed
* [PATCH ipsec-next 0/7] ipsec: add TCP encapsulation support (RFC 8229)
From: Sabrina Dubroca @ 2019-08-21 21:46 UTC (permalink / raw)
  To: netdev; +Cc: Herbert Xu, Steffen Klassert, Sabrina Dubroca

This patchset introduces support for TCP encapsulation of IKE and ESP
messages, as defined by RFC 8229 [0]. It is an evolution of what
Herbert Xu proposed in January 2018 [1] that addresses the main
criticism against it, by not interfering with the TCP implementation
at all. The networking stack now has infrastructure for this: TCP ULPs
and Stream Parsers.

The first patches are preparation and refactoring, and the final patch
adds the feature.

The main omission in this submission is IPv6 support. ESP
encapsulation over UDP with IPv6 is currently not supported in the
kernel either, as UDP encapsulation is aimed at NAT traversal, and NAT
is not frequently used with IPv6.

Some of the code is taken directly, or slightly modified, from Herbert
Xu's original submission [1]. The ULP and strparser pieces are
new. This work was presented and discussed at the IPsec workshop and
netdev 0x13 conference [2] in Prague, last March.

An equivalent of patch #1 (skbuff: Avoid sleeping in
skb_send_sock_locked) is already present in other trees (but not
ipsec-next) as commit bd95e678e0f6 ("bpf: sockmap, fix use after free
from sleep in psock backlog workqueue"), I'm only including it here so
that this patchset works correctly on top of ipsec-next/master.

No changes in the patchset since the RFC.

[0] https://tools.ietf.org/html/rfc8229
[1] https://patchwork.ozlabs.org/patch/859107/
[2] https://netdevconf.org/0x13/session.html?talk-ipsec-encap

Herbert Xu (1):
  skbuff: Avoid sleeping in skb_send_sock_locked

Sabrina Dubroca (6):
  net: add queue argument to __skb_wait_for_more_packets and
    __skb_{,try_}recv_datagram
  xfrm: introduce xfrm_trans_queue_net
  xfrm: add route lookup to xfrm4_rcv_encap
  esp4: prepare esp_input_done2 for non-UDP encapsulation
  esp4: split esp_output_udp_encap and introduce esp_output_encap
  xfrm: add espintcp (RFC 8229)

 include/linux/skbuff.h    |  11 +-
 include/net/espintcp.h    |  38 +++
 include/net/xfrm.h        |   4 +
 include/uapi/linux/udp.h  |   1 +
 net/core/datagram.c       |  26 +-
 net/core/skbuff.c         |   1 +
 net/ipv4/esp4.c           | 262 ++++++++++++++++++--
 net/ipv4/udp.c            |   3 +-
 net/ipv4/xfrm4_protocol.c |   9 +
 net/unix/af_unix.c        |   7 +-
 net/xfrm/Kconfig          |   9 +
 net/xfrm/Makefile         |   1 +
 net/xfrm/espintcp.c       | 505 ++++++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_input.c     |  21 +-
 net/xfrm/xfrm_policy.c    |   7 +
 net/xfrm/xfrm_state.c     |   3 +
 16 files changed, 862 insertions(+), 46 deletions(-)
 create mode 100644 include/net/espintcp.h
 create mode 100644 net/xfrm/espintcp.c

-- 
2.22.0


^ permalink raw reply

* [PATCH ipsec-next 1/7] net: add queue argument to __skb_wait_for_more_packets and __skb_{,try_}recv_datagram
From: Sabrina Dubroca @ 2019-08-21 21:46 UTC (permalink / raw)
  To: netdev; +Cc: Herbert Xu, Steffen Klassert, Sabrina Dubroca
In-Reply-To: <cover.1566395202.git.sd@queasysnail.net>

This will be used by ESP over TCP to handle the queue of IKE messages.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
 include/linux/skbuff.h | 11 ++++++++---
 net/core/datagram.c    | 26 ++++++++++++++++----------
 net/ipv4/udp.c         |  3 ++-
 net/unix/af_unix.c     |  7 ++++---
 4 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 98ff5ac98caa..149c542115a6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3377,7 +3377,8 @@ static inline void skb_frag_list_init(struct sk_buff *skb)
 	for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)
 
 
-int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
+int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
+				int *err, long *timeo_p,
 				const struct sk_buff *skb);
 struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
 					  struct sk_buff_head *queue,
@@ -3386,12 +3387,16 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
 							   struct sk_buff *skb),
 					  int *off, int *err,
 					  struct sk_buff **last);
-struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags,
+struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
+					struct sk_buff_head *queue,
+					unsigned int flags,
 					void (*destructor)(struct sock *sk,
 							   struct sk_buff *skb),
 					int *off, int *err,
 					struct sk_buff **last);
-struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
+struct sk_buff *__skb_recv_datagram(struct sock *sk,
+				    struct sk_buff_head *sk_queue,
+				    unsigned int flags,
 				    void (*destructor)(struct sock *sk,
 						       struct sk_buff *skb),
 				    int *off, int *err);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 45a162ef5e02..5fe681e1f4ae 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -84,7 +84,8 @@ static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, i
 /*
  * Wait for the last received packet to be different from skb
  */
-int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
+int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
+				int *err, long *timeo_p,
 				const struct sk_buff *skb)
 {
 	int error;
@@ -97,7 +98,7 @@ int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
 	if (error)
 		goto out_err;
 
-	if (sk->sk_receive_queue.prev != skb)
+	if (queue->prev != skb)
 		goto out;
 
 	/* Socket shut down? */
@@ -241,13 +242,14 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
  *	quite explicitly by POSIX 1003.1g, don't change them without having
  *	the standard around please.
  */
-struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
+struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
+					struct sk_buff_head *queue,
+					unsigned int flags,
 					void (*destructor)(struct sock *sk,
 							   struct sk_buff *skb),
 					int *off, int *err,
 					struct sk_buff **last)
 {
-	struct sk_buff_head *queue = &sk->sk_receive_queue;
 	struct sk_buff *skb;
 	unsigned long cpu_flags;
 	/*
@@ -278,7 +280,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
 			break;
 
 		sk_busy_loop(sk, flags & MSG_DONTWAIT);
-	} while (sk->sk_receive_queue.prev != *last);
+	} while (queue->prev != *last);
 
 	error = -EAGAIN;
 
@@ -288,7 +290,9 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
 }
 EXPORT_SYMBOL(__skb_try_recv_datagram);
 
-struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
+struct sk_buff *__skb_recv_datagram(struct sock *sk,
+				    struct sk_buff_head *sk_queue,
+				    unsigned int flags,
 				    void (*destructor)(struct sock *sk,
 						       struct sk_buff *skb),
 				    int *off, int *err)
@@ -299,15 +303,16 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
 	do {
-		skb = __skb_try_recv_datagram(sk, flags, destructor, off, err,
-					      &last);
+		skb = __skb_try_recv_datagram(sk, sk_queue, flags, destructor,
+					      off, err, &last);
 		if (skb)
 			return skb;
 
 		if (*err != -EAGAIN)
 			break;
 	} while (timeo &&
-		!__skb_wait_for_more_packets(sk, err, &timeo, last));
+		 !__skb_wait_for_more_packets(sk, sk_queue, err,
+					      &timeo, last));
 
 	return NULL;
 }
@@ -318,7 +323,8 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
 {
 	int off = 0;
 
-	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+	return __skb_recv_datagram(sk, &sk->sk_receive_queue,
+				   flags | (noblock ? MSG_DONTWAIT : 0),
 				   NULL, &off, err);
 }
 EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8fb250ed53d4..40067fc4c82b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1690,7 +1690,8 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
 
 		/* sk_queue is empty, reader_queue may contain peeked packets */
 	} while (timeo &&
-		 !__skb_wait_for_more_packets(sk, &error, &timeo,
+		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
+					      &error, &timeo,
 					      (struct sk_buff *)sk_queue));
 
 	*err = error;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e68d7454f2e3..91c1ffd82ff9 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2053,8 +2053,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 		mutex_lock(&u->iolock);
 
 		skip = sk_peek_offset(sk, flags);
-		skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
-					      &last);
+		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
+					      NULL, &skip, &err, &last);
 		if (skb)
 			break;
 
@@ -2063,7 +2063,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 		if (err != -EAGAIN)
 			break;
 	} while (timeo &&
-		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
+		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
+					      &err, &timeo, last));
 
 	if (!skb) { /* implies iolock unlocked */
 		unix_state_lock(sk);
-- 
2.22.0


^ permalink raw reply related

* [PATCH ipsec-next 2/7] skbuff: Avoid sleeping in skb_send_sock_locked
From: Sabrina Dubroca @ 2019-08-21 21:46 UTC (permalink / raw)
  To: netdev; +Cc: Herbert Xu, Steffen Klassert, Sabrina Dubroca
In-Reply-To: <cover.1566395202.git.sd@queasysnail.net>

From: Herbert Xu <herbert@gondor.apana.org.au>

For a function that needs to be called with the socket spinlock
held, sleeping would seem to be a bad idea.  This function does
in fact avoid sleeping when calling kernel_sendpage_locked on the
page part of the skb.  However, it doesn't do that when sending
the linear part.  Resulting in sleeping when the socket send buffer
is full.

This patch fixes it by setting the MSG_DONTWAIT flag when calling
kernel_sendmsg_locked.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
 net/core/skbuff.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b50a5e3ac4e4..f863c7ef417c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2367,6 +2367,7 @@ int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
 		kv.iov_base = skb->data + offset;
 		kv.iov_len = slen;
 		memset(&msg, 0, sizeof(msg));
+		msg.msg_flags = MSG_DONTWAIT;
 
 		ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
 		if (ret <= 0)
-- 
2.22.0


^ permalink raw reply related

* [PATCH ipsec-next 3/7] xfrm: introduce xfrm_trans_queue_net
From: Sabrina Dubroca @ 2019-08-21 21:46 UTC (permalink / raw)
  To: netdev; +Cc: Herbert Xu, Steffen Klassert, Sabrina Dubroca
In-Reply-To: <cover.1566395202.git.sd@queasysnail.net>

This will be used by TCP encapsulation to write packets to the encap
socket without holding the user socket's lock. Without this reinjection,
we're already holding the lock of the user socket, and then try to lock
the encap socket as well when we enqueue the encrypted packet.

While at it, add a BUILD_BUG_ON like we usually do for skb->cb, since
it's missing for struct xfrm_trans_cb.

Co-developed-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
 include/net/xfrm.h    |  3 +++
 net/xfrm/xfrm_input.c | 21 +++++++++++++++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index b22db30c3d88..afedc9210c4b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1549,6 +1549,9 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
 int xfrm_init_state(struct xfrm_state *x);
 int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
 int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
+int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
+			 int (*finish)(struct net *, struct sock *,
+				       struct sk_buff *));
 int xfrm_trans_queue(struct sk_buff *skb,
 		     int (*finish)(struct net *, struct sock *,
 				   struct sk_buff *));
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 6088bc2dc11e..eb0f0e64c71c 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -36,6 +36,7 @@ struct xfrm_trans_cb {
 #endif
 	} header;
 	int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
+	struct net *net;
 };
 
 #define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
@@ -763,12 +764,13 @@ static void xfrm_trans_reinject(unsigned long data)
 	skb_queue_splice_init(&trans->queue, &queue);
 
 	while ((skb = __skb_dequeue(&queue)))
-		XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
+		XFRM_TRANS_SKB_CB(skb)->finish(XFRM_TRANS_SKB_CB(skb)->net,
+					       NULL, skb);
 }
 
-int xfrm_trans_queue(struct sk_buff *skb,
-		     int (*finish)(struct net *, struct sock *,
-				   struct sk_buff *))
+int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
+			 int (*finish)(struct net *, struct sock *,
+				       struct sk_buff *))
 {
 	struct xfrm_trans_tasklet *trans;
 
@@ -777,11 +779,22 @@ int xfrm_trans_queue(struct sk_buff *skb,
 	if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
 		return -ENOBUFS;
 
+	BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
+
 	XFRM_TRANS_SKB_CB(skb)->finish = finish;
+	XFRM_TRANS_SKB_CB(skb)->net = net;
 	__skb_queue_tail(&trans->queue, skb);
 	tasklet_schedule(&trans->tasklet);
 	return 0;
 }
+EXPORT_SYMBOL(xfrm_trans_queue_net);
+
+int xfrm_trans_queue(struct sk_buff *skb,
+		     int (*finish)(struct net *, struct sock *,
+				   struct sk_buff *))
+{
+	return xfrm_trans_queue_net(dev_net(skb->dev), skb, finish);
+}
 EXPORT_SYMBOL(xfrm_trans_queue);
 
 void __init xfrm_input_init(void)
-- 
2.22.0


^ permalink raw reply related

* [PATCH ipsec-next 4/7] xfrm: add route lookup to xfrm4_rcv_encap
From: Sabrina Dubroca @ 2019-08-21 21:46 UTC (permalink / raw)
  To: netdev; +Cc: Herbert Xu, Steffen Klassert, Sabrina Dubroca
In-Reply-To: <cover.1566395202.git.sd@queasysnail.net>

At this point, with TCP encapsulation, the dst may be gone, but
xfrm_input needs one.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
 net/ipv4/xfrm4_protocol.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index bcab48944c15..1665e1a05ec5 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -76,6 +76,14 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 	if (!head)
 		goto out;
 
+	if (!skb_dst(skb)) {
+		const struct iphdr *iph = ip_hdr(skb);
+
+		if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
+					 iph->tos, skb->dev))
+			goto drop;
+	}
+
 	for_each_protocol_rcu(*head, handler)
 		if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL)
 			return ret;
@@ -83,6 +91,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 out:
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 
+drop:
 	kfree_skb(skb);
 	return 0;
 }
-- 
2.22.0


^ permalink raw reply related

* [PATCH ipsec-next 5/7] esp4: prepare esp_input_done2 for non-UDP encapsulation
From: Sabrina Dubroca @ 2019-08-21 21:46 UTC (permalink / raw)
  To: netdev; +Cc: Herbert Xu, Steffen Klassert, Sabrina Dubroca
In-Reply-To: <cover.1566395202.git.sd@queasysnail.net>

For espintcp encapsulation, we will need to get the source port from the
TCP header instead of UDP. Introduce a variable to hold the port.

Co-developed-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
 net/ipv4/esp4.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 5c967764041f..c5d826642229 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -601,6 +601,18 @@ int esp_input_done2(struct sk_buff *skb, int err)
 	if (x->encap) {
 		struct xfrm_encap_tmpl *encap = x->encap;
 		struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
+		__be16 source;
+
+		switch (x->encap->encap_type) {
+		case UDP_ENCAP_ESPINUDP:
+		case UDP_ENCAP_ESPINUDP_NON_IKE:
+			source = uh->source;
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			err = -EINVAL;
+			goto out;
+		}
 
 		/*
 		 * 1) if the NAT-T peer's IP or port changed then
@@ -609,11 +621,11 @@ int esp_input_done2(struct sk_buff *skb, int err)
 		 *    SRC ports.
 		 */
 		if (iph->saddr != x->props.saddr.a4 ||
-		    uh->source != encap->encap_sport) {
+		    source != encap->encap_sport) {
 			xfrm_address_t ipaddr;
 
 			ipaddr.a4 = iph->saddr;
-			km_new_mapping(x, &ipaddr, uh->source);
+			km_new_mapping(x, &ipaddr, source);
 
 			/* XXX: perhaps add an extra
 			 * policy check here, to see
-- 
2.22.0


^ permalink raw reply related

* [PATCH ipsec-next 7/7] xfrm: add espintcp (RFC 8229)
From: Sabrina Dubroca @ 2019-08-21 21:46 UTC (permalink / raw)
  To: netdev; +Cc: Herbert Xu, Steffen Klassert, Sabrina Dubroca
In-Reply-To: <cover.1566395202.git.sd@queasysnail.net>

TCP encapsulation of IKE and IPsec messages (RFC 8229) is implemented
as a TCP ULP, overriding in particular the sendmsg and recvmsg
operations. A Stream Parser is used to extract messages out of the TCP
stream using the first 2 bytes as length marker. Received IKE messages
are put on "ike_queue", waiting to be dequeued by the custom recvmsg
implementation. Received ESP messages are sent to XFRM, like with UDP
encapsulation.

Some of this code is taken from the original submission by Herbert
Xu. Currently, only IPv4 is supported, like for UDP encapsulation.

Co-developed-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
 include/net/espintcp.h   |  38 +++
 include/net/xfrm.h       |   1 +
 include/uapi/linux/udp.h |   1 +
 net/ipv4/esp4.c          | 189 ++++++++++++++-
 net/xfrm/Kconfig         |   9 +
 net/xfrm/Makefile        |   1 +
 net/xfrm/espintcp.c      | 505 +++++++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_policy.c   |   7 +
 net/xfrm/xfrm_state.c    |   3 +
 9 files changed, 751 insertions(+), 3 deletions(-)
 create mode 100644 include/net/espintcp.h
 create mode 100644 net/xfrm/espintcp.c

diff --git a/include/net/espintcp.h b/include/net/espintcp.h
new file mode 100644
index 000000000000..02fc28c82d30
--- /dev/null
+++ b/include/net/espintcp.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_ESPINTCP_H
+#define _NET_ESPINTCP_H
+
+#include <net/strparser.h>
+#include <linux/skmsg.h>
+
+void __init espintcp_init(void);
+
+int espintcp_push_skb(struct sock *sk, struct sk_buff *skb);
+int espintcp_queue_out(struct sock *sk, struct sk_buff *skb);
+bool tcp_is_ulp_esp(struct sock *sk);
+
+struct espintcp_msg {
+	struct sk_buff *skb;
+	struct sk_msg skmsg;
+	int offset;
+	int len;
+};
+
+struct espintcp_ctx {
+	struct strparser strp;
+	struct sk_buff_head ike_queue;
+	struct sk_buff_head out_queue;
+	struct espintcp_msg partial;
+	void (*saved_data_ready)(struct sock *sk);
+	void (*saved_write_space)(struct sock *sk);
+	struct work_struct work;
+	bool tx_running;
+};
+
+static inline struct espintcp_ctx *espintcp_getctx(const struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	return icsk->icsk_ulp_data;
+}
+#endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index afedc9210c4b..3dd3c199ecfa 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -193,6 +193,7 @@ struct xfrm_state {
 
 	/* Data for encapsulator */
 	struct xfrm_encap_tmpl	*encap;
+	struct sock __rcu	*encap_sk;
 
 	/* Data for care-of address */
 	xfrm_address_t	*coaddr;
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index 30baccb6c9c4..4828794efcf8 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -42,5 +42,6 @@ struct udphdr {
 #define UDP_ENCAP_GTP0		4 /* GSM TS 09.60 */
 #define UDP_ENCAP_GTP1U		5 /* 3GPP TS 29.060 */
 #define UDP_ENCAP_RXRPC		6
+#define TCP_ENCAP_ESPINTCP	7 /* Yikes, this is really xfrm encap types. */
 
 #endif /* _UAPI_LINUX_UDP_H */
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 033c61d27148..d101aca546c3 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -18,6 +18,8 @@
 #include <net/icmp.h>
 #include <net/protocol.h>
 #include <net/udp.h>
+#include <net/tcp.h>
+#include <net/espintcp.h>
 
 #include <linux/highmem.h>
 
@@ -117,6 +119,129 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
 			put_page(sg_page(sg));
 }
 
+#ifdef CONFIG_XFRM_ESPINTCP
+struct esp_tcp_sk {
+	struct sock *sk;
+	struct rcu_head rcu;
+};
+
+static void esp_free_tcp_sk(struct rcu_head *head)
+{
+	struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
+
+	sock_put(esk->sk);
+	kfree(esk);
+}
+
+static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
+{
+	struct xfrm_encap_tmpl *encap = x->encap;
+	struct esp_tcp_sk *esk;
+	__be16 sport, dport;
+	struct sock *nsk;
+	struct sock *sk;
+
+	sk = rcu_dereference(x->encap_sk);
+	if (sk && sk->sk_state == TCP_ESTABLISHED)
+		return sk;
+
+	spin_lock_bh(&x->lock);
+	sport = encap->encap_sport;
+	dport = encap->encap_dport;
+	nsk = rcu_dereference_protected(x->encap_sk,
+					lockdep_is_held(&x->lock));
+	if (sk && sk == nsk) {
+		esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
+		if (!esk) {
+			spin_unlock_bh(&x->lock);
+			return ERR_PTR(-ENOMEM);
+		}
+		RCU_INIT_POINTER(x->encap_sk, NULL);
+		esk->sk = sk;
+		call_rcu(&esk->rcu, esp_free_tcp_sk);
+	}
+	spin_unlock_bh(&x->lock);
+
+	sk = inet_lookup_established(xs_net(x), &tcp_hashinfo, x->id.daddr.a4,
+				     dport, x->props.saddr.a4, sport, 0);
+	if (!sk)
+		return ERR_PTR(-ENOENT);
+
+	if (!tcp_is_ulp_esp(sk)) {
+		sock_put(sk);
+		return ERR_PTR(-EINVAL);
+	}
+
+	spin_lock_bh(&x->lock);
+	nsk = rcu_dereference_protected(x->encap_sk,
+					lockdep_is_held(&x->lock));
+	if (encap->encap_sport != sport ||
+	    encap->encap_dport != dport) {
+		sock_put(sk);
+		sk = nsk ?: ERR_PTR(-EREMCHG);
+	} else if (sk == nsk) {
+		sock_put(sk);
+	} else {
+		rcu_assign_pointer(x->encap_sk, sk);
+	}
+	spin_unlock_bh(&x->lock);
+
+	return sk;
+}
+
+static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct sock *sk;
+	int err;
+
+	rcu_read_lock();
+
+	sk = esp_find_tcp_sk(x);
+	err = PTR_ERR(sk);
+	if (IS_ERR(sk))
+		goto out;
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
+		err = espintcp_queue_out(sk, skb);
+		if (err < 0)
+			goto unlock_sock;
+	} else {
+		err = espintcp_push_skb(sk, skb);
+	}
+
+unlock_sock:
+	bh_unlock_sock(sk);
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk,
+				   struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct xfrm_state *x = dst->xfrm;
+
+	return esp_output_tcp_finish(x, skb);
+}
+
+static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+
+	local_bh_disable();
+	err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb);
+	local_bh_enable();
+
+	/* EINPROGRESS just happens to do the right thing.  It
+	 * actually means that the skb has been consumed and
+	 * isn't coming back.
+	 */
+	return err ?: -EINPROGRESS;
+}
+#endif
+
 static void esp_output_done(struct crypto_async_request *base, int err)
 {
 	struct sk_buff *skb = base->data;
@@ -147,7 +272,13 @@ static void esp_output_done(struct crypto_async_request *base, int err)
 		secpath_reset(skb);
 		xfrm_dev_resume(skb);
 	} else {
-		xfrm_output_resume(skb, err);
+#ifdef CONFIG_XFRM_ESPINTCP
+		if (!err &&
+		    x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
+			esp_output_tail_tcp(x, skb);
+		else
+#endif
+			xfrm_output_resume(skb, err);
 	}
 }
 
@@ -236,7 +367,7 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
 	unsigned int len;
 
 	len = skb->len + esp->tailen - skb_transport_offset(skb);
-	if (len + sizeof(struct iphdr) >= IP_MAX_MTU)
+	if (len + sizeof(struct iphdr) > IP_MAX_MTU)
 		return ERR_PTR(-EMSGSIZE);
 
 	uh = (struct udphdr *)esp->esph;
@@ -256,6 +387,23 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
 	return (struct ip_esp_hdr *)(uh + 1);
 }
 
+#ifdef CONFIG_XFRM_ESPINTCP
+static struct ip_esp_hdr *esp_output_tcp_encap(struct sk_buff *skb,
+					       struct esp_info *esp)
+{
+	__be16 *lenp = (void *)esp->esph;
+	unsigned int len;
+
+	len = skb->len + esp->tailen - skb_transport_offset(skb);
+	if (len > IP_MAX_MTU)
+		return ERR_PTR(-EMSGSIZE);
+
+	*lenp = htons(len);
+
+	return (struct ip_esp_hdr *)(lenp + 1);
+}
+#endif
+
 static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb,
 			    struct esp_info *esp)
 {
@@ -276,6 +424,22 @@ static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb,
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		esph = esp_output_udp_encap(skb, encap_type, esp, sport, dport);
 		break;
+#ifdef CONFIG_XFRM_ESPINTCP
+	case TCP_ENCAP_ESPINTCP: {
+		struct sock *sk;
+
+		rcu_read_lock();
+		sk = esp_find_tcp_sk(x);
+		if (IS_ERR(sk)) {
+			rcu_read_unlock();
+			return PTR_ERR(sk);
+		}
+
+		esph = esp_output_tcp_encap(skb, esp);
+		rcu_read_unlock();
+		break;
+	}
+#endif
 	}
 
 	if (IS_ERR(esph))
@@ -296,7 +460,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 	struct sk_buff *trailer;
 	int tailen = esp->tailen;
 
-	/* this is non-NULL only with UDP Encapsulation */
+	/* this is non-NULL only with TCP/UDP Encapsulation */
 	if (x->encap) {
 		int err = esp_output_encap(x, skb, esp);
 
@@ -491,6 +655,11 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 	if (sg != dsg)
 		esp_ssg_unref(x, tmp);
 
+#ifdef CONFIG_XFRM_ESPINTCP
+	if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
+		err = esp_output_tail_tcp(x, skb);
+#endif
+
 error_free:
 	kfree(tmp);
 error:
@@ -617,10 +786,16 @@ int esp_input_done2(struct sk_buff *skb, int err)
 
 	if (x->encap) {
 		struct xfrm_encap_tmpl *encap = x->encap;
+		struct tcphdr *th = (void *)(skb_network_header(skb) + ihl);
 		struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
 		__be16 source;
 
 		switch (x->encap->encap_type) {
+#ifdef CONFIG_XFRM_ESPINTCP
+		case TCP_ENCAP_ESPINTCP:
+			source = th->source;
+			break;
+#endif
 		case UDP_ENCAP_ESPINUDP:
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
 			source = uh->source;
@@ -1017,6 +1192,14 @@ static int esp_init_state(struct xfrm_state *x)
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
 			x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
 			break;
+#ifdef CONFIG_XFRM_ESPINTCP
+		case TCP_ENCAP_ESPINTCP:
+			/* only the length field, TCP encap is done by
+			 * the socket
+			 */
+			x->props.header_len += 2;
+			break;
+#endif
 		}
 	}
 
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index c967fc3c38c8..ccc012b3ea10 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -71,6 +71,15 @@ config XFRM_IPCOMP
 	select CRYPTO
 	select CRYPTO_DEFLATE
 
+config XFRM_ESPINTCP
+	bool "ESP in TCP encapsulation (RFC 8229)"
+	depends on XFRM && INET_ESP
+	select STREAM_PARSER
+	help
+	  Support for RFC 8229 encapsulation of ESP and IKE over TCP sockets.
+
+	  If unsure, say N.
+
 config NET_KEY
 	tristate "PF_KEY sockets"
 	select XFRM_ALGO
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index fbc4552d17b8..2d4bb4b9f75e 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
 obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
 obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
+obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
new file mode 100644
index 000000000000..1d561a00c4b0
--- /dev/null
+++ b/net/xfrm/espintcp.c
@@ -0,0 +1,505 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <net/tcp.h>
+#include <net/strparser.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+#include <net/espintcp.h>
+#include <linux/skmsg.h>
+#include <net/inet_common.h>
+
+static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
+			  struct sock *sk)
+{
+	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf ||
+	    !sk_rmem_schedule(sk, skb, skb->truesize)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	skb_set_owner_r(skb, sk);
+
+	memset(skb->cb, 0, sizeof(skb->cb));
+	skb_queue_tail(&ctx->ike_queue, skb);
+	ctx->saved_data_ready(sk);
+}
+
+static void handle_esp(struct sk_buff *skb, struct sock *sk)
+{
+	skb_reset_transport_header(skb);
+	memset(skb->cb, 0, sizeof(skb->cb));
+
+	rcu_read_lock();
+	skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
+	local_bh_disable();
+	xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
+	local_bh_enable();
+	rcu_read_unlock();
+}
+
+static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb)
+{
+	struct espintcp_ctx *ctx = container_of(strp, struct espintcp_ctx,
+						strp);
+	struct strp_msg *rxm = strp_msg(skb);
+	u32 nonesp_marker;
+	int err;
+
+	err = skb_copy_bits(skb, rxm->offset + 2, &nonesp_marker,
+			    sizeof(nonesp_marker));
+	if (err < 0) {
+		kfree_skb(skb);
+		return;
+	}
+
+	/* remove header, leave non-ESP marker/SPI */
+	if (!__pskb_pull(skb, rxm->offset + 2)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	if (pskb_trim(skb, rxm->full_len - 2) != 0) {
+		kfree_skb(skb);
+		return;
+	}
+
+	if (nonesp_marker == 0)
+		handle_nonesp(ctx, skb, strp->sk);
+	else
+		handle_esp(skb, strp->sk);
+}
+
+static int espintcp_parse(struct strparser *strp, struct sk_buff *skb)
+{
+	struct strp_msg *rxm = strp_msg(skb);
+	__be16 blen;
+	u16 len;
+	int err;
+
+	if (skb->len < rxm->offset + 2)
+		return 0;
+
+	err = skb_copy_bits(skb, rxm->offset, &blen, sizeof(blen));
+	if (err < 0)
+		return err;
+
+	len = be16_to_cpu(blen);
+	if (len < 6)
+		return -EINVAL;
+
+	return len;
+}
+
+static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			    int nonblock, int flags, int *addr_len)
+{
+	struct espintcp_ctx *ctx = espintcp_getctx(sk);
+	struct sk_buff *skb;
+	int err = 0;
+	int copied;
+	int off = 0;
+
+	flags |= nonblock ? MSG_DONTWAIT : 0;
+
+	skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, NULL, &off, &err);
+	if (!skb)
+		return err;
+
+	copied = len;
+	if (copied > skb->len)
+		copied = skb->len;
+	else if (copied < skb->len)
+		msg->msg_flags |= MSG_TRUNC;
+
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
+	if (unlikely(err)) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	if (flags & MSG_TRUNC)
+		copied = skb->len;
+	kfree_skb(skb);
+	return copied;
+}
+
+int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
+{
+	struct espintcp_ctx *ctx = espintcp_getctx(sk);
+
+	if (skb_queue_len(&ctx->out_queue) >= netdev_max_backlog)
+		return -ENOBUFS;
+
+	__skb_queue_tail(&ctx->out_queue, skb);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(espintcp_queue_out);
+
+/* espintcp length field is 2B and length includes the length field's size */
+#define MAX_ESPINTCP_MSG (((1 << 16) - 1) - 2)
+
+static int espintcp_sendskb_locked(struct sock *sk, struct espintcp_msg *emsg,
+				   int flags)
+{
+	do {
+		int ret;
+
+		ret = skb_send_sock_locked(sk, emsg->skb,
+					   emsg->offset, emsg->len);
+		if (ret < 0)
+			return ret;
+
+		emsg->len -= ret;
+		emsg->offset += ret;
+	} while (emsg->len > 0);
+
+	kfree_skb(emsg->skb);
+	memset(emsg, 0, sizeof(*emsg));
+
+	return 0;
+}
+
+static int espintcp_sendskmsg_locked(struct sock *sk,
+				     struct espintcp_msg *emsg, int flags)
+{
+	struct sk_msg *skmsg = &emsg->skmsg;
+	struct scatterlist *sg;
+	int done = 0;
+	int ret;
+
+	flags |= MSG_SENDPAGE_NOTLAST;
+	sg = &skmsg->sg.data[skmsg->sg.start];
+	do {
+		size_t size = sg->length - emsg->offset;
+		int offset = sg->offset + emsg->offset;
+		struct page *p;
+
+		emsg->offset = 0;
+
+		if (sg_is_last(sg))
+			flags &= ~MSG_SENDPAGE_NOTLAST;
+
+		p = sg_page(sg);
+retry:
+		ret = do_tcp_sendpages(sk, p, offset, size, flags);
+		if (ret < 0) {
+			emsg->offset = offset - sg->offset;
+			skmsg->sg.start += done;
+			return ret;
+		}
+
+		if (ret != size) {
+			offset += ret;
+			size -= ret;
+			goto retry;
+		}
+
+		done++;
+		put_page(p);
+		sk_mem_uncharge(sk, sg->length);
+		sg = sg_next(sg);
+	} while (sg);
+
+	memset(emsg, 0, sizeof(*emsg));
+
+	return 0;
+}
+
+static int espintcp_push_msgs(struct sock *sk)
+{
+	struct espintcp_ctx *ctx = espintcp_getctx(sk);
+	struct espintcp_msg *emsg = &ctx->partial;
+	int err;
+
+	if (!emsg->len)
+		return 0;
+
+	if (ctx->tx_running)
+		return -EAGAIN;
+	ctx->tx_running = 1;
+
+	if (emsg->skb)
+		err = espintcp_sendskb_locked(sk, emsg, 0);
+	else
+		err = espintcp_sendskmsg_locked(sk, emsg, 0);
+	if (err == -EAGAIN) {
+		ctx->tx_running = 0;
+		return 0;
+	}
+	if (!err)
+		memset(emsg, 0, sizeof(*emsg));
+
+	ctx->tx_running = 0;
+
+	return err;
+}
+
+int espintcp_push_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct espintcp_ctx *ctx = espintcp_getctx(sk);
+	struct espintcp_msg *emsg = &ctx->partial;
+	unsigned int len;
+	int offset;
+
+	if (sk->sk_state != TCP_ESTABLISHED) {
+		kfree_skb(skb);
+		return -ECONNRESET;
+	}
+
+	offset = skb_transport_offset(skb);
+	len = skb->len - offset;
+
+	espintcp_push_msgs(sk);
+
+	if (emsg->len) {
+		kfree_skb(skb);
+		return -ENOBUFS;
+	}
+
+	skb_set_owner_w(skb, sk);
+
+	emsg->offset = offset;
+	emsg->len = len;
+	emsg->skb = skb;
+
+	espintcp_push_msgs(sk);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(espintcp_push_skb);
+
+static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+	long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+	struct espintcp_ctx *ctx = espintcp_getctx(sk);
+	struct espintcp_msg *emsg = &ctx->partial;
+	struct iov_iter pfx_iter;
+	struct kvec pfx_iov = {};
+	size_t msglen = size + 2;
+	char buf[2] = {0};
+	int err, end;
+
+	if (msg->msg_flags)
+		return -EOPNOTSUPP;
+
+	if (size > MAX_ESPINTCP_MSG)
+		return -EMSGSIZE;
+
+	if (msg->msg_controllen)
+		return -EOPNOTSUPP;
+
+	lock_sock(sk);
+
+	err = espintcp_push_msgs(sk);
+	if (err < 0) {
+		err = -ENOBUFS;
+		goto unlock;
+	}
+
+	sk_msg_init(&emsg->skmsg);
+	while (1) {
+		/* only -ENOMEM is possible since we don't coalesce */
+		err = sk_msg_alloc(sk, &emsg->skmsg, msglen, 0);
+		if (!err)
+			break;
+
+		err = sk_stream_wait_memory(sk, &timeo);
+		if (err)
+			goto fail;
+	}
+
+	*((__be16 *)buf) = cpu_to_be16(msglen);
+	pfx_iov.iov_base = buf;
+	pfx_iov.iov_len = sizeof(buf);
+	iov_iter_kvec(&pfx_iter, WRITE, &pfx_iov, 1, pfx_iov.iov_len);
+
+	err = sk_msg_memcopy_from_iter(sk, &pfx_iter, &emsg->skmsg,
+				       pfx_iov.iov_len);
+	if (err < 0)
+		goto fail;
+
+	err = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, &emsg->skmsg, size);
+	if (err < 0)
+		goto fail;
+
+	end = emsg->skmsg.sg.end;
+	emsg->len = size;
+	sk_msg_iter_var_prev(end);
+	sg_mark_end(sk_msg_elem(&emsg->skmsg, end));
+
+	tcp_rate_check_app_limited(sk);
+
+	err = espintcp_push_msgs(sk);
+	/* this message could be partially sent, keep it */
+	if (err < 0)
+		goto unlock;
+	release_sock(sk);
+
+	return size;
+
+fail:
+	sk_msg_free(sk, &emsg->skmsg);
+	memset(emsg, 0, sizeof(*emsg));
+unlock:
+	release_sock(sk);
+	return err;
+}
+
+static struct proto espintcp_prot __ro_after_init;
+static struct proto_ops espintcp_ops __ro_after_init;
+
+static void espintcp_data_ready(struct sock *sk)
+{
+	struct espintcp_ctx *ctx = espintcp_getctx(sk);
+
+	strp_data_ready(&ctx->strp);
+}
+
+static void espintcp_tx_work(struct work_struct *work)
+{
+	struct espintcp_ctx *ctx = container_of(work,
+						struct espintcp_ctx, work);
+	struct sock *sk = ctx->strp.sk;
+
+	lock_sock(sk);
+	if (!ctx->tx_running)
+		espintcp_push_msgs(sk);
+	release_sock(sk);
+}
+
+static void espintcp_write_space(struct sock *sk)
+{
+	struct espintcp_ctx *ctx = espintcp_getctx(sk);
+
+	schedule_work(&ctx->work);
+	ctx->saved_write_space(sk);
+}
+
+static void espintcp_destruct(struct sock *sk)
+{
+	struct espintcp_ctx *ctx = espintcp_getctx(sk);
+
+	kfree(ctx);
+}
+
+bool tcp_is_ulp_esp(struct sock *sk)
+{
+	return sk->sk_prot == &espintcp_prot;
+}
+EXPORT_SYMBOL_GPL(tcp_is_ulp_esp);
+
+static int espintcp_init_sk(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct strp_callbacks cb = {
+		.rcv_msg = espintcp_rcv,
+		.parse_msg = espintcp_parse,
+	};
+	struct espintcp_ctx *ctx;
+	int err;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	err = strp_init(&ctx->strp, sk, &cb);
+	if (err)
+		goto free;
+
+	__sk_dst_reset(sk);
+
+	strp_check_rcv(&ctx->strp);
+	skb_queue_head_init(&ctx->ike_queue);
+	skb_queue_head_init(&ctx->out_queue);
+	sk->sk_prot = &espintcp_prot;
+	sk->sk_socket->ops = &espintcp_ops;
+	ctx->saved_data_ready = sk->sk_data_ready;
+	ctx->saved_write_space = sk->sk_write_space;
+	sk->sk_data_ready = espintcp_data_ready;
+	sk->sk_write_space = espintcp_write_space;
+	sk->sk_destruct = espintcp_destruct;
+	icsk->icsk_ulp_data = ctx;
+	INIT_WORK(&ctx->work, espintcp_tx_work);
+
+	/* avoid using task_frag */
+	sk->sk_allocation = GFP_ATOMIC;
+
+	return 0;
+
+free:
+	kfree(ctx);
+	return err;
+}
+
+static void espintcp_release(struct sock *sk)
+{
+	struct espintcp_ctx *ctx = espintcp_getctx(sk);
+	struct sk_buff_head queue;
+	struct sk_buff *skb;
+
+	__skb_queue_head_init(&queue);
+	skb_queue_splice_init(&ctx->out_queue, &queue);
+
+	while ((skb = __skb_dequeue(&queue)))
+		espintcp_push_skb(sk, skb);
+
+	tcp_release_cb(sk);
+}
+
+static void espintcp_close(struct sock *sk, long timeout)
+{
+	struct espintcp_ctx *ctx = espintcp_getctx(sk);
+	struct espintcp_msg *emsg = &ctx->partial;
+
+	strp_stop(&ctx->strp);
+
+	sk->sk_prot = &tcp_prot;
+	barrier();
+
+	cancel_work_sync(&ctx->work);
+	strp_done(&ctx->strp);
+
+	skb_queue_purge(&ctx->out_queue);
+	skb_queue_purge(&ctx->ike_queue);
+
+	if (emsg->len) {
+		if (emsg->skb)
+			kfree_skb(emsg->skb);
+		else
+			sk_msg_free(sk, &emsg->skmsg);
+	}
+
+	tcp_close(sk, timeout);
+}
+
+static __poll_t espintcp_poll(struct file *file, struct socket *sock,
+			      poll_table *wait)
+{
+	__poll_t mask = datagram_poll(file, sock, wait);
+	struct sock *sk = sock->sk;
+	struct espintcp_ctx *ctx = espintcp_getctx(sk);
+
+	if (!skb_queue_empty(&ctx->ike_queue))
+		mask |= EPOLLIN | EPOLLRDNORM;
+
+	return mask;
+}
+
+static struct tcp_ulp_ops espintcp_ulp __read_mostly = {
+	.name = "espintcp",
+	.owner = THIS_MODULE,
+	.init = espintcp_init_sk,
+};
+
+void __init espintcp_init(void)
+{
+	memcpy(&espintcp_prot, &tcp_prot, sizeof(tcp_prot));
+	memcpy(&espintcp_ops, &inet_stream_ops, sizeof(inet_stream_ops));
+	espintcp_prot.sendmsg = espintcp_sendmsg;
+	espintcp_prot.recvmsg = espintcp_recvmsg;
+	espintcp_prot.close = espintcp_close;
+	espintcp_prot.release_cb = espintcp_release;
+	espintcp_ops.poll = espintcp_poll;
+
+	tcp_register_ulp(&espintcp_ulp);
+}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 1070dfece76b..893072f2e651 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -39,6 +39,9 @@
 #ifdef CONFIG_XFRM_STATISTICS
 #include <net/snmp.h>
 #endif
+#ifdef CONFIG_XFRM_ESPINTCP
+#include <net/espintcp.h>
+#endif
 
 #include "xfrm_hash.h"
 
@@ -4156,6 +4159,10 @@ void __init xfrm_init(void)
 	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
 
+#ifdef CONFIG_XFRM_ESPINTCP
+	espintcp_init();
+#endif
+
 	RCU_INIT_POINTER(xfrm_if_cb, NULL);
 	synchronize_rcu();
 }
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index c6f3c4a1bd99..acef2d54f869 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -668,6 +668,9 @@ int __xfrm_state_delete(struct xfrm_state *x)
 		net->xfrm.state_num--;
 		spin_unlock(&net->xfrm.xfrm_state_lock);
 
+		if (x->encap_sk)
+			sock_put(rcu_dereference_raw(x->encap_sk));
+
 		xfrm_dev_state_delete(x);
 
 		/* All xfrm_state objects are created by xfrm_state_alloc.
-- 
2.22.0


^ permalink raw reply related

* [PATCH ipsec-next 6/7] esp4: split esp_output_udp_encap and introduce esp_output_encap
From: Sabrina Dubroca @ 2019-08-21 21:46 UTC (permalink / raw)
  To: netdev; +Cc: Herbert Xu, Steffen Klassert, Sabrina Dubroca
In-Reply-To: <cover.1566395202.git.sd@queasysnail.net>

Co-developed-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
 net/ipv4/esp4.c | 57 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index c5d826642229..033c61d27148 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -225,45 +225,62 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
 	tail[plen - 1] = proto;
 }
 
-static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
+static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
+					       int encap_type,
+					       struct esp_info *esp,
+					       __be16 sport,
+					       __be16 dport)
 {
-	int encap_type;
 	struct udphdr *uh;
 	__be32 *udpdata32;
-	__be16 sport, dport;
-	struct xfrm_encap_tmpl *encap = x->encap;
-	struct ip_esp_hdr *esph = esp->esph;
 	unsigned int len;
 
-	spin_lock_bh(&x->lock);
-	sport = encap->encap_sport;
-	dport = encap->encap_dport;
-	encap_type = encap->encap_type;
-	spin_unlock_bh(&x->lock);
-
 	len = skb->len + esp->tailen - skb_transport_offset(skb);
 	if (len + sizeof(struct iphdr) >= IP_MAX_MTU)
-		return -EMSGSIZE;
+		return ERR_PTR(-EMSGSIZE);
 
-	uh = (struct udphdr *)esph;
+	uh = (struct udphdr *)esp->esph;
 	uh->source = sport;
 	uh->dest = dport;
 	uh->len = htons(len);
 	uh->check = 0;
 
+	*skb_mac_header(skb) = IPPROTO_UDP;
+
+	if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
+		udpdata32 = (__be32 *)(uh + 1);
+		udpdata32[0] = udpdata32[1] = 0;
+		return (struct ip_esp_hdr *)(udpdata32 + 2);
+	}
+
+	return (struct ip_esp_hdr *)(uh + 1);
+}
+
+static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb,
+			    struct esp_info *esp)
+{
+	struct xfrm_encap_tmpl *encap = x->encap;
+	struct ip_esp_hdr *esph;
+	__be16 sport, dport;
+	int encap_type;
+
+	spin_lock_bh(&x->lock);
+	sport = encap->encap_sport;
+	dport = encap->encap_dport;
+	encap_type = encap->encap_type;
+	spin_unlock_bh(&x->lock);
+
 	switch (encap_type) {
 	default:
 	case UDP_ENCAP_ESPINUDP:
-		esph = (struct ip_esp_hdr *)(uh + 1);
-		break;
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
-		udpdata32 = (__be32 *)(uh + 1);
-		udpdata32[0] = udpdata32[1] = 0;
-		esph = (struct ip_esp_hdr *)(udpdata32 + 2);
+		esph = esp_output_udp_encap(skb, encap_type, esp, sport, dport);
 		break;
 	}
 
-	*skb_mac_header(skb) = IPPROTO_UDP;
+	if (IS_ERR(esph))
+		return PTR_ERR(esph);
+
 	esp->esph = esph;
 
 	return 0;
@@ -281,7 +298,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 
 	/* this is non-NULL only with UDP Encapsulation */
 	if (x->encap) {
-		int err = esp_output_udp_encap(x, skb, esp);
+		int err = esp_output_encap(x, skb, esp);
 
 		if (err < 0)
 			return err;
-- 
2.22.0


^ permalink raw reply related

* Re: [PATCH] bpf: handle 32-bit zext during constant blinding
From: Jiong Wang @ 2019-08-21 21:51 UTC (permalink / raw)
  To: Naveen N. Rao
  Cc: Alexei Starovoitov, Daniel Borkmann, Jiong Wang, Michael Ellerman,
	bpf, linuxppc-dev, netdev, linux-kernel
In-Reply-To: <20190821192358.31922-1-naveen.n.rao@linux.vnet.ibm.com>


Naveen N. Rao writes:

> Since BPF constant blinding is performed after the verifier pass, the
> ALU32 instructions inserted for doubleword immediate loads don't have a
> corresponding zext instruction. This is causing a kernel oops on powerpc
> and can be reproduced by running 'test_cgroup_storage' with
> bpf_jit_harden=2.
>
> Fix this by emitting BPF_ZEXT during constant blinding if
> prog->aux->verifier_zext is set.
>
> Fixes: a4b1d3c1ddf6cb ("bpf: verifier: insert zero extension according to analysis result")
> Reported-by: Michael Ellerman <mpe@ellerman.id.au>
> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>

Thanks for the fix.

Reviewed-by: Jiong Wang <jiong.wang@netronome.com>

Just two other comments during review in case I am wrong on somewhere.

  - Use verifier_zext instead of bpf_jit_needs_zext() seems better, even
    though the latter could avoid extending function argument.

    Because JIT back-ends look at verifier_zext, true means zext inserted
    by verifier so JITs won't do the code-gen.

    Use verifier_zext is sort of keeping JIT blinding the same behaviour
    has verifier even though blinding doesn't belong to verifier, but for
    such insn patching, it could be seen as a extension of verifier,
    therefore use verifier_zext seems better than bpf_jit_needs_zext() to
    me.
   
  - JIT blinding is also escaping the HI32 randomization which happens
    inside verifier, otherwise x86-64 regression should have caught this issue.

Regards,
Jiong

> ---
> Changes since RFC:
> - Removed changes to ALU32 and JMP32 ops since those don't alter program 
>   execution, and the verifier would have already accounted for them.  
>
>
>  kernel/bpf/core.c | 8 ++++++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 8191a7db2777..66088a9e9b9e 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -890,7 +890,8 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog,
>  
>  static int bpf_jit_blind_insn(const struct bpf_insn *from,
>  			      const struct bpf_insn *aux,
> -			      struct bpf_insn *to_buff)
> +			      struct bpf_insn *to_buff,
> +			      bool emit_zext)
>  {
>  	struct bpf_insn *to = to_buff;
>  	u32 imm_rnd = get_random_int();
> @@ -1005,6 +1006,8 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
>  	case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
>  		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
>  		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
> +		if (emit_zext)
> +			*to++ = BPF_ZEXT_REG(BPF_REG_AX);
>  		*to++ = BPF_ALU64_REG(BPF_OR,  aux[0].dst_reg, BPF_REG_AX);
>  		break;
>  
> @@ -1088,7 +1091,8 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
>  		    insn[1].code == 0)
>  			memcpy(aux, insn, sizeof(aux));
>  
> -		rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
> +		rewritten = bpf_jit_blind_insn(insn, aux, insn_buff,
> +						clone->aux->verifier_zext);
>  		if (!rewritten)
>  			continue;


^ permalink raw reply

* New skb extension for use by LSMs (skb "security blob")?
From: Paul Moore @ 2019-08-21 22:00 UTC (permalink / raw)
  To: netdev; +Cc: linux-security-module, selinux

Hello netdev,

I was just made aware of the skb extension work, and it looks very
appealing from a LSM perspective.  As some of you probably remember,
we (the LSM folks) have wanted a proper security blob in the skb for
quite some time, but netdev has been resistant to this idea thus far.

If I were to propose a patchset to add a SKB_EXT_SECURITY skb
extension (a single extension ID to be shared among the different
LSMs), would that be something that netdev would consider merging, or
is there still a philosophical objection to things like this?

-- 
paul moore
www.paul-moore.com

^ permalink raw reply

* Re: Unable to create htb tc classes more than 64K
From: Cong Wang @ 2019-08-21 22:06 UTC (permalink / raw)
  To: Akshat Kakkar; +Cc: NetFilter, lartc, netdev
In-Reply-To: <CAA5aLPjzX+9YFRGgCgceHjkU0=e6x8YMENfp_cC9fjfHYK3e+A@mail.gmail.com>

On Sat, Aug 17, 2019 at 12:04 PM Akshat Kakkar <akshat.1984@gmail.com> wrote:
> I am using ipset +  iptables to classify and not filters. Besides, if
> tc is allowing me to define qdisc -> classes -> qdsic -> classes
> (1,2,3 ...) sort of structure (ie like the one shown in ascii tree)
> then how can those lowest child classes be actually used or consumed?

Just install tc filters on the lower level too.

^ permalink raw reply

* [PATCH net-next] net/ncsi: update response packet length for GCPS/GNS/GNPTS commands
From: Ben Wei @ 2019-08-21 22:08 UTC (permalink / raw)
  To: sam@mendozajonas.com, davem@davemloft.net, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, openbmc@lists.ozlabs.org
  Cc: Ben Wei

Update response packet length for the following commands per NC-SI spec
- Get Controller Packet Statistics
- Get NC-SI Statistics
- Get NC-SI Pass-through Statistics command

Signed-off-by: Ben Wei <benwei@fb.com>
---
 net/ncsi/ncsi-rsp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 7581bf919885..5254004f2b42 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -1083,9 +1083,9 @@ static struct ncsi_rsp_handler {
 	{ NCSI_PKT_RSP_GVI,    40, ncsi_rsp_handler_gvi     },
 	{ NCSI_PKT_RSP_GC,     32, ncsi_rsp_handler_gc      },
 	{ NCSI_PKT_RSP_GP,     -1, ncsi_rsp_handler_gp      },
-	{ NCSI_PKT_RSP_GCPS,  172, ncsi_rsp_handler_gcps    },
-	{ NCSI_PKT_RSP_GNS,   172, ncsi_rsp_handler_gns     },
-	{ NCSI_PKT_RSP_GNPTS, 172, ncsi_rsp_handler_gnpts   },
+	{ NCSI_PKT_RSP_GCPS,  204, ncsi_rsp_handler_gcps    },
+	{ NCSI_PKT_RSP_GNS,    32, ncsi_rsp_handler_gns     },
+	{ NCSI_PKT_RSP_GNPTS,  48, ncsi_rsp_handler_gnpts   },
 	{ NCSI_PKT_RSP_GPS,     8, ncsi_rsp_handler_gps     },
 	{ NCSI_PKT_RSP_OEM,    -1, ncsi_rsp_handler_oem     },
 	{ NCSI_PKT_RSP_PLDM,    0, NULL                     },
--
2.17.1


^ permalink raw reply

* RE: [PATCH net-next] net/ncsi: update response packet length for GCPS/GNS/GNPTS commands
From: Justin.Lee1 @ 2019-08-21 22:25 UTC (permalink / raw)
  To: benwei, sam, davem, netdev, linux-kernel, openbmc
In-Reply-To: <CH2PR15MB3686567EBCBE71B41C5F079AA3AA0@CH2PR15MB3686.namprd15.prod.outlook.com>

Hi Ben,

Those are correct adjustment and I had tested previously. 

Thanks,
Justin


Reviewed-by: Justin Lee <justin.lee1@dell.com>


> Update response packet length for the following commands per NC-SI spec
> - Get Controller Packet Statistics
> - Get NC-SI Statistics
> - Get NC-SI Pass-through Statistics command
> 
> Signed-off-by: Ben Wei <benwei@fb.com>
> ---
>  net/ncsi/ncsi-rsp.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 7581bf919885..5254004f2b42 100644
> --- a/net/ncsi/ncsi-rsp.c
> +++ b/net/ncsi/ncsi-rsp.c
> @@ -1083,9 +1083,9 @@ static struct ncsi_rsp_handler {
>  	{ NCSI_PKT_RSP_GVI,    40, ncsi_rsp_handler_gvi     },
>  	{ NCSI_PKT_RSP_GC,     32, ncsi_rsp_handler_gc      },
>  	{ NCSI_PKT_RSP_GP,     -1, ncsi_rsp_handler_gp      },
> -	{ NCSI_PKT_RSP_GCPS,  172, ncsi_rsp_handler_gcps    },
> -	{ NCSI_PKT_RSP_GNS,   172, ncsi_rsp_handler_gns     },
> -	{ NCSI_PKT_RSP_GNPTS, 172, ncsi_rsp_handler_gnpts   },
> +	{ NCSI_PKT_RSP_GCPS,  204, ncsi_rsp_handler_gcps    },
> +	{ NCSI_PKT_RSP_GNS,    32, ncsi_rsp_handler_gns     },
> +	{ NCSI_PKT_RSP_GNPTS,  48, ncsi_rsp_handler_gnpts   },
>  	{ NCSI_PKT_RSP_GPS,     8, ncsi_rsp_handler_gps     },
>  	{ NCSI_PKT_RSP_OEM,    -1, ncsi_rsp_handler_oem     },
>  	{ NCSI_PKT_RSP_PLDM,    0, NULL                     },
> --
> 2.17.1


^ permalink raw reply

* Re: [PATCH net-next] net: bcmgenet: use devm_platform_ioremap_resource() to simplify code
From: Doug Berger @ 2019-08-21 22:27 UTC (permalink / raw)
  To: YueHaibing, davem, f.fainelli, bcm-kernel-feedback-list
  Cc: linux-kernel, netdev
In-Reply-To: <20190821134131.57780-1-yuehaibing@huawei.com>

On 8/21/19 6:41 AM, YueHaibing wrote:
> Use devm_platform_ioremap_resource() to simplify the code a bit.
> This is detected by coccinelle.
> 
> Reported-by: Hulk Robot <hulkci@huawei.com>
> Signed-off-by: YueHaibing <yuehaibing@huawei.com>
> ---
>  drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
> index d3a0b61..2108e59 100644
> --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
> +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
> @@ -3437,7 +3437,6 @@ static int bcmgenet_probe(struct platform_device *pdev)
>  	struct bcmgenet_priv *priv;
>  	struct net_device *dev;
>  	const void *macaddr;
> -	struct resource *r;
>  	unsigned int i;
>  	int err = -EIO;
>  	const char *phy_mode_str;
> @@ -3477,8 +3476,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
>  		macaddr = pd->mac_address;
>  	}
>  
> -	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> -	priv->base = devm_ioremap_resource(&pdev->dev, r);
> +	priv->base = devm_platform_ioremap_resource(pdev, 0);
>  	if (IS_ERR(priv->base)) {
>  		err = PTR_ERR(priv->base);
>  		goto err;
> 

Acked-by: Doug Berger <opendmb@gmail.com>

^ permalink raw reply

* KMSAN: uninit-value in rtm_new_nexthop
From: syzbot @ 2019-08-21 22:38 UTC (permalink / raw)
  To: davem, dsahern, glider, kuznet, linux-kernel, netdev,
	syzkaller-bugs, yoshfuji

Hello,

syzbot found the following crash on:

HEAD commit:    61ccdad1 Revert "drm/bochs: Use shadow buffer for bochs fr..
git tree:       https://github.com/google/kmsan.git master
console output: https://syzkaller.appspot.com/x/log.txt?x=1596d33c600000
kernel config:  https://syzkaller.appspot.com/x/.config?x=27abc558ecb16a3b
dashboard link: https://syzkaller.appspot.com/bug?extid=4f3abbb335d1bed2287c
compiler:       clang version 9.0.0 (/home/glider/llvm/clang  
80fee25776c2fb61e74c1ecb1a523375c2500b69)
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=15733302600000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=1028e9e2600000

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+4f3abbb335d1bed2287c@syzkaller.appspotmail.com

==================================================================
BUG: KMSAN: uninit-value in rtm_to_nh_config net/ipv4/nexthop.c:1317  
[inline]
BUG: KMSAN: uninit-value in rtm_new_nexthop+0x447/0x98e0  
net/ipv4/nexthop.c:1474
CPU: 0 PID: 11026 Comm: syz-executor768 Not tainted 5.3.0-rc3+ #17
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x191/0x1f0 lib/dump_stack.c:113
  kmsan_report+0x162/0x2d0 mm/kmsan/kmsan_report.c:109
  __msan_warning+0x75/0xe0 mm/kmsan/kmsan_instr.c:294
  rtm_to_nh_config net/ipv4/nexthop.c:1317 [inline]
  rtm_new_nexthop+0x447/0x98e0 net/ipv4/nexthop.c:1474
  rtnetlink_rcv_msg+0x115a/0x1580 net/core/rtnetlink.c:5223
  netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477
  rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5241
  netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline]
  netlink_unicast+0xf6c/0x1050 net/netlink/af_netlink.c:1328
  netlink_sendmsg+0x110f/0x1330 net/netlink/af_netlink.c:1917
  sock_sendmsg_nosec net/socket.c:637 [inline]
  sock_sendmsg net/socket.c:657 [inline]
  ___sys_sendmsg+0x14ff/0x1590 net/socket.c:2311
  __sys_sendmsg net/socket.c:2356 [inline]
  __do_sys_sendmsg net/socket.c:2365 [inline]
  __se_sys_sendmsg+0x305/0x460 net/socket.c:2363
  __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2363
  do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:297
  entry_SYSCALL_64_after_hwframe+0x63/0xe7
RIP: 0033:0x4401e9
Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7  
48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff  
ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007ffef5128b58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 00000000004401e9
RDX: 0000000000000000 RSI: 0000000020000400 RDI: 0000000000000003
RBP: 00000000006ca018 R08: 0000000000000000 R09: 00000000004002c8
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000401a70
R13: 0000000000401b00 R14: 0000000000000000 R15: 0000000000000000

Uninit was created at:
  kmsan_save_stack_with_flags mm/kmsan/kmsan.c:187 [inline]
  kmsan_internal_poison_shadow+0x53/0xa0 mm/kmsan/kmsan.c:146
  kmsan_slab_alloc+0xaa/0x120 mm/kmsan/kmsan_hooks.c:175
  slab_alloc_node mm/slub.c:2790 [inline]
  __kmalloc_node_track_caller+0xb55/0x1320 mm/slub.c:4388
  __kmalloc_reserve net/core/skbuff.c:141 [inline]
  __alloc_skb+0x306/0xa10 net/core/skbuff.c:209
  alloc_skb include/linux/skbuff.h:1056 [inline]
  netlink_alloc_large_skb net/netlink/af_netlink.c:1174 [inline]
  netlink_sendmsg+0x783/0x1330 net/netlink/af_netlink.c:1892
  sock_sendmsg_nosec net/socket.c:637 [inline]
  sock_sendmsg net/socket.c:657 [inline]
  ___sys_sendmsg+0x14ff/0x1590 net/socket.c:2311
  __sys_sendmsg net/socket.c:2356 [inline]
  __do_sys_sendmsg net/socket.c:2365 [inline]
  __se_sys_sendmsg+0x305/0x460 net/socket.c:2363
  __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2363
  do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:297
  entry_SYSCALL_64_after_hwframe+0x63/0xe7
==================================================================


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches

^ permalink raw reply

* KASAN: null-ptr-deref Write in queue_work_on
From: syzbot @ 2019-08-21 22:38 UTC (permalink / raw)
  To: davem, linux-can, linux-kernel, mkl, netdev, syzkaller-bugs, wg

Hello,

syzbot found the following crash on:

HEAD commit:    6e625a1a Merge tag 'xtensa-20190816' of git://github.com/j..
git tree:       upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=174e04ac600000
kernel config:  https://syzkaller.appspot.com/x/.config?x=3ff364e429585cf2
dashboard link: https://syzkaller.appspot.com/bug?extid=017e491ae13c0068598a
compiler:       gcc (GCC) 9.0.0 20181231 (experimental)
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=1327d9e2600000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=16c4fc4c600000

Bisection is inconclusive: the first bad commit could be any of:

569dbb88 Linux 4.13
  that is not the commit

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=17520702600000

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+017e491ae13c0068598a@syzkaller.appspotmail.com

==================================================================
BUG: KASAN: null-ptr-deref in test_and_set_bit  
include/asm-generic/bitops-instrumented.h:143 [inline]
BUG: KASAN: null-ptr-deref in queue_work_on+0xa6/0x210  
kernel/workqueue.c:1517
Write of size 8 at addr 0000000000000050 by task syz-executor935/9691

CPU: 0 PID: 9691 Comm: syz-executor935 Not tainted 5.3.0-rc4+ #113
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x172/0x1f0 lib/dump_stack.c:113
  __kasan_report.cold+0x5/0x36 mm/kasan/report.c:486
  kasan_report+0x12/0x17 mm/kasan/common.c:612
  check_memory_region_inline mm/kasan/generic.c:185 [inline]
  check_memory_region+0x134/0x1a0 mm/kasan/generic.c:192
  __kasan_check_write+0x14/0x20 mm/kasan/common.c:98
  test_and_set_bit include/asm-generic/bitops-instrumented.h:143 [inline]
  queue_work_on+0xa6/0x210 kernel/workqueue.c:1517
  queue_work include/linux/workqueue.h:490 [inline]
  schedule_work include/linux/workqueue.h:548 [inline]
  slcan_write_wakeup+0x66/0x90 drivers/net/can/slcan.c:348
  tty_wakeup+0xe9/0x120 drivers/tty/tty_io.c:535
  pty_unthrottle+0x37/0x60 drivers/tty/pty.c:95
  tty_unthrottle+0xab/0x110 drivers/tty/tty_ioctl.c:139
  __tty_perform_flush+0x1b3/0x200 drivers/tty/tty_ioctl.c:861
  n_tty_ioctl_helper+0x1cc/0x3b0 drivers/tty/tty_ioctl.c:937
  n_tty_ioctl+0x59/0x370 drivers/tty/n_tty.c:2466
  tty_ioctl+0xaf9/0x14f0 drivers/tty/tty_io.c:2666
  vfs_ioctl fs/ioctl.c:46 [inline]
  file_ioctl fs/ioctl.c:509 [inline]
  do_vfs_ioctl+0xdb6/0x13e0 fs/ioctl.c:696
  ksys_ioctl+0xab/0xd0 fs/ioctl.c:713
  __do_sys_ioctl fs/ioctl.c:720 [inline]
  __se_sys_ioctl fs/ioctl.c:718 [inline]
  __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718
  do_syscall_64+0xfd/0x6a0 arch/x86/entry/common.c:296
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x446859
Code: e8 9c b4 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7  
48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff  
ff 0f 83 eb 07 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f17a0a3fd18 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 00000000006dbc38 RCX: 0000000000446859
RDX: 0000000000000000 RSI: 000000000000540b RDI: 0000000000000003
RBP: 00000000006dbc30 R08: 00007f17a0a40700 R09: 0000000000000000
R10: 00007f17a0a40700 R11: 0000000000000246 R12: 00000000006dbc3c
R13: 00007ffdfd0bdb5f R14: 00007f17a0a409c0 R15: 20c49ba5e353f7cf
==================================================================


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
For information about bisection process see: https://goo.gl/tpsmEJ#bisection
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches

^ permalink raw reply

* KMSAN: uninit-value in batadv_iv_send_outstanding_bat_ogm_packet
From: syzbot @ 2019-08-21 22:38 UTC (permalink / raw)
  To: a, b.a.t.m.a.n, davem, glider, linux-kernel, mareklindner, netdev,
	sw, syzkaller-bugs

Hello,

syzbot found the following crash on:

HEAD commit:    61ccdad1 Revert "drm/bochs: Use shadow buffer for bochs fr..
git tree:       https://github.com/google/kmsan.git master
console output: https://syzkaller.appspot.com/x/log.txt?x=13d6909c600000
kernel config:  https://syzkaller.appspot.com/x/.config?x=27abc558ecb16a3b
dashboard link: https://syzkaller.appspot.com/bug?extid=355cab184197dbbfa384
compiler:       clang version 9.0.0 (/home/glider/llvm/clang  
80fee25776c2fb61e74c1ecb1a523375c2500b69)
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=1612b9d2600000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=11d388ac600000

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+355cab184197dbbfa384@syzkaller.appspotmail.com

==================================================================
BUG: KMSAN: uninit-value in batadv_iv_ogm_send_to_if  
net/batman-adv/bat_iv_ogm.c:317 [inline]
BUG: KMSAN: uninit-value in batadv_iv_ogm_emit  
net/batman-adv/bat_iv_ogm.c:383 [inline]
BUG: KMSAN: uninit-value in  
batadv_iv_send_outstanding_bat_ogm_packet+0x6cd/0xcc0  
net/batman-adv/bat_iv_ogm.c:1657
CPU: 1 PID: 290 Comm: kworker/u4:7 Not tainted 5.3.0-rc3+ #17
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011
Workqueue: bat_events batadv_iv_send_outstanding_bat_ogm_packet
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x191/0x1f0 lib/dump_stack.c:113
  kmsan_report+0x162/0x2d0 mm/kmsan/kmsan_report.c:109
  __msan_warning+0x75/0xe0 mm/kmsan/kmsan_instr.c:294
  batadv_iv_ogm_send_to_if net/batman-adv/bat_iv_ogm.c:317 [inline]
  batadv_iv_ogm_emit net/batman-adv/bat_iv_ogm.c:383 [inline]
  batadv_iv_send_outstanding_bat_ogm_packet+0x6cd/0xcc0  
net/batman-adv/bat_iv_ogm.c:1657
  process_one_work+0x1572/0x1ef0 kernel/workqueue.c:2269
  worker_thread+0x111b/0x2460 kernel/workqueue.c:2415
  kthread+0x4b5/0x4f0 kernel/kthread.c:256
  ret_from_fork+0x35/0x40 arch/x86/entry/entry_64.S:355

Uninit was created at:
  kmsan_save_stack_with_flags+0x37/0x70 mm/kmsan/kmsan.c:187
  kmsan_internal_alloc_meta_for_pages+0x123/0x510 mm/kmsan/kmsan_hooks.c:114
  kmsan_alloc_page+0x7a/0xf0 mm/kmsan/kmsan_hooks.c:244
  __alloc_pages_nodemask+0x142d/0x5fa0 mm/page_alloc.c:4768
  __alloc_pages include/linux/gfp.h:475 [inline]
  __alloc_pages_node include/linux/gfp.h:488 [inline]
  alloc_pages_node include/linux/gfp.h:502 [inline]
  __page_frag_cache_refill mm/page_alloc.c:4843 [inline]
  page_frag_alloc+0x35b/0x890 mm/page_alloc.c:4873
  __napi_alloc_skb+0x195/0x980 net/core/skbuff.c:519
  napi_alloc_skb include/linux/skbuff.h:2808 [inline]
  page_to_skb+0x134/0x1150 drivers/net/virtio_net.c:384
  receive_mergeable drivers/net/virtio_net.c:924 [inline]
  receive_buf+0xe7b/0x8810 drivers/net/virtio_net.c:1033
  virtnet_receive drivers/net/virtio_net.c:1323 [inline]
  virtnet_poll+0x666/0x19d0 drivers/net/virtio_net.c:1428
  napi_poll net/core/dev.c:6347 [inline]
  net_rx_action+0x74b/0x1950 net/core/dev.c:6413
  __do_softirq+0x4a1/0x83a kernel/softirq.c:293
  invoke_softirq kernel/softirq.c:375 [inline]
  irq_exit+0x230/0x280 kernel/softirq.c:416
  exiting_irq arch/x86/include/asm/apic.h:537 [inline]
  do_IRQ+0x20d/0x3a0 arch/x86/kernel/irq.c:259
  ret_from_intr+0x0/0x33
  kmsan_get_shadow_origin_ptr+0x6/0x3a0 mm/kmsan/kmsan.c:656
  __msan_metadata_ptr_for_load_8+0x10/0x20 mm/kmsan/kmsan_instr.c:55
  compound_head include/linux/compiler.h:206 [inline]
  PageReferenced include/linux/page-flags.h:315 [inline]
  mark_page_accessed+0x30c/0xa00 mm/swap.c:391
  touch_buffer fs/buffer.c:60 [inline]
  __find_get_block+0x1681/0x19e0 fs/buffer.c:1303
  __getblk_gfp+0xc5/0x1080 fs/buffer.c:1321
  sb_getblk include/linux/buffer_head.h:325 [inline]
  __ext4_get_inode_loc+0x647/0x1c80 fs/ext4/inode.c:4611
  ext4_get_inode_loc fs/ext4/inode.c:4726 [inline]
  ext4_reserve_inode_write+0x15d/0x430 fs/ext4/inode.c:5919
  ext4_mark_inode_dirty+0x2dd/0xca0 fs/ext4/inode.c:6071
  ext4_dirty_inode+0x187/0x1d0 fs/ext4/inode.c:6110
  __mark_inode_dirty+0x486/0x1380 fs/fs-writeback.c:2170
  mark_inode_dirty include/linux/fs.h:2138 [inline]
  generic_write_end+0x3f7/0x460 fs/buffer.c:2164
  ext4_da_write_end+0x1050/0x1240 fs/ext4/inode.c:3217
  generic_perform_write+0x618/0x990 mm/filemap.c:3341
  __generic_file_write_iter+0x421/0xa30 mm/filemap.c:3459
  ext4_file_write_iter+0xc97/0x2010 fs/ext4/file.c:270
  call_write_iter include/linux/fs.h:1870 [inline]
  new_sync_write fs/read_write.c:483 [inline]
  __vfs_write+0xa2c/0xcb0 fs/read_write.c:496
  vfs_write+0x481/0x920 fs/read_write.c:558
  ksys_write+0x265/0x430 fs/read_write.c:611
  __do_sys_write fs/read_write.c:623 [inline]
  __se_sys_write+0x92/0xb0 fs/read_write.c:620
  __x64_sys_write+0x4a/0x70 fs/read_write.c:620
  do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:297
  entry_SYSCALL_64_after_hwframe+0x63/0xe7
==================================================================


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches

^ permalink raw reply

* Re: New skb extension for use by LSMs (skb "security blob")?
From: David Miller @ 2019-08-21 22:50 UTC (permalink / raw)
  To: paul; +Cc: netdev, linux-security-module, selinux
In-Reply-To: <CAHC9VhSz1_KA1tCJtNjwK26BOkGhKGbPT7v1O82mWPduvWwd4A@mail.gmail.com>

From: Paul Moore <paul@paul-moore.com>
Date: Wed, 21 Aug 2019 18:00:09 -0400

> I was just made aware of the skb extension work, and it looks very
> appealing from a LSM perspective.  As some of you probably remember,
> we (the LSM folks) have wanted a proper security blob in the skb for
> quite some time, but netdev has been resistant to this idea thus far.
> 
> If I were to propose a patchset to add a SKB_EXT_SECURITY skb
> extension (a single extension ID to be shared among the different
> LSMs), would that be something that netdev would consider merging, or
> is there still a philosophical objection to things like this?

Unlike it's main intended user (MPTCP), it sounds like LSM's would use
this in a way such that it would be enabled on most systems all the
time.

That really defeats the whole purpose of making it dynamic. :-/

^ permalink raw reply

* Re: various TLS bug fixes...
From: Jakub Kicinski @ 2019-08-21 22:55 UTC (permalink / raw)
  To: John Fastabend; +Cc: David Miller, netdev
In-Reply-To: <20190821110346.449c5612@cakuba.netronome.com>

On Wed, 21 Aug 2019 11:03:46 -0700, Jakub Kicinski wrote:
> On Tue, 20 Aug 2019 23:51:12 -0700, Jakub Kicinski wrote:
> > > If you have more details I can also spend some cycles looking into it.    
> > 
> > Awesome, I'll let you know what the details are as soon as I get them.  
> 
> Just a quick update on that.
> 
> The test case is nginx running with ktls offload.
> 
> The client (hurl or openssl client) requests a file of ~2M, but only
> 44K ever gets across (not even sure which side sees an error at this
> point, outputs are pretty quiet).

I had a look, it's this:

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 6848a8196711..8a05e4bf1c58 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -370,7 +370,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 
        lock_sock(sk);
 
-       if (tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo))
+       ret = tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo);
+       if (ret)
                goto send_end;
 
        if (unlikely(msg->msg_controllen)) {

Which is commit 150085791afb ("net/tls: Fixed return value when
tls_complete_pending_work() fails"). 

I also tried to test what we described previously for sk_write_space
and it seems to work okay (although TBH I'm not sure my testing is 100%
here, I can't reliably trigger that race in the first place).

^ permalink raw reply related

* Re: [net-next 00/15][pull request] 40GbE Intel Wired LAN Driver Updates 2019-08-21
From: David Miller @ 2019-08-21 22:55 UTC (permalink / raw)
  To: jakub.kicinski; +Cc: jeffrey.t.kirsher, netdev, nhorman, sassmann
In-Reply-To: <20190821143106.0a2ac71d@cakuba.netronome.com>

From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 21 Aug 2019 14:31:06 -0700

> On Wed, 21 Aug 2019 13:16:08 -0700, Jeff Kirsher wrote:
>> This series contains updates to i40e driver only.
> 
> Patch 12 should really be squashed into 13, 7 and 9 could also be
> combined. But not a big deal, I guess.

Agreed, publicizing a function is not a discreet change within the
same module.  Whereas doing so in order to facilitate a new external
user as part of another change, on the other hand, is.

^ permalink raw reply

* [PATCH net-next 02/10] net: dsa: mv88e6xxx: remove extra newline
From: Marek Behún @ 2019-08-21 23:27 UTC (permalink / raw)
  To: netdev
  Cc: Andrew Lunn, Vivien Didelot, Florian Fainelli, Vladimir Oltean,
	Marek Behún
In-Reply-To: <20190821232724.1544-1-marek.behun@nic.cz>

There are two newlines separating mv88e6390_hidden_wait and
mv88e6390_hidden_read. Remove one.

Signed-off-by: Marek Behún <marek.behun@nic.cz>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index d0bf98c10b2b..176173d96512 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2347,7 +2347,6 @@ static int mv88e6390_hidden_wait(struct mv88e6xxx_chip *chip)
 				  PORT_RESERVED_1A, bit, 0);
 }
 
-
 static int mv88e6390_hidden_read(struct mv88e6xxx_chip *chip, int port,
 				  int reg, u16 *val)
 {
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next 05/10] net: dsa: mv88e6xxx: create chip->info->ops->serdes_get_lane method
From: Marek Behún @ 2019-08-21 23:27 UTC (permalink / raw)
  To: netdev
  Cc: Andrew Lunn, Vivien Didelot, Florian Fainelli, Vladimir Oltean,
	Marek Behún
In-Reply-To: <20190821232724.1544-1-marek.behun@nic.cz>

Create a serdes_get_lane() method in the mv88e6xxx operations structure.
Use it instead of calling the different implementations.

Signed-off-by: Marek Behún <marek.behun@nic.cz>
---
 drivers/net/dsa/mv88e6xxx/chip.c   |  6 ++++++
 drivers/net/dsa/mv88e6xxx/chip.h   |  3 +++
 drivers/net/dsa/mv88e6xxx/port.c   |  4 ++--
 drivers/net/dsa/mv88e6xxx/serdes.c | 29 +++++++++++++++++------------
 drivers/net/dsa/mv88e6xxx/serdes.h |  2 ++
 5 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 2dab46ad1d63..2340634aab27 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3256,6 +3256,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390_serdes_power,
+	.serdes_get_lane = mv88e6390_serdes_get_lane,
 	.serdes_irq_setup = mv88e6390_serdes_irq_setup,
 	.serdes_irq_free = mv88e6390_serdes_irq_free,
 	.gpio_ops = &mv88e6352_gpio_ops,
@@ -3302,6 +3303,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390x_serdes_power,
+	.serdes_get_lane = mv88e6390x_serdes_get_lane,
 	.serdes_irq_setup = mv88e6390x_serdes_irq_setup,
 	.serdes_irq_free = mv88e6390x_serdes_irq_free,
 	.gpio_ops = &mv88e6352_gpio_ops,
@@ -3348,6 +3350,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390_serdes_power,
+	.serdes_get_lane = mv88e6390_serdes_get_lane,
 	.serdes_irq_setup = mv88e6390_serdes_irq_setup,
 	.serdes_irq_free = mv88e6390_serdes_irq_free,
 	.avb_ops = &mv88e6390_avb_ops,
@@ -3484,6 +3487,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390_serdes_power,
+	.serdes_get_lane = mv88e6390_serdes_get_lane,
 	.serdes_irq_setup = mv88e6390_serdes_irq_setup,
 	.serdes_irq_free = mv88e6390_serdes_irq_free,
 	.gpio_ops = &mv88e6352_gpio_ops,
@@ -3801,6 +3805,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390_serdes_power,
+	.serdes_get_lane = mv88e6390_serdes_get_lane,
 	.serdes_irq_setup = mv88e6390_serdes_irq_setup,
 	.serdes_irq_free = mv88e6390_serdes_irq_free,
 	.gpio_ops = &mv88e6352_gpio_ops,
@@ -3851,6 +3856,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390x_serdes_power,
+	.serdes_get_lane = mv88e6390x_serdes_get_lane,
 	.serdes_irq_setup = mv88e6390x_serdes_irq_setup,
 	.serdes_irq_free = mv88e6390x_serdes_irq_free,
 	.gpio_ops = &mv88e6352_gpio_ops,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index a406be2f5652..35faf5be598b 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -443,6 +443,9 @@ struct mv88e6xxx_ops {
 	/* Power on/off a SERDES interface */
 	int (*serdes_power)(struct mv88e6xxx_chip *chip, int port, bool on);
 
+	/* SERDES lane mapping */
+	int (*serdes_get_lane)(struct mv88e6xxx_chip *chip, int port);
+
 	/* SERDES interrupt handling */
 	int (*serdes_irq_setup)(struct mv88e6xxx_chip *chip, int port);
 	void (*serdes_irq_free)(struct mv88e6xxx_chip *chip, int port);
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index c95cdb73e5a2..092176fd3d90 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -434,7 +434,7 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 	if (cmode == chip->ports[port].cmode)
 		return 0;
 
-	lane = mv88e6390x_serdes_get_lane(chip, port);
+	lane = mv88e6xxx_serdes_get_lane(chip, port);
 	if (lane < 0 && lane != -ENODEV)
 		return lane;
 
@@ -466,7 +466,7 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 
 		chip->ports[port].cmode = cmode;
 
-		lane = mv88e6390x_serdes_get_lane(chip, port);
+		lane = mv88e6xxx_serdes_get_lane(chip, port);
 		if (lane < 0)
 			return lane;
 
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 0f3d7cbb696b..011322fa24ae 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -286,10 +286,19 @@ void mv88e6352_serdes_irq_free(struct mv88e6xxx_chip *chip, int port)
 	chip->ports[port].serdes_irq = 0;
 }
 
-/* Return the SERDES lane address a port is using. Only Ports 9 and 10
- * have SERDES lanes. Returns -ENODEV if a port does not have a lane.
+/* Return the SERDES lane address a port is using. If a port has multiple lanes,
+ * should return the first lane the port is using. Should return -ENODEV if
+ * a port does not have a lane.
  */
-static int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+int mv88e6xxx_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+{
+	if (!chip->info->ops->serdes_get_lane)
+		return -EOPNOTSUPP;
+
+	return chip->info->ops->serdes_get_lane(chip, port);
+}
+
+int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	u8 cmode = chip->ports[port].cmode;
 
@@ -311,10 +320,6 @@ static int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 	}
 }
 
-/* Return the SERDES lane address a port is using. Ports 9 and 10 can
- * use multiple lanes. If so, return the first lane the port uses.
- * Returns -ENODEV if a port does not have a lane.
- */
 int mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	u8 cmode_port9, cmode_port10, cmode_port;
@@ -466,7 +471,7 @@ int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
 {
 	int lane;
 
-	lane = mv88e6390_serdes_get_lane(chip, port);
+	lane = mv88e6xxx_serdes_get_lane(chip, port);
 	if (lane == -ENODEV)
 		return 0;
 
@@ -485,7 +490,7 @@ int mv88e6390x_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
 {
 	int lane;
 
-	lane = mv88e6390x_serdes_get_lane(chip, port);
+	lane = mv88e6xxx_serdes_get_lane(chip, port);
 	if (lane == -ENODEV)
 		return 0;
 
@@ -638,7 +643,7 @@ static irqreturn_t mv88e6390_serdes_thread_fn(int irq, void *dev_id)
 	int lane;
 	int err;
 
-	lane = mv88e6390x_serdes_get_lane(chip, port->port);
+	lane = mv88e6xxx_serdes_get_lane(chip, port->port);
 
 	mv88e6xxx_reg_lock(chip);
 
@@ -666,7 +671,7 @@ int mv88e6390x_serdes_irq_setup(struct mv88e6xxx_chip *chip, int port)
 	int lane;
 	int err;
 
-	lane = mv88e6390x_serdes_get_lane(chip, port);
+	lane = mv88e6xxx_serdes_get_lane(chip, port);
 
 	if (lane == -ENODEV)
 		return 0;
@@ -711,7 +716,7 @@ int mv88e6390_serdes_irq_setup(struct mv88e6xxx_chip *chip, int port)
 
 void mv88e6390x_serdes_irq_free(struct mv88e6xxx_chip *chip, int port)
 {
-	int lane = mv88e6390x_serdes_get_lane(chip, port);
+	int lane = mv88e6xxx_serdes_get_lane(chip, port);
 
 	if (lane == -ENODEV)
 		return;
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index ff5b94439335..f2ca3bcc3893 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -74,6 +74,8 @@
 #define MV88E6390_SGMII_PHY_STATUS_SPD_DPL_VALID BIT(11)
 #define MV88E6390_SGMII_PHY_STATUS_LINK		BIT(10)
 
+int mv88e6xxx_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
 int mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
 int mv88e6341_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
 int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next 06/10] net: dsa: mv88e6xxx: add serdes_get_lane method for Topaz family
From: Marek Behún @ 2019-08-21 23:27 UTC (permalink / raw)
  To: netdev
  Cc: Andrew Lunn, Vivien Didelot, Florian Fainelli, Vladimir Oltean,
	Marek Behún
In-Reply-To: <20190821232724.1544-1-marek.behun@nic.cz>

The Topaz family has only one SERDES, on port 5, with address 0x15.
Currently we have MV88E6341_ADDR_SERDES macro used in the
mv88e6341_serdes_power method. Rename the macro to MV88E6341_PORT5_LANE
and use the new mv88e6xxx_serdes_get_lane method in
mv88e6341_serdes_power.

Signed-off-by: Marek Behún <marek.behun@nic.cz>
---
 drivers/net/dsa/mv88e6xxx/chip.c   |  2 ++
 drivers/net/dsa/mv88e6xxx/serdes.c | 25 ++++++++++++++++++++++---
 drivers/net/dsa/mv88e6xxx/serdes.h |  3 ++-
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 2340634aab27..080a5d707714 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2929,6 +2929,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 	.serdes_power = mv88e6341_serdes_power,
+	.serdes_get_lane = mv88e6341_serdes_get_lane,
 	.gpio_ops = &mv88e6352_gpio_ops,
 	.phylink_validate = mv88e6341_phylink_validate,
 };
@@ -3623,6 +3624,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 	.serdes_power = mv88e6341_serdes_power,
+	.serdes_get_lane = mv88e6341_serdes_get_lane,
 	.gpio_ops = &mv88e6352_gpio_ops,
 	.avb_ops = &mv88e6390_avb_ops,
 	.ptp_ops = &mv88e6352_ptp_ops,
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 011322fa24ae..87c967e7f1ae 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -298,6 +298,21 @@ int mv88e6xxx_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 	return chip->info->ops->serdes_get_lane(chip, port);
 }
 
+int mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+{
+	u8 cmode = chip->ports[port].cmode;
+
+	if (port != 5)
+		return -ENODEV;
+
+	if (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+	    cmode == MV88E6XXX_PORT_STS_CMODE_SGMII ||
+	    cmode == MV88E6XXX_PORT_STS_CMODE_2500BASEX)
+		return MV88E6341_PORT5_LANE;
+
+	return -ENODEV;
+}
+
 int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	u8 cmode = chip->ports[port].cmode;
@@ -747,15 +762,19 @@ void mv88e6390_serdes_irq_free(struct mv88e6xxx_chip *chip, int port)
 int mv88e6341_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
 {
 	u8 cmode = chip->ports[port].cmode;
+	int lane;
 
-	if (port != 5)
+	lane = mv88e6xxx_serdes_get_lane(chip, port);
+	if (lane == -ENODEV)
 		return 0;
 
+	if (lane < 0)
+		return lane;
+
 	if (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
 	    cmode == MV88E6XXX_PORT_STS_CMODE_SGMII ||
 	    cmode == MV88E6XXX_PORT_STS_CMODE_2500BASEX)
-		return mv88e6390_serdes_power_sgmii(chip, MV88E6341_ADDR_SERDES,
-						    on);
+		return mv88e6390_serdes_power_sgmii(chip, lane, on);
 
 	return 0;
 }
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index f2ca3bcc3893..de6f1939c541 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -28,7 +28,7 @@
 #define MV88E6352_SERDES_INT_STATUS	0x13
 
 
-#define MV88E6341_ADDR_SERDES		0x15
+#define MV88E6341_PORT5_LANE		0x15
 
 #define MV88E6390_PORT9_LANE0		0x09
 #define MV88E6390_PORT9_LANE1		0x12
@@ -75,6 +75,7 @@
 #define MV88E6390_SGMII_PHY_STATUS_LINK		BIT(10)
 
 int mv88e6xxx_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
 int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
 int mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
 int mv88e6341_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next 01/10] net: dsa: mv88e6xxx: support 2500base-x in SGMII IRQ handler
From: Marek Behún @ 2019-08-21 23:27 UTC (permalink / raw)
  To: netdev
  Cc: Andrew Lunn, Vivien Didelot, Florian Fainelli, Vladimir Oltean,
	Marek Behún
In-Reply-To: <20190821232724.1544-1-marek.behun@nic.cz>

The mv88e6390_serdes_irq_link_sgmii IRQ handler reads the SERDES PHY
status register to determine speed, among other things. If cmode of the
port is set to 2500base-x, though, the PHY still reports 1000 Mbps (the
PHY register itself does not differentiate between 1000 Mbps and 2500
Mbps - it thinks it is running at 1000 Mbps, although clock is 2.5x
faster).
Look at the cmode and set SPEED_2500 if cmode is set to 2500base-x.
Also tell mv88e6xxx_port_setup_mac the PHY interface mode corresponding
to current cmode in terms of phy_interface_t.

Signed-off-by: Marek Behún <marek.behun@nic.cz>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Vladimir Oltean <olteanv@gmail.com>
Cc: Vivien Didelot <vivien.didelot@gmail.com>
---
 drivers/net/dsa/mv88e6xxx/serdes.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 20c526c2a9ee..0f3d7cbb696b 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -506,9 +506,11 @@ static void mv88e6390_serdes_irq_link_sgmii(struct mv88e6xxx_chip *chip,
 					    int port, int lane)
 {
 	struct dsa_switch *ds = chip->ds;
+	u8 cmode = chip->ports[port].cmode;
 	int duplex = DUPLEX_UNKNOWN;
 	int speed = SPEED_UNKNOWN;
 	int link, err;
+	phy_interface_t mode;
 	u16 status;
 
 	err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
@@ -527,7 +529,10 @@ static void mv88e6390_serdes_irq_link_sgmii(struct mv88e6xxx_chip *chip,
 
 		switch (status & MV88E6390_SGMII_PHY_STATUS_SPEED_MASK) {
 		case MV88E6390_SGMII_PHY_STATUS_SPEED_1000:
-			speed = SPEED_1000;
+			if (cmode == MV88E6XXX_PORT_STS_CMODE_2500BASEX)
+				speed = SPEED_2500;
+			else
+				speed = SPEED_1000;
 			break;
 		case MV88E6390_SGMII_PHY_STATUS_SPEED_100:
 			speed = SPEED_100;
@@ -541,8 +546,22 @@ static void mv88e6390_serdes_irq_link_sgmii(struct mv88e6xxx_chip *chip,
 		}
 	}
 
+	switch (cmode) {
+	case MV88E6XXX_PORT_STS_CMODE_SGMII:
+		mode = PHY_INTERFACE_MODE_SGMII;
+		break;
+	case MV88E6XXX_PORT_STS_CMODE_1000BASE_X:
+		mode = PHY_INTERFACE_MODE_1000BASEX;
+		break;
+	case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
+		mode = PHY_INTERFACE_MODE_2500BASEX;
+		break;
+	default:
+		mode = PHY_INTERFACE_MODE_NA;
+	}
+
 	err = mv88e6xxx_port_setup_mac(chip, port, link, speed, duplex,
-				       PAUSE_OFF, PHY_INTERFACE_MODE_NA);
+				       PAUSE_OFF, mode);
 	if (err)
 		dev_err(chip->dev, "can't propagate PHY settings to MAC: %d\n",
 			err);
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next 07/10] net: dsa: mv88e6xxx: rename port cmode macro
From: Marek Behún @ 2019-08-21 23:27 UTC (permalink / raw)
  To: netdev
  Cc: Andrew Lunn, Vivien Didelot, Florian Fainelli, Vladimir Oltean,
	Marek Behún
In-Reply-To: <20190821232724.1544-1-marek.behun@nic.cz>

This is a cosmetic update. We are removing the last underscore from
macros MV88E6XXX_PORT_STS_CMODE_100BASE_X and
MV88E6XXX_PORT_STS_CMODE_1000BASE_X. The 2500base-x version does not
have that underscore. Also PHY_INTERFACE_MODE_ macros do not have it
there.

Signed-off-by: Marek Behún <marek.behun@nic.cz>
---
 drivers/net/dsa/mv88e6xxx/port.c   |  4 +--
 drivers/net/dsa/mv88e6xxx/port.h   |  4 +--
 drivers/net/dsa/mv88e6xxx/serdes.c | 50 +++++++++++++++---------------
 3 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index 092176fd3d90..b1f66ea833ed 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -411,7 +411,7 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 
 	switch (mode) {
 	case PHY_INTERFACE_MODE_1000BASEX:
-		cmode = MV88E6XXX_PORT_STS_CMODE_1000BASE_X;
+		cmode = MV88E6XXX_PORT_STS_CMODE_1000BASEX;
 		break;
 	case PHY_INTERFACE_MODE_SGMII:
 		cmode = MV88E6XXX_PORT_STS_CMODE_SGMII;
@@ -618,7 +618,7 @@ int mv88e6352_port_link_state(struct mv88e6xxx_chip *chip, int port,
 		else
 			state->interface = PHY_INTERFACE_MODE_RGMII;
 		break;
-	case MV88E6XXX_PORT_STS_CMODE_1000BASE_X:
+	case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
 		state->interface = PHY_INTERFACE_MODE_1000BASEX;
 		break;
 	case MV88E6XXX_PORT_STS_CMODE_SGMII:
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index 5c5e8e7397eb..cd48670f46ae 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -43,8 +43,8 @@
 #define MV88E6XXX_PORT_STS_FLOW_CTL		0x0010
 #define MV88E6XXX_PORT_STS_CMODE_MASK		0x000f
 #define MV88E6XXX_PORT_STS_CMODE_RGMII		0x0007
-#define MV88E6XXX_PORT_STS_CMODE_100BASE_X	0x0008
-#define MV88E6XXX_PORT_STS_CMODE_1000BASE_X	0x0009
+#define MV88E6XXX_PORT_STS_CMODE_100BASEX	0x0008
+#define MV88E6XXX_PORT_STS_CMODE_1000BASEX	0x0009
 #define MV88E6XXX_PORT_STS_CMODE_SGMII		0x000a
 #define MV88E6XXX_PORT_STS_CMODE_2500BASEX	0x000b
 #define MV88E6XXX_PORT_STS_CMODE_XAUI		0x000c
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 87c967e7f1ae..fd27b60875e0 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -73,8 +73,8 @@ static bool mv88e6352_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
 {
 	u8 cmode = chip->ports[port].cmode;
 
-	if ((cmode == MV88E6XXX_PORT_STS_CMODE_100BASE_X) ||
-	    (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X) ||
+	if ((cmode == MV88E6XXX_PORT_STS_CMODE_100BASEX) ||
+	    (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASEX) ||
 	    (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII))
 		return true;
 
@@ -305,7 +305,7 @@ int mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 	if (port != 5)
 		return -ENODEV;
 
-	if (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+	if (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
 	    cmode == MV88E6XXX_PORT_STS_CMODE_SGMII ||
 	    cmode == MV88E6XXX_PORT_STS_CMODE_2500BASEX)
 		return MV88E6341_PORT5_LANE;
@@ -319,13 +319,13 @@ int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 
 	switch (port) {
 	case 9:
-		if (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+		if (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
 		    cmode == MV88E6XXX_PORT_STS_CMODE_SGMII ||
 		    cmode == MV88E6XXX_PORT_STS_CMODE_2500BASEX)
 			return MV88E6390_PORT9_LANE0;
 		return -ENODEV;
 	case 10:
-		if (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+		if (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
 		    cmode == MV88E6XXX_PORT_STS_CMODE_SGMII ||
 		    cmode == MV88E6XXX_PORT_STS_CMODE_2500BASEX)
 			return MV88E6390_PORT10_LANE0;
@@ -345,53 +345,53 @@ int mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 
 	switch (port) {
 	case 2:
-		if (cmode_port9 == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+		if (cmode_port9 == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
 		    cmode_port9 == MV88E6XXX_PORT_STS_CMODE_SGMII ||
 		    cmode_port9 == MV88E6XXX_PORT_STS_CMODE_2500BASEX)
-			if (cmode_port == MV88E6XXX_PORT_STS_CMODE_1000BASE_X)
+			if (cmode_port == MV88E6XXX_PORT_STS_CMODE_1000BASEX)
 				return MV88E6390_PORT9_LANE1;
 		return -ENODEV;
 	case 3:
-		if (cmode_port9 == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+		if (cmode_port9 == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
 		    cmode_port9 == MV88E6XXX_PORT_STS_CMODE_SGMII ||
 		    cmode_port9 == MV88E6XXX_PORT_STS_CMODE_2500BASEX ||
 		    cmode_port9 == MV88E6XXX_PORT_STS_CMODE_RXAUI)
-			if (cmode_port == MV88E6XXX_PORT_STS_CMODE_1000BASE_X)
+			if (cmode_port == MV88E6XXX_PORT_STS_CMODE_1000BASEX)
 				return MV88E6390_PORT9_LANE2;
 		return -ENODEV;
 	case 4:
-		if (cmode_port9 == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+		if (cmode_port9 == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
 		    cmode_port9 == MV88E6XXX_PORT_STS_CMODE_SGMII ||
 		    cmode_port9 == MV88E6XXX_PORT_STS_CMODE_2500BASEX ||
 		    cmode_port9 == MV88E6XXX_PORT_STS_CMODE_RXAUI)
-			if (cmode_port == MV88E6XXX_PORT_STS_CMODE_1000BASE_X)
+			if (cmode_port == MV88E6XXX_PORT_STS_CMODE_1000BASEX)
 				return MV88E6390_PORT9_LANE3;
 		return -ENODEV;
 	case 5:
-		if (cmode_port10 == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+		if (cmode_port10 == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
 		    cmode_port10 == MV88E6XXX_PORT_STS_CMODE_SGMII ||
 		    cmode_port10 == MV88E6XXX_PORT_STS_CMODE_2500BASEX)
-			if (cmode_port == MV88E6XXX_PORT_STS_CMODE_1000BASE_X)
+			if (cmode_port == MV88E6XXX_PORT_STS_CMODE_1000BASEX)
 				return MV88E6390_PORT10_LANE1;
 		return -ENODEV;
 	case 6:
-		if (cmode_port10 == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+		if (cmode_port10 == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
 		    cmode_port10 == MV88E6XXX_PORT_STS_CMODE_SGMII ||
 		    cmode_port10 == MV88E6XXX_PORT_STS_CMODE_2500BASEX ||
 		    cmode_port10 == MV88E6XXX_PORT_STS_CMODE_RXAUI)
-			if (cmode_port == MV88E6XXX_PORT_STS_CMODE_1000BASE_X)
+			if (cmode_port == MV88E6XXX_PORT_STS_CMODE_1000BASEX)
 				return MV88E6390_PORT10_LANE2;
 		return -ENODEV;
 	case 7:
-		if (cmode_port10 == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+		if (cmode_port10 == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
 		    cmode_port10 == MV88E6XXX_PORT_STS_CMODE_SGMII ||
 		    cmode_port10 == MV88E6XXX_PORT_STS_CMODE_2500BASEX ||
 		    cmode_port10 == MV88E6XXX_PORT_STS_CMODE_RXAUI)
-			if (cmode_port == MV88E6XXX_PORT_STS_CMODE_1000BASE_X)
+			if (cmode_port == MV88E6XXX_PORT_STS_CMODE_1000BASEX)
 				return MV88E6390_PORT10_LANE3;
 		return -ENODEV;
 	case 9:
-		if (cmode_port9 == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+		if (cmode_port9 == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
 		    cmode_port9 == MV88E6XXX_PORT_STS_CMODE_SGMII ||
 		    cmode_port9 == MV88E6XXX_PORT_STS_CMODE_2500BASEX ||
 		    cmode_port9 == MV88E6XXX_PORT_STS_CMODE_XAUI ||
@@ -399,7 +399,7 @@ int mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 			return MV88E6390_PORT9_LANE0;
 		return -ENODEV;
 	case 10:
-		if (cmode_port10 == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+		if (cmode_port10 == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
 		    cmode_port10 == MV88E6XXX_PORT_STS_CMODE_SGMII ||
 		    cmode_port10 == MV88E6XXX_PORT_STS_CMODE_2500BASEX ||
 		    cmode_port10 == MV88E6XXX_PORT_STS_CMODE_XAUI ||
@@ -471,7 +471,7 @@ static int mv88e6390_serdes_power_lane(struct mv88e6xxx_chip *chip, int port,
 
 	switch (cmode) {
 	case MV88E6XXX_PORT_STS_CMODE_SGMII:
-	case MV88E6XXX_PORT_STS_CMODE_1000BASE_X:
+	case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
 	case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
 		return mv88e6390_serdes_power_sgmii(chip, lane, on);
 	case MV88E6XXX_PORT_STS_CMODE_XAUI:
@@ -570,7 +570,7 @@ static void mv88e6390_serdes_irq_link_sgmii(struct mv88e6xxx_chip *chip,
 	case MV88E6XXX_PORT_STS_CMODE_SGMII:
 		mode = PHY_INTERFACE_MODE_SGMII;
 		break;
-	case MV88E6XXX_PORT_STS_CMODE_1000BASE_X:
+	case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
 		mode = PHY_INTERFACE_MODE_1000BASEX;
 		break;
 	case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
@@ -613,7 +613,7 @@ int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port,
 
 	switch (cmode) {
 	case MV88E6XXX_PORT_STS_CMODE_SGMII:
-	case MV88E6XXX_PORT_STS_CMODE_1000BASE_X:
+	case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
 	case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
 		err = mv88e6390_serdes_irq_enable_sgmii(chip, lane);
 	}
@@ -629,7 +629,7 @@ int mv88e6390_serdes_irq_disable(struct mv88e6xxx_chip *chip, int port,
 
 	switch (cmode) {
 	case MV88E6XXX_PORT_STS_CMODE_SGMII:
-	case MV88E6XXX_PORT_STS_CMODE_1000BASE_X:
+	case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
 	case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
 		err = mv88e6390_serdes_irq_disable_sgmii(chip, lane);
 	}
@@ -664,7 +664,7 @@ static irqreturn_t mv88e6390_serdes_thread_fn(int irq, void *dev_id)
 
 	switch (cmode) {
 	case MV88E6XXX_PORT_STS_CMODE_SGMII:
-	case MV88E6XXX_PORT_STS_CMODE_1000BASE_X:
+	case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
 	case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
 		err = mv88e6390_serdes_irq_status_sgmii(chip, lane, &status);
 		if (err)
@@ -771,7 +771,7 @@ int mv88e6341_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
 	if (lane < 0)
 		return lane;
 
-	if (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+	if (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
 	    cmode == MV88E6XXX_PORT_STS_CMODE_SGMII ||
 	    cmode == MV88E6XXX_PORT_STS_CMODE_2500BASEX)
 		return mv88e6390_serdes_power_sgmii(chip, lane, on);
-- 
2.21.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox