netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input.
@ 2023-01-19 19:33 Antony Antony
  2023-01-19 19:33 ` [PATCH 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
                   ` (6 more replies)
  0 siblings, 7 replies; 28+ messages in thread
From: Antony Antony @ 2023-01-19 19:33 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu; +Cc: netdev

From: Steffen Klassert <steffen.klassert@secunet.com>

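Mark GRO calls into xfrm_input() with the XFRM_GRO offload flag instead
of a negative encap_type (-2). This frees encap_type to carry the real
UDP encapsulation type, which is needed to support GRO for ESP in UDP
encapsulation.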

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/ipv4/esp4_offload.c |  2 +-
 net/ipv6/esp6_offload.c |  2 +-
 net/xfrm/xfrm_input.c   | 75 +++++++++++++++++++++++------------------
 3 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 3969fa805679..77bb01032667 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -76,7 +76,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 
 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, -2);
+	xfrm_input(skb, IPPROTO_ESP, spi, 0);
 
 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 75c02992c520..ee5f5abdb503 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -103,7 +103,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
 
 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, -2);
+	xfrm_input(skb, IPPROTO_ESP, spi, 0);
 
 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index c06e54a10540..ffd62ad58207 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -458,6 +458,35 @@ static int xfrm_inner_mode_input(struct xfrm_state *x,
 	return -EOPNOTSUPP;
 }
 
+static int xfrm_input_check_offload(struct net *net, struct sk_buff *skb,
+				    struct xfrm_state *x,
+				    struct xfrm_offload *xo)
+{
+	if (!(xo->status & CRYPTO_SUCCESS)) {
+		if (xo->status &
+		    (CRYPTO_TRANSPORT_AH_AUTH_FAILED |
+		     CRYPTO_TRANSPORT_ESP_AUTH_FAILED |
+		     CRYPTO_TUNNEL_AH_AUTH_FAILED |
+		     CRYPTO_TUNNEL_ESP_AUTH_FAILED)) {
+			xfrm_audit_state_icvfail(x, skb,
+						 x->type->proto);
+			x->stats.integrity_failed++;
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
+			return -EINVAL;
+		}
+
+		if (xo->status & CRYPTO_INVALID_PROTOCOL) {
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
+			return -EINVAL;
+		}
+
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 {
 	const struct xfrm_state_afinfo *afinfo;
@@ -477,7 +506,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	struct xfrm_offload *xo = xfrm_offload(skb);
 	struct sec_path *sp;
 
-	if (encap_type < 0) {
+	if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) {
 		x = xfrm_input_state(skb);
 
 		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
@@ -495,46 +524,26 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		family = x->outer_mode.family;
 
 		/* An encap_type of -1 indicates async resumption. */
-		if (encap_type == -1) {
+		if (encap_type  < 0) {
 			async = 1;
 			seq = XFRM_SKB_CB(skb)->seq.input.low;
 			goto resume;
-		}
+		} else {
+			/* GRO call */
+			seq = XFRM_SPI_SKB_CB(skb)->seq;
 
-		/* encap_type < -1 indicates a GRO call. */
-		encap_type = 0;
-		seq = XFRM_SPI_SKB_CB(skb)->seq;
-
-		if (xo && (xo->flags & CRYPTO_DONE)) {
-			crypto_done = true;
-			family = XFRM_SPI_SKB_CB(skb)->family;
-
-			if (!(xo->status & CRYPTO_SUCCESS)) {
-				if (xo->status &
-				    (CRYPTO_TRANSPORT_AH_AUTH_FAILED |
-				     CRYPTO_TRANSPORT_ESP_AUTH_FAILED |
-				     CRYPTO_TUNNEL_AH_AUTH_FAILED |
-				     CRYPTO_TUNNEL_ESP_AUTH_FAILED)) {
-
-					xfrm_audit_state_icvfail(x, skb,
-								 x->type->proto);
-					x->stats.integrity_failed++;
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
+			if (xo && (xo->flags & CRYPTO_DONE)) {
+				crypto_done = true;
+				family = XFRM_SPI_SKB_CB(skb)->family;
+
+				err = xfrm_input_check_offload(net, skb, x, xo);
+				if (err)
 					goto drop;
-				}
 
-				if (xo->status & CRYPTO_INVALID_PROTOCOL) {
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
+				if (xfrm_parse_spi(skb, nexthdr, &spi, &seq)) {
+					XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
 					goto drop;
 				}
-
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
-				goto drop;
-			}
-
-			if (xfrm_parse_spi(skb, nexthdr, &spi, &seq)) {
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
-				goto drop;
 			}
 		}
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation.
  2023-01-19 19:33 [PATCH 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
@ 2023-01-19 19:33 ` Antony Antony
  2023-01-20 11:04   ` Eyal Birger
  2023-01-19 19:34 ` [PATCH 3/3] xfrm: Support GRO for IPv6 " Antony Antony
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 28+ messages in thread
From: Antony Antony @ 2023-01-19 19:33 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu; +Cc: netdev

From: Steffen Klassert <steffen.klassert@secunet.com>

This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 include/net/gro.h       |  2 +-
 include/net/xfrm.h      |  4 ++
 net/ipv4/esp4_offload.c | 11 ++++-
 net/ipv4/udp.c          |  4 +-
 net/ipv4/xfrm4_input.c  | 99 +++++++++++++++++++++++++++++++++--------
 5 files changed, 99 insertions(+), 21 deletions(-)

diff --git a/include/net/gro.h b/include/net/gro.h
index a4fab706240d..41c12c5d1ea1 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -29,7 +29,7 @@ struct napi_gro_cb {
 	/* Number of segments aggregated. */
 	u16	count;
 
-	/* Used in ipv6_gro_receive() and foo-over-udp */
+	/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
 	u16	proto;
 
 	/* jiffies when first packet was created/queued */
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3e1f70e8e424..74dba98fbf2c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1666,6 +1666,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu);
 int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 		    int encap_type);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm4_transport_finish(struct sk_buff *skb, int async);
 int xfrm4_rcv(struct sk_buff *skb);
 
@@ -1706,6 +1708,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
 int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
 		     int optlen);
 #else
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 77bb01032667..8769bb669fdd 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -32,6 +32,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
 	struct xfrm_state *x;
+	int encap_type = 0;
 	__be32 seq;
 	__be32 spi;
 
@@ -69,6 +70,14 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 
 	xo->flags |= XFRM_GRO;
 
+	if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP && skb->sk &&
+	    (udp_sk(skb->sk)->encap_type == UDP_ENCAP_ESPINUDP ||
+	     udp_sk(skb->sk)->encap_type == UDP_ENCAP_ESPINUDP_NON_IKE)) {
+		encap_type = udp_sk(skb->sk)->encap_type;
+		sock_put(skb->sk);
+		skb->sk = NULL;
+	}
+
 	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
 	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
@@ -76,7 +85,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 
 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, 0);
+	xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
 
 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9592fe3e444a..6a30d0210c4e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2729,9 +2729,11 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 #if IS_ENABLED(CONFIG_IPV6)
 			if (sk->sk_family == AF_INET6)
 				up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
-			else
 #endif
+			if (sk->sk_family == AF_INET) {
 				up->encap_rcv = xfrm4_udp_encap_rcv;
+				up->gro_receive = xfrm4_gro_udp_encap_rcv;
+			}
 #endif
 			fallthrough;
 		case UDP_ENCAP_L2TPINUDP:
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index ad2afeef4f10..768d12491a48 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -17,6 +17,8 @@
 #include <linux/netfilter_ipv4.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>
 
 static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
 				   struct sk_buff *skb)
@@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
 	return 0;
 }
 
-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
 {
 	struct udp_sock *up = udp_sk(sk);
 	struct udphdr *uh;
@@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 			/* ESP Packet without Non-ESP header */
 			len = sizeof(struct udphdr);
@@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
 			   udpdata32[0] == 0 && udpdata32[1] == 0) {
 
@@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
 	if (skb_unclone(skb, GFP_ATOMIC))
-		goto drop;
+		return -EINVAL;
 
 	/* Now we can update and verify the packet length... */
 	iph = ip_hdr(skb);
@@ -147,24 +142,92 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	iph->tot_len = htons(ntohs(iph->tot_len) - len);
 	if (skb->len < iphlen + len) {
 		/* packet is too small!?! */
-		goto drop;
+		return -EINVAL;
 	}
 
 	/* pull the data buffer up to the ESP header and set the
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	__skb_pull(skb, len);
-	skb_reset_transport_header(skb);
+	if (pull) {
+		__skb_pull(skb, len);
+		skb_reset_transport_header(skb);
+	} else {
+		skb_set_transport_header(skb, len);
+	}
 
 	/* process ESP */
-	return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
-	kfree_skb(skb);
 	return 0;
 }
 
+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = __xfrm4_udp_encap_rcv(sk, skb, true);
+	if (!ret)
+		return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
+				       udp_sk(sk)->encap_type);
+
+	if (ret < 0) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return ret;
+}
+
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb)
+{
+	int offset = skb_gro_offset(skb);
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
+	int ret;
+
+	offset = offset - sizeof(struct udphdr);
+
+	if (!pskb_pull(skb, offset))
+		return NULL;
+
+	if (!refcount_inc_not_zero(&sk->sk_refcnt))
+		return NULL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
+	if (!ops || !ops->callbacks.gro_receive)
+		goto out;
+
+	ret = __xfrm4_udp_encap_rcv(sk, skb, false);
+	if (ret)
+		goto out;
+
+	skb->sk = sk;
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	rcu_read_unlock();
+
+	return pp;
+
+out:
+	rcu_read_unlock();
+	sock_put(sk);
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->same_flow = 0;
+	NAPI_GRO_CB(skb)->flush = 1;
+
+	return NULL;
+}
+
 int xfrm4_rcv(struct sk_buff *skb)
 {
 	return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 3/3] xfrm: Support GRO for IPv6 ESP in UDP encapsulation.
  2023-01-19 19:33 [PATCH 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
  2023-01-19 19:33 ` [PATCH 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
@ 2023-01-19 19:34 ` Antony Antony
  2023-02-06 17:18   ` Pablo Neira Ayuso
  2023-01-20 11:05 ` [PATCH 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Eyal Birger
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 28+ messages in thread
From: Antony Antony @ 2023-01-19 19:34 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu; +Cc: netdev

From: Steffen Klassert <steffen.klassert@secunet.com>

This patch enables the GRO codepath for IPv6 ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 include/net/ipv6_stubs.h |  3 ++
 include/net/xfrm.h       |  4 +-
 net/ipv4/udp.c           |  4 +-
 net/ipv6/af_inet6.c      |  1 +
 net/ipv6/esp6_offload.c  | 15 +++++-
 net/ipv6/xfrm6_input.c   | 99 ++++++++++++++++++++++++++++++++--------
 6 files changed, 103 insertions(+), 23 deletions(-)

diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index c48186bf4737..887d35f716c7 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -60,6 +60,9 @@ struct ipv6_stub {
 #if IS_ENABLED(CONFIG_XFRM)
 	void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
 	int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
+	struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk,
+						   struct list_head *head,
+						   struct sk_buff *skb);
 	int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
 			       int encap_type);
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 74dba98fbf2c..5cc6d8432d2f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1666,8 +1666,6 @@ void xfrm_local_error(struct sk_buff *skb, int mtu);
 int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 		    int encap_type);
-struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
-					struct sk_buff *skb);
 int xfrm4_transport_finish(struct sk_buff *skb, int async);
 int xfrm4_rcv(struct sk_buff *skb);
 
@@ -1710,6 +1708,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
 					struct sk_buff *skb);
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
 		     int optlen);
 #else
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6a30d0210c4e..497ef68c80ea 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2727,8 +2727,10 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		case UDP_ENCAP_ESPINUDP:
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
 #if IS_ENABLED(CONFIG_IPV6)
-			if (sk->sk_family == AF_INET6)
+			if (sk->sk_family == AF_INET6) {
 				up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
+				up->gro_receive = ipv6_stub->xfrm6_gro_udp_encap_rcv;
+			}
 #endif
 			if (sk->sk_family == AF_INET) {
 				up->encap_rcv = xfrm4_udp_encap_rcv;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index fee9163382c2..03c04a5a073d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1054,6 +1054,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
 #if IS_ENABLED(CONFIG_XFRM)
 	.xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
 	.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
+	.xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv,
 	.xfrm6_rcv_encap = xfrm6_rcv_encap,
 #endif
 	.nd_tbl	= &nd_tbl,
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index ee5f5abdb503..395bfee94d84 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -33,7 +33,9 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
 	int off = sizeof(struct ipv6hdr);
 	struct ipv6_opt_hdr *exthdr;
 
-	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+	/* ESP or ESPINUDP */
+	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP ||
+		   ipv6_hdr->nexthdr == NEXTHDR_UDP))
 		return offsetof(struct ipv6hdr, nexthdr);
 
 	while (off < nhlen) {
@@ -53,10 +55,19 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
 	struct xfrm_state *x;
+	int encap_type = 0;
 	__be32 seq;
 	__be32 spi;
 	int nhoff;
 
+	if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP && skb->sk &&
+	    (udp_sk(skb->sk)->encap_type == UDP_ENCAP_ESPINUDP ||
+	     udp_sk(skb->sk)->encap_type == UDP_ENCAP_ESPINUDP_NON_IKE)) {
+		encap_type = udp_sk(skb->sk)->encap_type;
+		sock_put(skb->sk);
+		skb->sk = NULL;
+	}
+
 	if (!pskb_pull(skb, offset))
 		return NULL;
 
@@ -103,7 +114,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
 
 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, 0);
+	xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
 
 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 04cbeefd8982..cd18ca75c9f6 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,6 +16,8 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/ipv6.h>
 #include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>
 
 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
 		  struct ip6_tnl *t)
@@ -67,14 +69,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 	return 0;
 }
 
-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
 {
 	struct udp_sock *up = udp_sk(sk);
 	struct udphdr *uh;
@@ -106,7 +101,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 			/* ESP Packet without Non-ESP header */
 			len = sizeof(struct udphdr);
@@ -117,7 +112,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
 			   udpdata32[0] == 0 && udpdata32[1] == 0) {
 
@@ -135,31 +130,99 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
 	if (skb_unclone(skb, GFP_ATOMIC))
-		goto drop;
+		return -EINVAL;
 
 	/* Now we can update and verify the packet length... */
 	ip6h = ipv6_hdr(skb);
 	ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len);
 	if (skb->len < ip6hlen + len) {
 		/* packet is too small!?! */
-		goto drop;
+		return -EINVAL;
 	}
 
 	/* pull the data buffer up to the ESP header and set the
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	__skb_pull(skb, len);
-	skb_reset_transport_header(skb);
+	if (pull) {
+		__skb_pull(skb, len);
+		skb_reset_transport_header(skb);
+	} else {
+		skb_set_transport_header(skb, len);
+	}
 
 	/* process ESP */
-	return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
-	kfree_skb(skb);
 	return 0;
 }
 
+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = __xfrm6_udp_encap_rcv(sk, skb, true);
+	if (!ret)
+		return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0,
+				       udp_sk(sk)->encap_type);
+
+	if (ret < 0) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return ret;
+}
+
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb)
+{
+	int offset = skb_gro_offset(skb);
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
+	int ret;
+
+	offset = offset - sizeof(struct udphdr);
+
+	if (!pskb_pull(skb, offset))
+		return NULL;
+
+	if (!refcount_inc_not_zero(&sk->sk_refcnt))
+		return NULL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]);
+	if (!ops || !ops->callbacks.gro_receive)
+		goto out;
+
+	ret = __xfrm6_udp_encap_rcv(sk, skb, false);
+	if (ret)
+		goto out;
+
+	skb->sk = sk;
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	rcu_read_unlock();
+
+	return pp;
+
+out:
+	rcu_read_unlock();
+	sock_put(sk);
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->same_flow = 0;
+	NAPI_GRO_CB(skb)->flush = 1;
+
+	return NULL;
+}
+
 int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
 {
 	return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation.
  2023-01-19 19:33 ` [PATCH 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
@ 2023-01-20 11:04   ` Eyal Birger
  0 siblings, 0 replies; 28+ messages in thread
From: Eyal Birger @ 2023-01-20 11:04 UTC (permalink / raw)
  To: antony.antony; +Cc: Steffen Klassert, Herbert Xu, netdev

Hi,

On Thu, Jan 19, 2023 at 10:00 PM Antony Antony
<antony.antony@secunet.com> wrote:
>
> From: Steffen Klassert <steffen.klassert@secunet.com>
>
> This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
> packets. Decapsulation happens at L2 and saves a full round through
> the stack for each packet. This is also needed to support HW offload
> for ESP in UDP encapsulation.
>
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> Co-developed-by: Antony Antony <antony.antony@secunet.com>
> Signed-off-by: Antony Antony <antony.antony@secunet.com>
> ---
>  include/net/gro.h       |  2 +-
>  include/net/xfrm.h      |  4 ++
>  net/ipv4/esp4_offload.c | 11 ++++-
>  net/ipv4/udp.c          |  4 +-
>  net/ipv4/xfrm4_input.c  | 99 +++++++++++++++++++++++++++++++++--------
>  5 files changed, 99 insertions(+), 21 deletions(-)
>
> diff --git a/include/net/gro.h b/include/net/gro.h
> index a4fab706240d..41c12c5d1ea1 100644
> --- a/include/net/gro.h
> +++ b/include/net/gro.h
> @@ -29,7 +29,7 @@ struct napi_gro_cb {
>         /* Number of segments aggregated. */
>         u16     count;
>
> -       /* Used in ipv6_gro_receive() and foo-over-udp */
> +       /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
>         u16     proto;
>
>         /* jiffies when first packet was created/queued */
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index 3e1f70e8e424..74dba98fbf2c 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -1666,6 +1666,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu);
>  int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
>  int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
>                     int encap_type);
> +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +                                       struct sk_buff *skb);
>  int xfrm4_transport_finish(struct sk_buff *skb, int async);
>  int xfrm4_rcv(struct sk_buff *skb);
>
> @@ -1706,6 +1708,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
>  void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
>  int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
>  int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
> +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +                                       struct sk_buff *skb);
>  int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
>                      int optlen);
>  #else
> diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
> index 77bb01032667..8769bb669fdd 100644
> --- a/net/ipv4/esp4_offload.c
> +++ b/net/ipv4/esp4_offload.c
> @@ -32,6 +32,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
>         int offset = skb_gro_offset(skb);
>         struct xfrm_offload *xo;
>         struct xfrm_state *x;
> +       int encap_type = 0;
>         __be32 seq;
>         __be32 spi;
>
> @@ -69,6 +70,14 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
>
>         xo->flags |= XFRM_GRO;
>
> +       if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP && skb->sk &&
> +           (udp_sk(skb->sk)->encap_type == UDP_ENCAP_ESPINUDP ||
> +            udp_sk(skb->sk)->encap_type == UDP_ENCAP_ESPINUDP_NON_IKE)) {
> +               encap_type = udp_sk(skb->sk)->encap_type;
> +               sock_put(skb->sk);
> +               skb->sk = NULL;
> +       }
> +
>         XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
>         XFRM_SPI_SKB_CB(skb)->family = AF_INET;
>         XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
> @@ -76,7 +85,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
>
>         /* We don't need to handle errors from xfrm_input, it does all
>          * the error handling and frees the resources on error. */
> -       xfrm_input(skb, IPPROTO_ESP, spi, 0);
> +       xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
>
>         return ERR_PTR(-EINPROGRESS);
>  out_reset:
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 9592fe3e444a..6a30d0210c4e 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -2729,9 +2729,11 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
>  #if IS_ENABLED(CONFIG_IPV6)
>                         if (sk->sk_family == AF_INET6)
>                                 up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
> -                       else
>  #endif
> +                       if (sk->sk_family == AF_INET) {
>                                 up->encap_rcv = xfrm4_udp_encap_rcv;
> +                               up->gro_receive = xfrm4_gro_udp_encap_rcv;
> +                       }
>  #endif
>                         fallthrough;
>                 case UDP_ENCAP_L2TPINUDP:
> diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
> index ad2afeef4f10..768d12491a48 100644
> --- a/net/ipv4/xfrm4_input.c
> +++ b/net/ipv4/xfrm4_input.c
> @@ -17,6 +17,8 @@
>  #include <linux/netfilter_ipv4.h>
>  #include <net/ip.h>
>  #include <net/xfrm.h>
> +#include <net/protocol.h>
> +#include <net/gro.h>
>
>  static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
>                                    struct sk_buff *skb)
> @@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
>         return 0;
>  }
>
> -/* If it's a keepalive packet, then just eat it.
> - * If it's an encapsulated packet, then pass it to the
> - * IPsec xfrm input.
> - * Returns 0 if skb passed to xfrm or was dropped.
> - * Returns >0 if skb should be passed to UDP.
> - * Returns <0 if skb should be resubmitted (-ret is protocol)
> - */
> -int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> +static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
>  {
>         struct udp_sock *up = udp_sk(sk);
>         struct udphdr *uh;
> @@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         case UDP_ENCAP_ESPINUDP:
>                 /* Check if this is a keepalive packet.  If so, eat it. */
>                 if (len == 1 && udpdata[0] == 0xff) {
> -                       goto drop;
> +                       return -EINVAL;
>                 } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
>                         /* ESP Packet without Non-ESP header */
>                         len = sizeof(struct udphdr);
> @@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         case UDP_ENCAP_ESPINUDP_NON_IKE:
>                 /* Check if this is a keepalive packet.  If so, eat it. */
>                 if (len == 1 && udpdata[0] == 0xff) {
> -                       goto drop;
> +                       return -EINVAL;
>                 } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
>                            udpdata32[0] == 0 && udpdata32[1] == 0) {
>
> @@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>          * protocol to ESP, and then call into the transform receiver.
>          */
>         if (skb_unclone(skb, GFP_ATOMIC))
> -               goto drop;
> +               return -EINVAL;
>
>         /* Now we can update and verify the packet length... */
>         iph = ip_hdr(skb);
> @@ -147,24 +142,92 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         iph->tot_len = htons(ntohs(iph->tot_len) - len);
>         if (skb->len < iphlen + len) {
>                 /* packet is too small!?! */
> -               goto drop;
> +               return -EINVAL;
>         }
>
>         /* pull the data buffer up to the ESP header and set the
>          * transport header to point to ESP.  Keep UDP on the stack
>          * for later.
>          */
> -       __skb_pull(skb, len);
> -       skb_reset_transport_header(skb);
> +       if (pull) {
> +               __skb_pull(skb, len);
> +               skb_reset_transport_header(skb);
> +       } else {
> +               skb_set_transport_header(skb, len);
> +       }
>
>         /* process ESP */
> -       return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
> -
> -drop:
> -       kfree_skb(skb);
>         return 0;
>  }
>
> +/* If it's a keepalive packet, then just eat it.
> + * If it's an encapsulated packet, then pass it to the
> + * IPsec xfrm input.
> + * Returns 0 if skb passed to xfrm or was dropped.
> + * Returns >0 if skb should be passed to UDP.
> + * Returns <0 if skb should be resubmitted (-ret is protocol)
> + */
> +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> +{
> +       int ret;
> +
> +       ret = __xfrm4_udp_encap_rcv(sk, skb, true);
> +       if (!ret)
> +               return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
> +                                      udp_sk(sk)->encap_type);
> +
> +       if (ret < 0) {
> +               kfree_skb(skb);
> +               return 0;
> +       }
> +
> +       return ret;
> +}
> +
> +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +                                       struct sk_buff *skb)
> +{
> +       int offset = skb_gro_offset(skb);
> +       const struct net_offload *ops;
> +       struct sk_buff *pp = NULL;
> +       int ret;
> +
> +       offset = offset - sizeof(struct udphdr);
> +
> +       if (!pskb_pull(skb, offset))
> +               return NULL;
> +
> +       if (!refcount_inc_not_zero(&sk->sk_refcnt))
> +               return NULL;
> +

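Isn't an skb_push(skb, offset) needed if refcount_inc_not_zero() fails
above? pskb_pull() has already succeeded at that point, but this path
returns NULL without undoing the pull.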

> +       rcu_read_lock();
> +       ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
> +       if (!ops || !ops->callbacks.gro_receive)
> +               goto out;
> +
> +       ret = __xfrm4_udp_encap_rcv(sk, skb, false);
> +       if (ret)
> +               goto out;
> +
> +       skb->sk = sk;

Don't you need something like skb_set_owner_sk_safe() so the
destructor is also set?

> +       skb_push(skb, offset);
> +       NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
> +
> +       pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
> +       rcu_read_unlock();
> +
> +       return pp;
> +
> +out:
> +       rcu_read_unlock();
> +       sock_put(sk);
> +       skb_push(skb, offset);
> +       NAPI_GRO_CB(skb)->same_flow = 0;
> +       NAPI_GRO_CB(skb)->flush = 1;
> +
> +       return NULL;
> +}
> +
>  int xfrm4_rcv(struct sk_buff *skb)
>  {
>         return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
> --
> 2.30.2
>

Eyal.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input.
  2023-01-19 19:33 [PATCH 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
  2023-01-19 19:33 ` [PATCH 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
  2023-01-19 19:34 ` [PATCH 3/3] xfrm: Support GRO for IPv6 " Antony Antony
@ 2023-01-20 11:05 ` Eyal Birger
  2023-08-16  9:57 ` [PATCH v4 ipsec-next 0/3] xfrm: Support GRO decapsulation for ESP in UDP encapsulation Antony Antony
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 28+ messages in thread
From: Eyal Birger @ 2023-01-20 11:05 UTC (permalink / raw)
  To: antony.antony; +Cc: Steffen Klassert, Herbert Xu, netdev

Hi,

On Thu, Jan 19, 2023 at 9:59 PM Antony Antony <antony.antony@secunet.com> wrote:
>
> From: Steffen Klassert <steffen.klassert@secunet.com>
>
> This is needed to support GRO for ESP in UDP encapsulation.
>
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> Signed-off-by: Antony Antony <antony.antony@secunet.com>
> ---
>  net/ipv4/esp4_offload.c |  2 +-
>  net/ipv6/esp6_offload.c |  2 +-
>  net/xfrm/xfrm_input.c   | 75 +++++++++++++++++++++++------------------
>  3 files changed, 44 insertions(+), 35 deletions(-)
>
> diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
> index 3969fa805679..77bb01032667 100644
> --- a/net/ipv4/esp4_offload.c
> +++ b/net/ipv4/esp4_offload.c
> @@ -76,7 +76,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
>
>         /* We don't need to handle errors from xfrm_input, it does all
>          * the error handling and frees the resources on error. */
> -       xfrm_input(skb, IPPROTO_ESP, spi, -2);
> +       xfrm_input(skb, IPPROTO_ESP, spi, 0);
>
>         return ERR_PTR(-EINPROGRESS);
>  out_reset:
> diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
> index 75c02992c520..ee5f5abdb503 100644
> --- a/net/ipv6/esp6_offload.c
> +++ b/net/ipv6/esp6_offload.c
> @@ -103,7 +103,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
>
>         /* We don't need to handle errors from xfrm_input, it does all
>          * the error handling and frees the resources on error. */
> -       xfrm_input(skb, IPPROTO_ESP, spi, -2);
> +       xfrm_input(skb, IPPROTO_ESP, spi, 0);
>
>         return ERR_PTR(-EINPROGRESS);
>  out_reset:
> diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
> index c06e54a10540..ffd62ad58207 100644
> --- a/net/xfrm/xfrm_input.c
> +++ b/net/xfrm/xfrm_input.c
> @@ -458,6 +458,35 @@ static int xfrm_inner_mode_input(struct xfrm_state *x,
>         return -EOPNOTSUPP;
>  }
>
> +static int xfrm_input_check_offload(struct net *net, struct sk_buff *skb,
> +                                   struct xfrm_state *x,
> +                                   struct xfrm_offload *xo)
> +{
> +       if (!(xo->status & CRYPTO_SUCCESS)) {
> +               if (xo->status &
> +                   (CRYPTO_TRANSPORT_AH_AUTH_FAILED |
> +                    CRYPTO_TRANSPORT_ESP_AUTH_FAILED |
> +                    CRYPTO_TUNNEL_AH_AUTH_FAILED |
> +                    CRYPTO_TUNNEL_ESP_AUTH_FAILED)) {
> +                       xfrm_audit_state_icvfail(x, skb,
> +                                                x->type->proto);
> +                       x->stats.integrity_failed++;
> +                       XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
> +                       return -EINVAL;
> +               }
> +
> +               if (xo->status & CRYPTO_INVALID_PROTOCOL) {
> +                       XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
> +                       return -EINVAL;
> +               }
> +
> +               XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
> +               return -EINVAL;
> +       }
> +
> +       return 0;
> +}
> +
>  int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
>  {
>         const struct xfrm_state_afinfo *afinfo;
> @@ -477,7 +506,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
>         struct xfrm_offload *xo = xfrm_offload(skb);
>         struct sec_path *sp;
>
> -       if (encap_type < 0) {
> +       if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) {
>                 x = xfrm_input_state(skb);
>
>                 if (unlikely(x->km.state != XFRM_STATE_VALID)) {
> @@ -495,46 +524,26 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
>                 family = x->outer_mode.family;
>
>                 /* An encap_type of -1 indicates async resumption. */
> -               if (encap_type == -1) {
> +               if (encap_type  < 0) {

Why is this specific line change needed? I see that now -2 is not sent
anymore, so how is this related?
If it is needed, maybe the comment above also needs updating?

nit, a cover letter would've been handy so that the series could be
fetched as a whole.

Eyal.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/3] xfrm: Support GRO for IPv6 ESP in UDP encapsulation.
  2023-01-19 19:34 ` [PATCH 3/3] xfrm: Support GRO for IPv6 " Antony Antony
@ 2023-02-06 17:18   ` Pablo Neira Ayuso
  0 siblings, 0 replies; 28+ messages in thread
From: Pablo Neira Ayuso @ 2023-02-06 17:18 UTC (permalink / raw)
  To: Antony Antony; +Cc: Steffen Klassert, Herbert Xu, netdev

On Thu, Jan 19, 2023 at 08:34:00PM +0100, Antony Antony wrote:
> From: Steffen Klassert <steffen.klassert@secunet.com>
> 
> This patch enables the GRO codepath for IPv6 ESP in UDP encapsulated
> packets. Decapsulation happens at L2 and saves a full round through
> the stack for each packet. This is also needed to support HW offload
> for ESP in UDP encapsulation.
> 
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> Co-developed-by: Antony Antony <antony.antony@secunet.com>
> Signed-off-by: Antony Antony <antony.antony@secunet.com>
> ---
>  include/net/ipv6_stubs.h |  3 ++
>  include/net/xfrm.h       |  4 +-
>  net/ipv4/udp.c           |  4 +-
>  net/ipv6/af_inet6.c      |  1 +
>  net/ipv6/esp6_offload.c  | 15 +++++-
>  net/ipv6/xfrm6_input.c   | 99 ++++++++++++++++++++++++++++++++--------
>  6 files changed, 103 insertions(+), 23 deletions(-)
> 
> diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
> index c48186bf4737..887d35f716c7 100644
> --- a/include/net/ipv6_stubs.h
> +++ b/include/net/ipv6_stubs.h
> @@ -60,6 +60,9 @@ struct ipv6_stub {
>  #if IS_ENABLED(CONFIG_XFRM)
>  	void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
>  	int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
> +	struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk,
> +						   struct list_head *head,
> +						   struct sk_buff *skb);
>  	int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
>  			       int encap_type);
>  #endif
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index 74dba98fbf2c..5cc6d8432d2f 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -1666,8 +1666,6 @@ void xfrm_local_error(struct sk_buff *skb, int mtu);
>  int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
>  int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
>  		    int encap_type);
> -struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> -					struct sk_buff *skb);
>  int xfrm4_transport_finish(struct sk_buff *skb, int async);
>  int xfrm4_rcv(struct sk_buff *skb);
>  
> @@ -1710,6 +1708,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
>  int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
>  struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
>  					struct sk_buff *skb);
> +struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +					struct sk_buff *skb);
>  int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
>  		     int optlen);
>  #else
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 6a30d0210c4e..497ef68c80ea 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -2727,8 +2727,10 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
>  		case UDP_ENCAP_ESPINUDP:
>  		case UDP_ENCAP_ESPINUDP_NON_IKE:
>  #if IS_ENABLED(CONFIG_IPV6)
> -			if (sk->sk_family == AF_INET6)
> +			if (sk->sk_family == AF_INET6) {
>  				up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
> +				up->gro_receive = ipv6_stub->xfrm6_gro_udp_encap_rcv;
> +			}
>  #endif
>  			if (sk->sk_family == AF_INET) {
>  				up->encap_rcv = xfrm4_udp_encap_rcv;
> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> index fee9163382c2..03c04a5a073d 100644
> --- a/net/ipv6/af_inet6.c
> +++ b/net/ipv6/af_inet6.c
> @@ -1054,6 +1054,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
>  #if IS_ENABLED(CONFIG_XFRM)
>  	.xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
>  	.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
> +	.xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv,
>  	.xfrm6_rcv_encap = xfrm6_rcv_encap,
>  #endif
>  	.nd_tbl	= &nd_tbl,
> diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
> index ee5f5abdb503..395bfee94d84 100644
> --- a/net/ipv6/esp6_offload.c
> +++ b/net/ipv6/esp6_offload.c
> @@ -33,7 +33,9 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
>  	int off = sizeof(struct ipv6hdr);
>  	struct ipv6_opt_hdr *exthdr;
>  
> -	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
> +	/* ESP or ESPINUDP */
> +	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP ||
> +		   ipv6_hdr->nexthdr == NEXTHDR_UDP))
>  		return offsetof(struct ipv6hdr, nexthdr);
>  
>  	while (off < nhlen) {
> @@ -53,10 +55,19 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
>  	int offset = skb_gro_offset(skb);
>  	struct xfrm_offload *xo;
>  	struct xfrm_state *x;
> +	int encap_type = 0;
>  	__be32 seq;
>  	__be32 spi;
>  	int nhoff;
>  
> +	if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP && skb->sk &&
> +	    (udp_sk(skb->sk)->encap_type == UDP_ENCAP_ESPINUDP ||
> +	     udp_sk(skb->sk)->encap_type == UDP_ENCAP_ESPINUDP_NON_IKE)) {
> +		encap_type = udp_sk(skb->sk)->encap_type;
> +		sock_put(skb->sk);
> +		skb->sk = NULL;
> +	}
> +
>  	if (!pskb_pull(skb, offset))
>  		return NULL;
>  
> @@ -103,7 +114,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
>  
>  	/* We don't need to handle errors from xfrm_input, it does all
>  	 * the error handling and frees the resources on error. */
> -	xfrm_input(skb, IPPROTO_ESP, spi, 0);
> +	xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
>  
>  	return ERR_PTR(-EINPROGRESS);
>  out_reset:
> diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
> index 04cbeefd8982..cd18ca75c9f6 100644
> --- a/net/ipv6/xfrm6_input.c
> +++ b/net/ipv6/xfrm6_input.c
> @@ -16,6 +16,8 @@
>  #include <linux/netfilter_ipv6.h>
>  #include <net/ipv6.h>
>  #include <net/xfrm.h>
> +#include <net/protocol.h>
> +#include <net/gro.h>
>  
>  int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
>  		  struct ip6_tnl *t)
> @@ -67,14 +69,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
>  	return 0;
>  }
>  
> -/* If it's a keepalive packet, then just eat it.
> - * If it's an encapsulated packet, then pass it to the
> - * IPsec xfrm input.
> - * Returns 0 if skb passed to xfrm or was dropped.
> - * Returns >0 if skb should be passed to UDP.
> - * Returns <0 if skb should be resubmitted (-ret is protocol)
> - */
> -int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> +static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
>  {
>  	struct udp_sock *up = udp_sk(sk);
>  	struct udphdr *uh;
> @@ -106,7 +101,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>  	case UDP_ENCAP_ESPINUDP:
>  		/* Check if this is a keepalive packet.  If so, eat it. */
>  		if (len == 1 && udpdata[0] == 0xff) {
> -			goto drop;
> +			return -EINVAL;
>  		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
>  			/* ESP Packet without Non-ESP header */
>  			len = sizeof(struct udphdr);
> @@ -117,7 +112,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>  	case UDP_ENCAP_ESPINUDP_NON_IKE:
>  		/* Check if this is a keepalive packet.  If so, eat it. */
>  		if (len == 1 && udpdata[0] == 0xff) {
> -			goto drop;
> +			return -EINVAL;
>  		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
>  			   udpdata32[0] == 0 && udpdata32[1] == 0) {
>  
> @@ -135,31 +130,99 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>  	 * protocol to ESP, and then call into the transform receiver.
>  	 */
>  	if (skb_unclone(skb, GFP_ATOMIC))
> -		goto drop;
> +		return -EINVAL;
>  
>  	/* Now we can update and verify the packet length... */
>  	ip6h = ipv6_hdr(skb);
>  	ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len);
>  	if (skb->len < ip6hlen + len) {
>  		/* packet is too small!?! */
> -		goto drop;
> +		return -EINVAL;
>  	}
>  
>  	/* pull the data buffer up to the ESP header and set the
>  	 * transport header to point to ESP.  Keep UDP on the stack
>  	 * for later.
>  	 */
> -	__skb_pull(skb, len);
> -	skb_reset_transport_header(skb);
> +	if (pull) {
> +		__skb_pull(skb, len);
> +		skb_reset_transport_header(skb);
> +	} else {
> +		skb_set_transport_header(skb, len);
> +	}
>  
>  	/* process ESP */
> -	return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
> -
> -drop:
> -	kfree_skb(skb);
>  	return 0;
>  }
>  
> +/* If it's a keepalive packet, then just eat it.
> + * If it's an encapsulated packet, then pass it to the
> + * IPsec xfrm input.
> + * Returns 0 if skb passed to xfrm or was dropped.
> + * Returns >0 if skb should be passed to UDP.
> + * Returns <0 if skb should be resubmitted (-ret is protocol)
> + */
> +int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> +{
> +	int ret;
> +
> +	ret = __xfrm6_udp_encap_rcv(sk, skb, true);
> +	if (!ret)
> +		return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0,
> +				       udp_sk(sk)->encap_type);
> +
> +	if (ret < 0) {
> +		kfree_skb(skb);
> +		return 0;
> +	}
> +
> +	return ret;
> +}
> +
> +struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +					struct sk_buff *skb)
> +{
> +	int offset = skb_gro_offset(skb);
> +	const struct net_offload *ops;
> +	struct sk_buff *pp = NULL;
> +	int ret;
> +
> +	offset = offset - sizeof(struct udphdr);
> +
> +	if (!pskb_pull(skb, offset))
> +		return NULL;
> +
> +	if (!refcount_inc_not_zero(&sk->sk_refcnt))
> +		return NULL;
> +
> +	rcu_read_lock();
> +	ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]);
> +	if (!ops || !ops->callbacks.gro_receive)
> +		goto out;
> +
> +	ret = __xfrm6_udp_encap_rcv(sk, skb, false);
> +	if (ret)
> +		goto out;
> +
> +	skb->sk = sk;
> +	skb_push(skb, offset);
> +	NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
> +
> +	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
> +	rcu_read_unlock();
> +
> +	return pp;
> +
> +out:
> +	rcu_read_unlock();
> +	sock_put(sk);
> +	skb_push(skb, offset);
> +	NAPI_GRO_CB(skb)->same_flow = 0;
> +	NAPI_GRO_CB(skb)->flush = 1;
> +
> +	return NULL;

This function looks like a copy and paste, maybe:

struct sk_buff *xfrm_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
                                       struct sk_buff *skb, const struct net_offload *ops,
                                       int (*encap_rcv)(struct sock *sk, struct sk_buff *skb, bool x))
{
        ...
}

Then, pass __xfrm4_udp_encap_rcv() and __xfrm6_udp_encap_rcv() and
net_offload, so IPv4 and IPv6 use the same function.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v4 ipsec-next 0/3] xfrm: Support GRO decapsulation for ESP in UDP encapsulation
  2023-01-19 19:33 [PATCH 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
                   ` (2 preceding siblings ...)
  2023-01-20 11:05 ` [PATCH 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Eyal Birger
@ 2023-08-16  9:57 ` Antony Antony
  2023-08-16  9:57   ` [PATCH v4 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
                     ` (2 more replies)
  2023-08-16 13:12 ` [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Antony Antony
                   ` (2 subsequent siblings)
  6 siblings, 3 replies; 28+ messages in thread
From: Antony Antony @ 2023-08-16  9:57 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu; +Cc: Eyal Birger, Antony Antony via Devel, netdev

Hello,
I have reworked this patch set; here is v4.

v1->v2 fixed the error path (added skb_push)
        use is_fou instead of holding sk in skb.
        added a user-configurable option to enable GRO, using UDP_GRO

v2->v3 only support GRO for UDP_ENCAP_ESPINUDP and not
        UDP_ENCAP_ESPINUDP_NON_IKE. The _NON_IKE variant is from an early
        IETF draft and is not widely used.

v3->v4 removed refactoring since refactored function is only used once
        removed refcount on sk, sk is not used any more.
        fixed encap_type as Eyal recommended.
        removed unnecessary else since there is a goto before that.

We are not merging the IPv4 and IPv6 functions at this moment, as it
would need more work to do it cleanly.


Steffen Klassert (3):
  xfrm: Use the XFRM_GRO to indicate a GRO call on input.
  xfrm: Support GRO for IPv4 ESP in UDP encapsulation.
  xfrm: Support GRO for IPv6 ESP in UDP encapsulation.

 include/net/gro.h        |  2 +-
 include/net/ipv6_stubs.h |  3 ++
 include/net/xfrm.h       |  4 ++
 net/ipv4/esp4_offload.c  |  6 ++-
 net/ipv4/udp.c           | 18 +++++++-
 net/ipv4/xfrm4_input.c   | 98 ++++++++++++++++++++++++++++++++--------
 net/ipv6/af_inet6.c      |  1 +
 net/ipv6/esp6_offload.c  | 10 +++-
 net/ipv6/xfrm6_input.c   | 98 ++++++++++++++++++++++++++++++++--------
 net/xfrm/xfrm_input.c    |  6 +--
 10 files changed, 197 insertions(+), 49 deletions(-)

--
2.30.2

-antony

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v4 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input.
  2023-08-16  9:57 ` [PATCH v4 ipsec-next 0/3] xfrm: Support GRO decapsulation for ESP in UDP encapsulation Antony Antony
@ 2023-08-16  9:57   ` Antony Antony
  2023-08-16  9:57   ` [PATCH v4 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
  2023-08-16  9:57   ` [PATCH v4 ipsec-next 3/3] xfrm: Support GRO for IPv6 " Antony Antony
  2 siblings, 0 replies; 28+ messages in thread
From: Antony Antony @ 2023-08-16  9:57 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu; +Cc: Eyal Birger, Antony Antony, devel, netdev

From: Steffen Klassert <steffen.klassert@secunet.com>

This is needed to support GRO for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/ipv4/esp4_offload.c | 2 +-
 net/ipv6/esp6_offload.c | 2 +-
 net/xfrm/xfrm_input.c   | 6 ++----
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 3969fa805679..77bb01032667 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -76,7 +76,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,

 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, -2);
+	xfrm_input(skb, IPPROTO_ESP, spi, 0);

 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 75c02992c520..ee5f5abdb503 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -103,7 +103,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,

 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, -2);
+	xfrm_input(skb, IPPROTO_ESP, spi, 0);

 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 39fb91ff23d9..7d4a0bb9ca8d 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -466,7 +466,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	struct xfrm_offload *xo = xfrm_offload(skb);
 	struct sec_path *sp;

-	if (encap_type < 0) {
+	if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) {
 		x = xfrm_input_state(skb);

 		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
@@ -489,9 +489,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			seq = XFRM_SKB_CB(skb)->seq.input.low;
 			goto resume;
 		}
-
-		/* encap_type < -1 indicates a GRO call. */
-		encap_type = 0;
+		/* GRO call */
 		seq = XFRM_SPI_SKB_CB(skb)->seq;

 		if (xo && (xo->flags & CRYPTO_DONE)) {
--
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v4 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation.
  2023-08-16  9:57 ` [PATCH v4 ipsec-next 0/3] xfrm: Support GRO decapsulation for ESP in UDP encapsulation Antony Antony
  2023-08-16  9:57   ` [PATCH v4 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
@ 2023-08-16  9:57   ` Antony Antony
  2023-08-16 11:15     ` Eyal Birger
  2023-08-16  9:57   ` [PATCH v4 ipsec-next 3/3] xfrm: Support GRO for IPv6 " Antony Antony
  2 siblings, 1 reply; 28+ messages in thread
From: Antony Antony @ 2023-08-16  9:57 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu; +Cc: Eyal Birger, Antony Antony, devel, netdev

From: Steffen Klassert <steffen.klassert@secunet.com>

This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 include/net/gro.h       |  2 +-
 include/net/xfrm.h      |  4 ++
 net/ipv4/esp4_offload.c |  6 ++-
 net/ipv4/udp.c          | 16 ++++++-
 net/ipv4/xfrm4_input.c  | 98 ++++++++++++++++++++++++++++++++---------
 5 files changed, 103 insertions(+), 23 deletions(-)

diff --git a/include/net/gro.h b/include/net/gro.h
index a4fab706240d..41c12c5d1ea1 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -29,7 +29,7 @@ struct napi_gro_cb {
 	/* Number of segments aggregated. */
 	u16	count;

-	/* Used in ipv6_gro_receive() and foo-over-udp */
+	/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
 	u16	proto;

 	/* jiffies when first packet was created/queued */
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 33ee3f5936e6..e980f442ddcd 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1671,6 +1671,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu);
 int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 		    int encap_type);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm4_transport_finish(struct sk_buff *skb, int async);
 int xfrm4_rcv(struct sk_buff *skb);

@@ -1711,6 +1713,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
 int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
 		     int optlen);
 #else
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 77bb01032667..34ebfdf0e986 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -32,6 +32,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
 	struct xfrm_state *x;
+	int encap_type = 0;
 	__be32 seq;
 	__be32 spi;

@@ -69,6 +70,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,

 	xo->flags |= XFRM_GRO;

+	if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+		encap_type = UDP_ENCAP_ESPINUDP;
+
 	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
 	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
@@ -76,7 +80,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,

 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, 0);
+	xfrm_input(skb, IPPROTO_ESP, spi, encap_type);

 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index aa32afd871ee..337607b17ebd 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2681,6 +2681,17 @@ void udp_destroy_sock(struct sock *sk)
 	}
 }

+static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
+				       struct udp_sock *up)
+{
+#ifdef CONFIG_XFRM
+	if (up->gro_enabled && encap_type == UDP_ENCAP_ESPINUDP) {
+		if (family == AF_INET)
+			up->gro_receive = xfrm4_gro_udp_encap_rcv;
+	}
+#endif
+}
+
 /*
  *	Socket option code for UDP
  */
@@ -2730,12 +2741,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		case 0:
 #ifdef CONFIG_XFRM
 		case UDP_ENCAP_ESPINUDP:
+			set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, up);
+			fallthrough;
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
 #if IS_ENABLED(CONFIG_IPV6)
 			if (sk->sk_family == AF_INET6)
 				up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
-			else
 #endif
+			if (sk->sk_family == AF_INET)
 				up->encap_rcv = xfrm4_udp_encap_rcv;
 #endif
 			fallthrough;
@@ -2773,6 +2786,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 			udp_tunnel_encap_enable(sk->sk_socket);
 		up->gro_enabled = valbool;
 		up->accept_udp_l4 = valbool;
+		set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, up);
 		release_sock(sk);
 		break;

diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index ad2afeef4f10..b57f477c745e 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -17,6 +17,8 @@
 #include <linux/netfilter_ipv4.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>

 static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
 				   struct sk_buff *skb)
@@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
 	return 0;
 }

-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
 {
 	struct udp_sock *up = udp_sk(sk);
 	struct udphdr *uh;
@@ -90,8 +85,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	__be32 *udpdata32;
 	__u16 encap_type = up->encap_type;

-	/* if this is not encapsulated socket, then just return now */
-	if (!encap_type)
+	/* if unknown encap_type then just return now */
+	if (encap_type != UDP_ENCAP_ESPINUDP && encap_type != UDP_ENCAP_ESPINUDP_NON_IKE)
 		return 1;

 	/* If this is a paged skb, make sure we pull up
@@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 			/* ESP Packet without Non-ESP header */
 			len = sizeof(struct udphdr);
@@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
 			   udpdata32[0] == 0 && udpdata32[1] == 0) {

@@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
 	if (skb_unclone(skb, GFP_ATOMIC))
-		goto drop;
+		return -EINVAL;

 	/* Now we can update and verify the packet length... */
 	iph = ip_hdr(skb);
@@ -147,24 +142,87 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	iph->tot_len = htons(ntohs(iph->tot_len) - len);
 	if (skb->len < iphlen + len) {
 		/* packet is too small!?! */
-		goto drop;
+		return -EINVAL;
 	}

 	/* pull the data buffer up to the ESP header and set the
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	__skb_pull(skb, len);
-	skb_reset_transport_header(skb);
+	if (pull) {
+		__skb_pull(skb, len);
+		skb_reset_transport_header(skb);
+	} else {
+		skb_set_transport_header(skb, len);
+	}

 	/* process ESP */
-	return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
-	kfree_skb(skb);
 	return 0;
 }

+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = __xfrm4_udp_encap_rcv(sk, skb, true);
+	if (!ret)
+		return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
+				       udp_sk(sk)->encap_type);
+
+	if (ret < 0) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return ret;
+}
+
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb)
+{
+	int offset = skb_gro_offset(skb);
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
+	int ret;
+
+	offset = offset - sizeof(struct udphdr);
+
+	if (!pskb_pull(skb, offset))
+		return NULL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
+	if (!ops || !ops->callbacks.gro_receive)
+		goto out;
+
+	ret = __xfrm4_udp_encap_rcv(sk, skb, false);
+	if (ret)
+		goto out;
+
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	rcu_read_unlock();
+
+	return pp;
+
+out:
+	rcu_read_unlock();
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->same_flow = 0;
+	NAPI_GRO_CB(skb)->flush = 1;
+
+	return NULL;
+}
+
 int xfrm4_rcv(struct sk_buff *skb)
 {
 	return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
--
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v4 ipsec-next 3/3] xfrm: Support GRO for IPv6 ESP in UDP encapsulation.
  2023-08-16  9:57 ` [PATCH v4 ipsec-next 0/3] xfrm: Support GRO decapsulation for ESP in UDP encapsulation Antony Antony
  2023-08-16  9:57   ` [PATCH v4 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
  2023-08-16  9:57   ` [PATCH v4 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
@ 2023-08-16  9:57   ` Antony Antony
  2 siblings, 0 replies; 28+ messages in thread
From: Antony Antony @ 2023-08-16  9:57 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu; +Cc: Eyal Birger, Antony Antony, devel, netdev

From: Steffen Klassert <steffen.klassert@secunet.com>

This patch enables the GRO codepath for IPv6 ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 include/net/ipv6_stubs.h |  3 ++
 include/net/xfrm.h       |  4 +-
 net/ipv4/udp.c           |  2 +
 net/ipv6/af_inet6.c      |  1 +
 net/ipv6/esp6_offload.c  | 10 +++-
 net/ipv6/xfrm6_input.c   | 98 ++++++++++++++++++++++++++++++++--------
 6 files changed, 94 insertions(+), 24 deletions(-)

diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index c48186bf4737..887d35f716c7 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -60,6 +60,9 @@ struct ipv6_stub {
 #if IS_ENABLED(CONFIG_XFRM)
 	void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
 	int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
+	struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk,
+						   struct list_head *head,
+						   struct sk_buff *skb);
 	int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
 			       int encap_type);
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e980f442ddcd..6b133be77e3b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1671,8 +1671,6 @@ void xfrm_local_error(struct sk_buff *skb, int mtu);
 int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 		    int encap_type);
-struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
-					struct sk_buff *skb);
 int xfrm4_transport_finish(struct sk_buff *skb, int async);
 int xfrm4_rcv(struct sk_buff *skb);

@@ -1715,6 +1713,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
 					struct sk_buff *skb);
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
 		     int optlen);
 #else
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 337607b17ebd..5e75e672b4e3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2688,6 +2688,8 @@ static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
 	if (up->gro_enabled && encap_type == UDP_ENCAP_ESPINUDP) {
 		if (family == AF_INET)
 			up->gro_receive = xfrm4_gro_udp_encap_rcv;
+		else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6)
+			up->gro_receive = ipv6_stub->xfrm6_gro_udp_encap_rcv;
 	}
 #endif
 }
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2bbf13216a3d..0ba95226e07d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1046,6 +1046,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
 #if IS_ENABLED(CONFIG_XFRM)
 	.xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
 	.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
+	.xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv,
 	.xfrm6_rcv_encap = xfrm6_rcv_encap,
 #endif
 	.nd_tbl	= &nd_tbl,
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index ee5f5abdb503..a10d1e1cf544 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -33,7 +33,9 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
 	int off = sizeof(struct ipv6hdr);
 	struct ipv6_opt_hdr *exthdr;

-	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+	/* ESP or ESPINUDP */
+	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP ||
+		   ipv6_hdr->nexthdr == NEXTHDR_UDP))
 		return offsetof(struct ipv6hdr, nexthdr);

 	while (off < nhlen) {
@@ -53,10 +55,14 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
 	struct xfrm_state *x;
+	int encap_type = 0;
 	__be32 seq;
 	__be32 spi;
 	int nhoff;

+	if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+		encap_type = UDP_ENCAP_ESPINUDP;
+
 	if (!pskb_pull(skb, offset))
 		return NULL;

@@ -103,7 +109,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,

 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, 0);
+	xfrm_input(skb, IPPROTO_ESP, spi, encap_type);

 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 04cbeefd8982..be3dbadc1e06 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,6 +16,8 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/ipv6.h>
 #include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>

 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
 		  struct ip6_tnl *t)
@@ -67,14 +69,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 	return 0;
 }

-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
 {
 	struct udp_sock *up = udp_sk(sk);
 	struct udphdr *uh;
@@ -86,8 +81,8 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	__be32 *udpdata32;
 	__u16 encap_type = up->encap_type;

-	/* if this is not encapsulated socket, then just return now */
-	if (!encap_type)
+	/* if unknown encap_type then just return now */
+	if (encap_type != UDP_ENCAP_ESPINUDP && encap_type != UDP_ENCAP_ESPINUDP_NON_IKE)
 		return 1;

 	/* If this is a paged skb, make sure we pull up
@@ -106,7 +101,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 			/* ESP Packet without Non-ESP header */
 			len = sizeof(struct udphdr);
@@ -117,7 +112,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
 			   udpdata32[0] == 0 && udpdata32[1] == 0) {

@@ -135,31 +130,94 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
 	if (skb_unclone(skb, GFP_ATOMIC))
-		goto drop;
+		return -EINVAL;

 	/* Now we can update and verify the packet length... */
 	ip6h = ipv6_hdr(skb);
 	ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len);
 	if (skb->len < ip6hlen + len) {
 		/* packet is too small!?! */
-		goto drop;
+		return -EINVAL;
 	}

 	/* pull the data buffer up to the ESP header and set the
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	__skb_pull(skb, len);
-	skb_reset_transport_header(skb);
+	if (pull) {
+		__skb_pull(skb, len);
+		skb_reset_transport_header(skb);
+	} else {
+		skb_set_transport_header(skb, len);
+	}

 	/* process ESP */
-	return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
-	kfree_skb(skb);
 	return 0;
 }

+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = __xfrm6_udp_encap_rcv(sk, skb, true);
+	if (!ret)
+		return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0,
+				       udp_sk(sk)->encap_type);
+
+	if (ret < 0) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return ret;
+}
+
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb)
+{
+	int offset = skb_gro_offset(skb);
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
+	int ret;
+
+	offset = offset - sizeof(struct udphdr);
+
+	if (!pskb_pull(skb, offset))
+		return NULL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]);
+	if (!ops || !ops->callbacks.gro_receive)
+		goto out;
+
+	ret = __xfrm6_udp_encap_rcv(sk, skb, false);
+	if (ret)
+		goto out;
+
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	rcu_read_unlock();
+
+	return pp;
+
+out:
+	rcu_read_unlock();
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->same_flow = 0;
+	NAPI_GRO_CB(skb)->flush = 1;
+
+	return NULL;
+}
+
 int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
 {
 	return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
--
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation.
  2023-08-16  9:57   ` [PATCH v4 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
@ 2023-08-16 11:15     ` Eyal Birger
  2023-08-16 16:59       ` Antony Antony
  0 siblings, 1 reply; 28+ messages in thread
From: Eyal Birger @ 2023-08-16 11:15 UTC (permalink / raw)
  To: antony.antony; +Cc: Steffen Klassert, Herbert Xu, devel, netdev

Hi Antony,

On Wed, Aug 16, 2023 at 12:57 PM Antony Antony
<antony.antony@secunet.com> wrote:
>
> From: Steffen Klassert <steffen.klassert@secunet.com>
>
> This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
> packets. Decapsulation happens at L2 and saves a full round through
> the stack for each packet. This is also needed to support HW offload
> for ESP in UDP encapsulation.
>
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> Co-developed-by: Antony Antony <antony.antony@secunet.com>
> Signed-off-by: Antony Antony <antony.antony@secunet.com>
> ---
>  include/net/gro.h       |  2 +-
>  include/net/xfrm.h      |  4 ++
>  net/ipv4/esp4_offload.c |  6 ++-
>  net/ipv4/udp.c          | 16 ++++++-
>  net/ipv4/xfrm4_input.c  | 98 ++++++++++++++++++++++++++++++++---------
>  5 files changed, 103 insertions(+), 23 deletions(-)
>
> diff --git a/include/net/gro.h b/include/net/gro.h
> index a4fab706240d..41c12c5d1ea1 100644
> --- a/include/net/gro.h
> +++ b/include/net/gro.h
> @@ -29,7 +29,7 @@ struct napi_gro_cb {
>         /* Number of segments aggregated. */
>         u16     count;
>
> -       /* Used in ipv6_gro_receive() and foo-over-udp */
> +       /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
>         u16     proto;
>
>         /* jiffies when first packet was created/queued */
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index 33ee3f5936e6..e980f442ddcd 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -1671,6 +1671,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu);
>  int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
>  int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
>                     int encap_type);
> +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +                                       struct sk_buff *skb);

Why does this function need to be declared twice in this file?

>  int xfrm4_transport_finish(struct sk_buff *skb, int async);
>  int xfrm4_rcv(struct sk_buff *skb);
>
> @@ -1711,6 +1713,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
>  void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
>  int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
>  int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
> +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +                                       struct sk_buff *skb);
>  int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
>                      int optlen);
>  #else
> diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
> index 77bb01032667..34ebfdf0e986 100644
> --- a/net/ipv4/esp4_offload.c
> +++ b/net/ipv4/esp4_offload.c
> @@ -32,6 +32,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
>         int offset = skb_gro_offset(skb);
>         struct xfrm_offload *xo;
>         struct xfrm_state *x;
> +       int encap_type = 0;
>         __be32 seq;
>         __be32 spi;
>
> @@ -69,6 +70,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
>
>         xo->flags |= XFRM_GRO;
>
> +       if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
> +               encap_type = UDP_ENCAP_ESPINUDP;
> +
>         XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
>         XFRM_SPI_SKB_CB(skb)->family = AF_INET;
>         XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
> @@ -76,7 +80,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
>
>         /* We don't need to handle errors from xfrm_input, it does all
>          * the error handling and frees the resources on error. */
> -       xfrm_input(skb, IPPROTO_ESP, spi, 0);
> +       xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
>
>         return ERR_PTR(-EINPROGRESS);
>  out_reset:
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index aa32afd871ee..337607b17ebd 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -2681,6 +2681,17 @@ void udp_destroy_sock(struct sock *sk)
>         }
>  }
>
> +static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
> +                                      struct udp_sock *up)
> +{
> +#ifdef CONFIG_XFRM
> +       if (up->gro_enabled && encap_type == UDP_ENCAP_ESPINUDP) {
> +               if (family == AF_INET)
> +                       up->gro_receive = xfrm4_gro_udp_encap_rcv;
> +       }
> +#endif
> +}
> +
>  /*
>   *     Socket option code for UDP
>   */
> @@ -2730,12 +2741,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
>                 case 0:
>  #ifdef CONFIG_XFRM
>                 case UDP_ENCAP_ESPINUDP:
> +                       set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, up);
> +                       fallthrough;
>                 case UDP_ENCAP_ESPINUDP_NON_IKE:
>  #if IS_ENABLED(CONFIG_IPV6)
>                         if (sk->sk_family == AF_INET6)
>                                 up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
> -                       else
>  #endif
> +                       if (sk->sk_family == AF_INET)

Why is this change needed?

>                                 up->encap_rcv = xfrm4_udp_encap_rcv;
>  #endif
>                         fallthrough;
> @@ -2773,6 +2786,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
>                         udp_tunnel_encap_enable(sk->sk_socket);
>                 up->gro_enabled = valbool;
>                 up->accept_udp_l4 = valbool;
> +               set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, up);
>                 release_sock(sk);
>                 break;
>
> diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
> index ad2afeef4f10..b57f477c745e 100644
> --- a/net/ipv4/xfrm4_input.c
> +++ b/net/ipv4/xfrm4_input.c
> @@ -17,6 +17,8 @@
>  #include <linux/netfilter_ipv4.h>
>  #include <net/ip.h>
>  #include <net/xfrm.h>
> +#include <net/protocol.h>
> +#include <net/gro.h>
>
>  static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
>                                    struct sk_buff *skb)
> @@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
>         return 0;
>  }
>
> -/* If it's a keepalive packet, then just eat it.
> - * If it's an encapsulated packet, then pass it to the
> - * IPsec xfrm input.
> - * Returns 0 if skb passed to xfrm or was dropped.
> - * Returns >0 if skb should be passed to UDP.
> - * Returns <0 if skb should be resubmitted (-ret is protocol)
> - */
> -int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> +static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
>  {
>         struct udp_sock *up = udp_sk(sk);
>         struct udphdr *uh;
> @@ -90,8 +85,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         __be32 *udpdata32;
>         __u16 encap_type = up->encap_type;
>
> -       /* if this is not encapsulated socket, then just return now */
> -       if (!encap_type)
> +       /* if unknown encap_type then just return now */
> +       if (encap_type != UDP_ENCAP_ESPINUDP && encap_type != UDP_ENCAP_ESPINUDP_NON_IKE)

This change is unclear to me - the patch adds support for GRO on
UDP_ENCAP_ESPINUDP.
How can we now get other encap types here? and why wasn't the old condition ok?

>                 return 1;
>
>         /* If this is a paged skb, make sure we pull up
> @@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         case UDP_ENCAP_ESPINUDP:
>                 /* Check if this is a keepalive packet.  If so, eat it. */
>                 if (len == 1 && udpdata[0] == 0xff) {
> -                       goto drop;
> +                       return -EINVAL;
>                 } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
>                         /* ESP Packet without Non-ESP header */
>                         len = sizeof(struct udphdr);
> @@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         case UDP_ENCAP_ESPINUDP_NON_IKE:
>                 /* Check if this is a keepalive packet.  If so, eat it. */
>                 if (len == 1 && udpdata[0] == 0xff) {
> -                       goto drop;
> +                       return -EINVAL;
>                 } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
>                            udpdata32[0] == 0 && udpdata32[1] == 0) {
>
> @@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>          * protocol to ESP, and then call into the transform receiver.
>          */
>         if (skb_unclone(skb, GFP_ATOMIC))
> -               goto drop;
> +               return -EINVAL;
>
>         /* Now we can update and verify the packet length... */
>         iph = ip_hdr(skb);
> @@ -147,24 +142,87 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         iph->tot_len = htons(ntohs(iph->tot_len) - len);
>         if (skb->len < iphlen + len) {
>                 /* packet is too small!?! */
> -               goto drop;
> +               return -EINVAL;
>         }
>
>         /* pull the data buffer up to the ESP header and set the
>          * transport header to point to ESP.  Keep UDP on the stack
>          * for later.
>          */
> -       __skb_pull(skb, len);
> -       skb_reset_transport_header(skb);
> +       if (pull) {
> +               __skb_pull(skb, len);
> +               skb_reset_transport_header(skb);
> +       } else {
> +               skb_set_transport_header(skb, len);
> +       }
>
>         /* process ESP */
> -       return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
> -
> -drop:
> -       kfree_skb(skb);
>         return 0;
>  }
>
> +/* If it's a keepalive packet, then just eat it.
> + * If it's an encapsulated packet, then pass it to the
> + * IPsec xfrm input.
> + * Returns 0 if skb passed to xfrm or was dropped.
> + * Returns >0 if skb should be passed to UDP.
> + * Returns <0 if skb should be resubmitted (-ret is protocol)
> + */
> +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> +{
> +       int ret;
> +
> +       ret = __xfrm4_udp_encap_rcv(sk, skb, true);
> +       if (!ret)
> +               return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
> +                                      udp_sk(sk)->encap_type);
> +
> +       if (ret < 0) {
> +               kfree_skb(skb);
> +               return 0;
> +       }
> +
> +       return ret;
> +}
> +
> +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +                                       struct sk_buff *skb)
> +{
> +       int offset = skb_gro_offset(skb);
> +       const struct net_offload *ops;
> +       struct sk_buff *pp = NULL;
> +       int ret;
> +
> +       offset = offset - sizeof(struct udphdr);
> +
> +       if (!pskb_pull(skb, offset))
> +               return NULL;
> +
> +       rcu_read_lock();
> +       ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
> +       if (!ops || !ops->callbacks.gro_receive)
> +               goto out;
> +
> +       ret = __xfrm4_udp_encap_rcv(sk, skb, false);
> +       if (ret)
> +               goto out;
> +
> +       skb_push(skb, offset);
> +       NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
> +
> +       pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
> +       rcu_read_unlock();
> +
> +       return pp;
> +
> +out:
> +       rcu_read_unlock();
> +       skb_push(skb, offset);
> +       NAPI_GRO_CB(skb)->same_flow = 0;
> +       NAPI_GRO_CB(skb)->flush = 1;
> +
> +       return NULL;
> +}
> +
>  int xfrm4_rcv(struct sk_buff *skb)
>  {
>         return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for ESP in UDP encapsulation
  2023-01-19 19:33 [PATCH 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
                   ` (3 preceding siblings ...)
  2023-08-16  9:57 ` [PATCH v4 ipsec-next 0/3] xfrm: Support GRO decapsulation for ESP in UDP encapsulation Antony Antony
@ 2023-08-16 13:12 ` Antony Antony
  2023-08-16 13:12   ` [PATCH v5 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
                     ` (3 more replies)
  2023-09-26 10:14 ` Antony Antony
  2023-10-04 13:04 ` [PATCH v7 " Antony Antony
  6 siblings, 4 replies; 28+ messages in thread
From: Antony Antony @ 2023-08-16 13:12 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu; +Cc: Eyal Birger, devel, netdev, Antony Antony

Hi,
I re-worked this patch set; here is v5, based on feedback from Eyal.


v1->v2 fixed error path added skb_push
        use is_fou instead of holding sk in skb.
        user configurable option to enable GRO; using UDP_GRO

v2->v3 only support GRO for UDP_ENCAP_ESPINUDP and not
        UDP_ENCAP_ESPINUDP_NON_IKE. The _NON_IKE is an IETF early draft
        version and not widely used.

v3->v4 removed refactoring since refactored function is only used once
        removed refcount on sk, sk is not used any more.
        fixed encap_type as Eyal recommended.
        removed unnecessary else since there is a goto before that.

v4->v5 removed extra code/checks that accidentally got added.


Steffen Klassert (3):
  xfrm: Use the XFRM_GRO to indicate a GRO call on input
  xfrm: Support GRO for IPv4 ESP in UDP encapsulation
  xfrm: Support GRO for IPv6 ESP in UDP encapsulation

 include/net/gro.h        |  2 +-
 include/net/ipv6_stubs.h |  3 ++
 include/net/xfrm.h       |  4 ++
 net/ipv4/esp4_offload.c  |  6 ++-
 net/ipv4/udp.c           | 16 +++++++
 net/ipv4/xfrm4_input.c   | 94 ++++++++++++++++++++++++++++++++--------
 net/ipv6/af_inet6.c      |  1 +
 net/ipv6/esp6_offload.c  | 10 ++++-
 net/ipv6/xfrm6_input.c   | 94 ++++++++++++++++++++++++++++++++--------
 net/xfrm/xfrm_input.c    |  6 +--
 10 files changed, 192 insertions(+), 44 deletions(-)

--
2.30.2


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v5 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input
  2023-08-16 13:12 ` [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Antony Antony
@ 2023-08-16 13:12   ` Antony Antony
  2023-08-16 13:13   ` [PATCH v5 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 28+ messages in thread
From: Antony Antony @ 2023-08-16 13:12 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu; +Cc: Eyal Birger, devel, netdev, Antony Antony

From: Steffen Klassert <steffen.klassert@secunet.com>

This is needed to support GRO for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/ipv4/esp4_offload.c | 2 +-
 net/ipv6/esp6_offload.c | 2 +-
 net/xfrm/xfrm_input.c   | 6 ++----
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 3969fa805679..77bb01032667 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -76,7 +76,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 
 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, -2);
+	xfrm_input(skb, IPPROTO_ESP, spi, 0);
 
 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 75c02992c520..ee5f5abdb503 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -103,7 +103,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
 
 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, -2);
+	xfrm_input(skb, IPPROTO_ESP, spi, 0);
 
 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 39fb91ff23d9..7d4a0bb9ca8d 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -466,7 +466,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	struct xfrm_offload *xo = xfrm_offload(skb);
 	struct sec_path *sp;
 
-	if (encap_type < 0) {
+	if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) {
 		x = xfrm_input_state(skb);
 
 		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
@@ -489,9 +489,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			seq = XFRM_SKB_CB(skb)->seq.input.low;
 			goto resume;
 		}
-
-		/* encap_type < -1 indicates a GRO call. */
-		encap_type = 0;
+		/* GRO call */
 		seq = XFRM_SPI_SKB_CB(skb)->seq;
 
 		if (xo && (xo->flags & CRYPTO_DONE)) {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v5 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation
  2023-08-16 13:12 ` [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Antony Antony
  2023-08-16 13:12   ` [PATCH v5 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
@ 2023-08-16 13:13   ` Antony Antony
  2023-08-16 13:13   ` [PATCH v5 ipsec-next 3/3] xfrm: Support GRO for IPv6 " Antony Antony
  2023-08-17 11:52   ` [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Eyal Birger
  3 siblings, 0 replies; 28+ messages in thread
From: Antony Antony @ 2023-08-16 13:13 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu; +Cc: Eyal Birger, devel, netdev, Antony Antony

From: Steffen Klassert <steffen.klassert@secunet.com>

This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 include/net/gro.h       |  2 +-
 include/net/xfrm.h      |  2 +
 net/ipv4/esp4_offload.c |  6 ++-
 net/ipv4/udp.c          | 14 ++++++
 net/ipv4/xfrm4_input.c  | 94 +++++++++++++++++++++++++++++++++--------
 5 files changed, 98 insertions(+), 20 deletions(-)

diff --git a/include/net/gro.h b/include/net/gro.h
index a4fab706240d..41c12c5d1ea1 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -29,7 +29,7 @@ struct napi_gro_cb {
 	/* Number of segments aggregated. */
 	u16	count;
 
-	/* Used in ipv6_gro_receive() and foo-over-udp */
+	/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
 	u16	proto;
 
 	/* jiffies when first packet was created/queued */
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 33ee3f5936e6..3be5d11bb983 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1711,6 +1711,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
 int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
 		     int optlen);
 #else
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 77bb01032667..34ebfdf0e986 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -32,6 +32,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
 	struct xfrm_state *x;
+	int encap_type = 0;
 	__be32 seq;
 	__be32 spi;
 
@@ -69,6 +70,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 
 	xo->flags |= XFRM_GRO;
 
+	if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+		encap_type = UDP_ENCAP_ESPINUDP;
+
 	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
 	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
@@ -76,7 +80,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 
 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, 0);
+	xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
 
 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index aa32afd871ee..caf1aa733219 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2681,6 +2681,17 @@ void udp_destroy_sock(struct sock *sk)
 	}
 }
 
+static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
+				       struct udp_sock *up)
+{
+#ifdef CONFIG_XFRM
+	if (up->gro_enabled && encap_type == UDP_ENCAP_ESPINUDP) {
+		if (family == AF_INET)
+			up->gro_receive = xfrm4_gro_udp_encap_rcv;
+	}
+#endif
+}
+
 /*
  *	Socket option code for UDP
  */
@@ -2730,6 +2741,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		case 0:
 #ifdef CONFIG_XFRM
 		case UDP_ENCAP_ESPINUDP:
+			set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, up);
+			fallthrough;
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
 #if IS_ENABLED(CONFIG_IPV6)
 			if (sk->sk_family == AF_INET6)
@@ -2773,6 +2786,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 			udp_tunnel_encap_enable(sk->sk_socket);
 		up->gro_enabled = valbool;
 		up->accept_udp_l4 = valbool;
+		set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, up);
 		release_sock(sk);
 		break;
 
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index ad2afeef4f10..2f75b68d7db7 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -17,6 +17,8 @@
 #include <linux/netfilter_ipv4.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>
 
 static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
 				   struct sk_buff *skb)
@@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
 	return 0;
 }
 
-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
 {
 	struct udp_sock *up = udp_sk(sk);
 	struct udphdr *uh;
@@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 			/* ESP Packet without Non-ESP header */
 			len = sizeof(struct udphdr);
@@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
 			   udpdata32[0] == 0 && udpdata32[1] == 0) {
 
@@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
 	if (skb_unclone(skb, GFP_ATOMIC))
-		goto drop;
+		return -EINVAL;
 
 	/* Now we can update and verify the packet length... */
 	iph = ip_hdr(skb);
@@ -147,24 +142,87 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	iph->tot_len = htons(ntohs(iph->tot_len) - len);
 	if (skb->len < iphlen + len) {
 		/* packet is too small!?! */
-		goto drop;
+		return -EINVAL;
 	}
 
 	/* pull the data buffer up to the ESP header and set the
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	__skb_pull(skb, len);
-	skb_reset_transport_header(skb);
+	if (pull) {
+		__skb_pull(skb, len);
+		skb_reset_transport_header(skb);
+	} else {
+		skb_set_transport_header(skb, len);
+	}
 
 	/* process ESP */
-	return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
-	kfree_skb(skb);
 	return 0;
 }
 
+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = __xfrm4_udp_encap_rcv(sk, skb, true);
+	if (!ret)
+		return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
+				       udp_sk(sk)->encap_type);
+
+	if (ret < 0) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return ret;
+}
+
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb)
+{
+	int offset = skb_gro_offset(skb);
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
+	int ret;
+
+	offset = offset - sizeof(struct udphdr);
+
+	if (!pskb_pull(skb, offset))
+		return NULL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
+	if (!ops || !ops->callbacks.gro_receive)
+		goto out;
+
+	ret = __xfrm4_udp_encap_rcv(sk, skb, false);
+	if (ret)
+		goto out;
+
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	rcu_read_unlock();
+
+	return pp;
+
+out:
+	rcu_read_unlock();
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->same_flow = 0;
+	NAPI_GRO_CB(skb)->flush = 1;
+
+	return NULL;
+}
+
 int xfrm4_rcv(struct sk_buff *skb)
 {
 	return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v5 ipsec-next 3/3] xfrm: Support GRO for IPv6 ESP in UDP encapsulation
  2023-08-16 13:12 ` [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Antony Antony
  2023-08-16 13:12   ` [PATCH v5 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
  2023-08-16 13:13   ` [PATCH v5 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
@ 2023-08-16 13:13   ` Antony Antony
  2023-08-17 11:52   ` [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Eyal Birger
  3 siblings, 0 replies; 28+ messages in thread
From: Antony Antony @ 2023-08-16 13:13 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu; +Cc: Eyal Birger, devel, netdev, Antony Antony

From: Steffen Klassert <steffen.klassert@secunet.com>

This patch enables the GRO codepath for IPv6 ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 include/net/ipv6_stubs.h |  3 ++
 include/net/xfrm.h       |  2 +
 net/ipv4/udp.c           |  2 +
 net/ipv6/af_inet6.c      |  1 +
 net/ipv6/esp6_offload.c  | 10 ++++-
 net/ipv6/xfrm6_input.c   | 94 ++++++++++++++++++++++++++++++++--------
 6 files changed, 92 insertions(+), 20 deletions(-)

diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index c48186bf4737..887d35f716c7 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -60,6 +60,9 @@ struct ipv6_stub {
 #if IS_ENABLED(CONFIG_XFRM)
 	void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
 	int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
+	struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk,
+						   struct list_head *head,
+						   struct sk_buff *skb);
 	int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
 			       int encap_type);
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3be5d11bb983..6b133be77e3b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1713,6 +1713,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
 					struct sk_buff *skb);
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
 		     int optlen);
 #else
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index caf1aa733219..a4c5e4b00dc8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2688,6 +2688,8 @@ static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
 	if (up->gro_enabled && encap_type == UDP_ENCAP_ESPINUDP) {
 		if (family == AF_INET)
 			up->gro_receive = xfrm4_gro_udp_encap_rcv;
+		else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6)
+			up->gro_receive = ipv6_stub->xfrm6_gro_udp_encap_rcv;
 	}
 #endif
 }
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2bbf13216a3d..0ba95226e07d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1046,6 +1046,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
 #if IS_ENABLED(CONFIG_XFRM)
 	.xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
 	.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
+	.xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv,
 	.xfrm6_rcv_encap = xfrm6_rcv_encap,
 #endif
 	.nd_tbl	= &nd_tbl,
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index ee5f5abdb503..a10d1e1cf544 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -33,7 +33,9 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
 	int off = sizeof(struct ipv6hdr);
 	struct ipv6_opt_hdr *exthdr;
 
-	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+	/* ESP or ESPINUDP */
+	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP ||
+		   ipv6_hdr->nexthdr == NEXTHDR_UDP))
 		return offsetof(struct ipv6hdr, nexthdr);
 
 	while (off < nhlen) {
@@ -53,10 +55,14 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
 	struct xfrm_state *x;
+	int encap_type = 0;
 	__be32 seq;
 	__be32 spi;
 	int nhoff;
 
+	if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+		encap_type = UDP_ENCAP_ESPINUDP;
+
 	if (!pskb_pull(skb, offset))
 		return NULL;
 
@@ -103,7 +109,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
 
 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, 0);
+	xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
 
 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 04cbeefd8982..b77ab24f707b 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,6 +16,8 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/ipv6.h>
 #include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>
 
 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
 		  struct ip6_tnl *t)
@@ -67,14 +69,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 	return 0;
 }
 
-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
 {
 	struct udp_sock *up = udp_sk(sk);
 	struct udphdr *uh;
@@ -106,7 +101,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 			/* ESP Packet without Non-ESP header */
 			len = sizeof(struct udphdr);
@@ -117,7 +112,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
 			   udpdata32[0] == 0 && udpdata32[1] == 0) {
 
@@ -135,31 +130,94 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
 	if (skb_unclone(skb, GFP_ATOMIC))
-		goto drop;
+		return -EINVAL;
 
 	/* Now we can update and verify the packet length... */
 	ip6h = ipv6_hdr(skb);
 	ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len);
 	if (skb->len < ip6hlen + len) {
 		/* packet is too small!?! */
-		goto drop;
+		return -EINVAL;
 	}
 
 	/* pull the data buffer up to the ESP header and set the
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	__skb_pull(skb, len);
-	skb_reset_transport_header(skb);
+	if (pull) {
+		__skb_pull(skb, len);
+		skb_reset_transport_header(skb);
+	} else {
+		skb_set_transport_header(skb, len);
+	}
 
 	/* process ESP */
-	return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
-	kfree_skb(skb);
 	return 0;
 }
 
+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = __xfrm6_udp_encap_rcv(sk, skb, true);
+	if (!ret)
+		return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0,
+				       udp_sk(sk)->encap_type);
+
+	if (ret < 0) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return ret;
+}
+
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb)
+{
+	int offset = skb_gro_offset(skb);
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
+	int ret;
+
+	offset = offset - sizeof(struct udphdr);
+
+	if (!pskb_pull(skb, offset))
+		return NULL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]);
+	if (!ops || !ops->callbacks.gro_receive)
+		goto out;
+
+	ret = __xfrm6_udp_encap_rcv(sk, skb, false);
+	if (ret)
+		goto out;
+
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	rcu_read_unlock();
+
+	return pp;
+
+out:
+	rcu_read_unlock();
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->same_flow = 0;
+	NAPI_GRO_CB(skb)->flush = 1;
+
+	return NULL;
+}
+
 int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
 {
 	return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation.
  2023-08-16 11:15     ` Eyal Birger
@ 2023-08-16 16:59       ` Antony Antony
  0 siblings, 0 replies; 28+ messages in thread
From: Antony Antony @ 2023-08-16 16:59 UTC (permalink / raw)
  To: Eyal Birger; +Cc: antony.antony, Steffen Klassert, Herbert Xu, devel, netdev

Hi Eyal,

Thanks for your quick review. I have addressed the points you raised for
v4 and sent the v5 patches.

On Wed, Aug 16, 2023 at 14:15:01 +0300, Eyal Birger wrote:
> Hi Antony,
> 
> On Wed, Aug 16, 2023 at 12:57 PM Antony Antony
> <antony.antony@secunet.com> wrote:
> >
> > From: Steffen Klassert <steffen.klassert@secunet.com>
> >
> > This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
> > packets. Decapsulation happens at L2 and saves a full round through
> > the stack for each packet. This is also needed to support HW offload
> > for ESP in UDP encapsulation.
> >
> > Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> > Co-developed-by: Antony Antony <antony.antony@secunet.com>
> > Signed-off-by: Antony Antony <antony.antony@secunet.com>
> > ---
> >  include/net/gro.h       |  2 +-
> >  include/net/xfrm.h      |  4 ++
> >  net/ipv4/esp4_offload.c |  6 ++-
> >  net/ipv4/udp.c          | 16 ++++++-
> >  net/ipv4/xfrm4_input.c  | 98 ++++++++++++++++++++++++++++++++---------
> >  5 files changed, 103 insertions(+), 23 deletions(-)
> >
> > diff --git a/include/net/gro.h b/include/net/gro.h
> > index a4fab706240d..41c12c5d1ea1 100644
> > --- a/include/net/gro.h
> > +++ b/include/net/gro.h
> > @@ -29,7 +29,7 @@ struct napi_gro_cb {
> >         /* Number of segments aggregated. */
> >         u16     count;
> >
> > -       /* Used in ipv6_gro_receive() and foo-over-udp */
> > +       /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
> >         u16     proto;
> >
> >         /* jiffies when first packet was created/queued */
> > diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> > index 33ee3f5936e6..e980f442ddcd 100644
> > --- a/include/net/xfrm.h
> > +++ b/include/net/xfrm.h
> > @@ -1671,6 +1671,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu);
> >  int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
> >  int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
> >                     int encap_type);
> > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> > +                                       struct sk_buff *skb);
> 
> Why does this function need to be declared twice in this file?

No need. Actually, the following patch removed it :) It is fixed in v5.

> 
> >  int xfrm4_transport_finish(struct sk_buff *skb, int async);
> >  int xfrm4_rcv(struct sk_buff *skb);
> >
> > @@ -1711,6 +1713,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
> >  void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
> >  int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
> >  int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
> > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> > +                                       struct sk_buff *skb);
> >  int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
> >                      int optlen);
> >  #else
> > diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
> > index 77bb01032667..34ebfdf0e986 100644
> > --- a/net/ipv4/esp4_offload.c
> > +++ b/net/ipv4/esp4_offload.c
> > @@ -32,6 +32,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
> >         int offset = skb_gro_offset(skb);
> >         struct xfrm_offload *xo;
> >         struct xfrm_state *x;
> > +       int encap_type = 0;
> >         __be32 seq;
> >         __be32 spi;
> >
> > @@ -69,6 +70,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
> >
> >         xo->flags |= XFRM_GRO;
> >
> > +       if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
> > +               encap_type = UDP_ENCAP_ESPINUDP;
> > +
> >         XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
> >         XFRM_SPI_SKB_CB(skb)->family = AF_INET;
> >         XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
> > @@ -76,7 +80,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
> >
> >         /* We don't need to handle errors from xfrm_input, it does all
> >          * the error handling and frees the resources on error. */
> > -       xfrm_input(skb, IPPROTO_ESP, spi, 0);
> > +       xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
> >
> >         return ERR_PTR(-EINPROGRESS);
> >  out_reset:
> > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> > index aa32afd871ee..337607b17ebd 100644
> > --- a/net/ipv4/udp.c
> > +++ b/net/ipv4/udp.c
> > @@ -2681,6 +2681,17 @@ void udp_destroy_sock(struct sock *sk)
> >         }
> >  }
> >
> > +static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
> > +                                      struct udp_sock *up)
> > +{
> > +#ifdef CONFIG_XFRM
> > +       if (up->gro_enabled && encap_type == UDP_ENCAP_ESPINUDP) {
> > +               if (family == AF_INET)
> > +                       up->gro_receive = xfrm4_gro_udp_encap_rcv;
> > +       }
> > +#endif
> > +}
> > +
> >  /*
> >   *     Socket option code for UDP
> >   */
> > @@ -2730,12 +2741,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
> >                 case 0:
> >  #ifdef CONFIG_XFRM
> >                 case UDP_ENCAP_ESPINUDP:
> > +                       set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, up);
> > +                       fallthrough;
> >                 case UDP_ENCAP_ESPINUDP_NON_IKE:
> >  #if IS_ENABLED(CONFIG_IPV6)
> >                         if (sk->sk_family == AF_INET6)
> >                                 up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
> > -                       else
> >  #endif
> > +                       if (sk->sk_family == AF_INET)
> 
> Why is this change needed?

It is not necessary. I removed it in v5.

> 
> >                                 up->encap_rcv = xfrm4_udp_encap_rcv;
> >  #endif
> >                         fallthrough;
> > @@ -2773,6 +2786,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
> >                         udp_tunnel_encap_enable(sk->sk_socket);
> >                 up->gro_enabled = valbool;
> >                 up->accept_udp_l4 = valbool;
> > +               set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, up);
> >                 release_sock(sk);
> >                 break;
> >
> > diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
> > index ad2afeef4f10..b57f477c745e 100644
> > --- a/net/ipv4/xfrm4_input.c
> > +++ b/net/ipv4/xfrm4_input.c
> > @@ -17,6 +17,8 @@
> >  #include <linux/netfilter_ipv4.h>
> >  #include <net/ip.h>
> >  #include <net/xfrm.h>
> > +#include <net/protocol.h>
> > +#include <net/gro.h>
> >
> >  static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
> >                                    struct sk_buff *skb)
> > @@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
> >         return 0;
> >  }
> >
> > -/* If it's a keepalive packet, then just eat it.
> > - * If it's an encapsulated packet, then pass it to the
> > - * IPsec xfrm input.
> > - * Returns 0 if skb passed to xfrm or was dropped.
> > - * Returns >0 if skb should be passed to UDP.
> > - * Returns <0 if skb should be resubmitted (-ret is protocol)
> > - */
> > -int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> > +static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
> >  {
> >         struct udp_sock *up = udp_sk(sk);
> >         struct udphdr *uh;
> > @@ -90,8 +85,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> >         __be32 *udpdata32;
> >         __u16 encap_type = up->encap_type;
> >
> > -       /* if this is not encapsulated socket, then just return now */
> > -       if (!encap_type)
> > +       /* if unknown encap_type then just return now */
> > +       if (encap_type != UDP_ENCAP_ESPINUDP && encap_type != UDP_ENCAP_ESPINUDP_NON_IKE)
> 
> This change is unclear to me - the patch adds support for GRO on
> UDP_ENCAP_ESPINUDP.

yes.
> How can we now get other encap types here? and why wasn't the old condition ok?

In the current code the old check is enough. I removed the new code in v5.

> 
> >                 return 1;
> >
> >         /* If this is a paged skb, make sure we pull up
> > @@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> >         case UDP_ENCAP_ESPINUDP:
> >                 /* Check if this is a keepalive packet.  If so, eat it. */
> >                 if (len == 1 && udpdata[0] == 0xff) {
> > -                       goto drop;
> > +                       return -EINVAL;
> >                 } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
> >                         /* ESP Packet without Non-ESP header */
> >                         len = sizeof(struct udphdr);
> > @@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> >         case UDP_ENCAP_ESPINUDP_NON_IKE:
> >                 /* Check if this is a keepalive packet.  If so, eat it. */
> >                 if (len == 1 && udpdata[0] == 0xff) {
> > -                       goto drop;
> > +                       return -EINVAL;
> >                 } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
> >                            udpdata32[0] == 0 && udpdata32[1] == 0) {
> >
> > @@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> >          * protocol to ESP, and then call into the transform receiver.
> >          */
> >         if (skb_unclone(skb, GFP_ATOMIC))
> > -               goto drop;
> > +               return -EINVAL;
> >
> >         /* Now we can update and verify the packet length... */
> >         iph = ip_hdr(skb);
> > @@ -147,24 +142,87 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> >         iph->tot_len = htons(ntohs(iph->tot_len) - len);
> >         if (skb->len < iphlen + len) {
> >                 /* packet is too small!?! */
> > -               goto drop;
> > +               return -EINVAL;
> >         }
> >
> >         /* pull the data buffer up to the ESP header and set the
> >          * transport header to point to ESP.  Keep UDP on the stack
> >          * for later.
> >          */
> > -       __skb_pull(skb, len);
> > -       skb_reset_transport_header(skb);
> > +       if (pull) {
> > +               __skb_pull(skb, len);
> > +               skb_reset_transport_header(skb);
> > +       } else {
> > +               skb_set_transport_header(skb, len);
> > +       }
> >
> >         /* process ESP */
> > -       return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
> > -
> > -drop:
> > -       kfree_skb(skb);
> >         return 0;
> >  }
> >
> > +/* If it's a keepalive packet, then just eat it.
> > + * If it's an encapsulated packet, then pass it to the
> > + * IPsec xfrm input.
> > + * Returns 0 if skb passed to xfrm or was dropped.
> > + * Returns >0 if skb should be passed to UDP.
> > + * Returns <0 if skb should be resubmitted (-ret is protocol)
> > + */
> > +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> > +{
> > +       int ret;
> > +
> > +       ret = __xfrm4_udp_encap_rcv(sk, skb, true);
> > +       if (!ret)
> > +               return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
> > +                                      udp_sk(sk)->encap_type);
> > +
> > +       if (ret < 0) {
> > +               kfree_skb(skb);
> > +               return 0;
> > +       }
> > +
> > +       return ret;
> > +}
> > +
> > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> > +                                       struct sk_buff *skb)
> > +{
> > +       int offset = skb_gro_offset(skb);
> > +       const struct net_offload *ops;
> > +       struct sk_buff *pp = NULL;
> > +       int ret;
> > +
> > +       offset = offset - sizeof(struct udphdr);
> > +
> > +       if (!pskb_pull(skb, offset))
> > +               return NULL;
> > +
> > +       rcu_read_lock();
> > +       ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
> > +       if (!ops || !ops->callbacks.gro_receive)
> > +               goto out;
> > +
> > +       ret = __xfrm4_udp_encap_rcv(sk, skb, false);
> > +       if (ret)
> > +               goto out;
> > +
> > +       skb_push(skb, offset);
> > +       NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
> > +
> > +       pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
> > +       rcu_read_unlock();
> > +
> > +       return pp;
> > +
> > +out:
> > +       rcu_read_unlock();
> > +       skb_push(skb, offset);
> > +       NAPI_GRO_CB(skb)->same_flow = 0;
> > +       NAPI_GRO_CB(skb)->flush = 1;
> > +
> > +       return NULL;
> > +}
> > +
> >  int xfrm4_rcv(struct sk_buff *skb)
> >  {
> >         return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
> > --
> > 2.30.2
> >

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for ESP in UDP encapsulation
  2023-08-16 13:12 ` [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Antony Antony
                     ` (2 preceding siblings ...)
  2023-08-16 13:13   ` [PATCH v5 ipsec-next 3/3] xfrm: Support GRO for IPv6 " Antony Antony
@ 2023-08-17 11:52   ` Eyal Birger
  3 siblings, 0 replies; 28+ messages in thread
From: Eyal Birger @ 2023-08-17 11:52 UTC (permalink / raw)
  To: antony.antony; +Cc: Steffen Klassert, Herbert Xu, devel, netdev

Hi Antony,

On Wed, Aug 16, 2023 at 4:12 PM Antony Antony <antony.antony@secunet.com> wrote:
>
> Hi,
> I re-worked this patch set; here is v5, based on feedback from Eyal.

I think the cover letter should include a little more information :)

Specifically I think it would be useful to add performance numbers, usage,
and the relevant caveats - especially the fact that UDP encapsulated flows
will no longer be connection tracked or protected by netfilter.

For the series itself:

Reviewed-by: Eyal Birger <eyal.birger@gmail.com>

>
>
> v1->v2 fixed error path added skb_push
>         use is_fou instead of holding sk in skb.
>         user configurable option to enable GRO; using UDP_GRO
>
> v2->v3 only support GRO for UDP_ENCAP_ESPINUDP and not
>         UDP_ENCAP_ESPINUDP_NON_IKE. The _NON_IKE is an IETF early draft
>         version and not widely used.
>
> v3->v4 removed refactoring since refactored function is only used once
>         removed refcount on sk, sk is not used any more.
>         fixed encap_type as Eyal recommended.
>         removed un-necessary else since there is a goto before that.
>
> v4->v5 removed extra code/checks that accidentally got added.
>
>
> Steffen Klassert (3):
>   xfrm: Use the XFRM_GRO to indicate a GRO call on input
>   xfrm: Support GRO for IPv4 ESP in UDP encapsulation
>   xfrm: Support GRO for IPv6 ESP in UDP encapsulation
>
>  include/net/gro.h        |  2 +-
>  include/net/ipv6_stubs.h |  3 ++
>  include/net/xfrm.h       |  4 ++
>  net/ipv4/esp4_offload.c  |  6 ++-
>  net/ipv4/udp.c           | 16 +++++++
>  net/ipv4/xfrm4_input.c   | 94 ++++++++++++++++++++++++++++++++--------
>  net/ipv6/af_inet6.c      |  1 +
>  net/ipv6/esp6_offload.c  | 10 ++++-
>  net/ipv6/xfrm6_input.c   | 94 ++++++++++++++++++++++++++++++++--------
>  net/xfrm/xfrm_input.c    |  6 +--
>  10 files changed, 192 insertions(+), 44 deletions(-)
>
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for ESP in UDP encapsulation
  2023-01-19 19:33 [PATCH 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
                   ` (4 preceding siblings ...)
  2023-08-16 13:12 ` [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Antony Antony
@ 2023-09-26 10:14 ` Antony Antony
  2023-09-26 10:15   ` [PATCH v6 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
                     ` (3 more replies)
  2023-10-04 13:04 ` [PATCH v7 " Antony Antony
  6 siblings, 4 replies; 28+ messages in thread
From: Antony Antony @ 2023-09-26 10:14 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu
  Cc: Eyal Birger, devel, Eric Dumazet, netdev, Antony Antony

Hi,

I have rebased the patch set onto the latest ipsec-next. There was a big change to the UDP socket encapsulation data structure.

Eyal, would you please review the patch set quickly? Please focus specifically on the changes due to

70a36f571362 ("udp: annotate data-races around udp->encap_type")
ac9a7f4ce5dd ("udp: lockless UDP_ENCAP_L2TPINUDP / UDP_GRO")
I hope I incorporated these changes correctly.

v1->v2 fixed error path added skb_push
        use is_fou instead of holding sk in skb.
        user configurable option to enable GRO; using UDP_GRO

v2->v3 only support GRO for UDP_ENCAP_ESPINUDP and not
        UDP_ENCAP_ESPINUDP_NON_IKE. The _NON_IKE is an IETF early draft
        version and not widely used.

v3->v4 removed refactoring since refactored function is only used once
        removed refcount on sk, sk is not used any more.
        fixed encap_type as Eyal recommended.
        removed un-necessary else since there is a goto before that.

v4->v5 removed extra code/checks that accidentally got added.

v5->v6 rebased to ipsec-next; changes due to lockless UDP socket
       encapsulation options

Steffen Klassert (3):
  xfrm: Use the XFRM_GRO to indicate a GRO call on input
  xfrm: Support GRO for IPv4 ESP in UDP encapsulation
  xfrm: Support GRO for IPv6 ESP in UDP encapsulation

 include/net/gro.h        |  2 +-
 include/net/ipv6_stubs.h |  3 ++
 include/net/xfrm.h       |  4 ++
 net/ipv4/esp4_offload.c  |  6 ++-
 net/ipv4/udp.c           | 16 +++++++
 net/ipv4/xfrm4_input.c   | 94 ++++++++++++++++++++++++++++++++--------
 net/ipv6/af_inet6.c      |  1 +
 net/ipv6/esp6_offload.c  | 10 ++++-
 net/ipv6/xfrm6_input.c   | 94 ++++++++++++++++++++++++++++++++--------
 net/xfrm/xfrm_input.c    |  6 +--
 10 files changed, 192 insertions(+), 44 deletions(-)

--
2.30.2


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v6 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input
  2023-09-26 10:14 ` Antony Antony
@ 2023-09-26 10:15   ` Antony Antony
  2023-09-29  6:12     ` Steffen Klassert
  2023-09-26 10:15   ` [PATCH v6 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 28+ messages in thread
From: Antony Antony @ 2023-09-26 10:15 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu
  Cc: Eyal Birger, Eric Dumazet, devel, netdev, Antony Antony

From: Steffen Klassert <steffen.klassert@secunet.com>

This is needed to support GRO for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/ipv4/esp4_offload.c | 2 +-
 net/ipv6/esp6_offload.c | 2 +-
 net/xfrm/xfrm_input.c   | 6 ++----
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 10e96ed6c9e3..5b487d12d0cf 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -77,7 +77,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,

 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, -2);
+	xfrm_input(skb, IPPROTO_ESP, spi, 0);

 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index a189e08370a5..19ff2bceb4e1 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -104,7 +104,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,

 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, -2);
+	xfrm_input(skb, IPPROTO_ESP, spi, 0);

 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index d5ee96789d4b..bd4ce21d76d7 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -462,7 +462,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	struct xfrm_offload *xo = xfrm_offload(skb);
 	struct sec_path *sp;

-	if (encap_type < 0) {
+	if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) {
 		x = xfrm_input_state(skb);

 		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
@@ -485,9 +485,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			seq = XFRM_SKB_CB(skb)->seq.input.low;
 			goto resume;
 		}
-
-		/* encap_type < -1 indicates a GRO call. */
-		encap_type = 0;
+		/* GRO call */
 		seq = XFRM_SPI_SKB_CB(skb)->seq;

 		if (xo && (xo->flags & CRYPTO_DONE)) {
--
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v6 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation
  2023-09-26 10:14 ` Antony Antony
  2023-09-26 10:15   ` [PATCH v6 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
@ 2023-09-26 10:15   ` Antony Antony
  2023-09-26 10:15   ` [PATCH v6 ipsec-next 3/3] xfrm: Support GRO for IPv6 " Antony Antony
  2023-09-26 13:07   ` [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Eyal Birger
  3 siblings, 0 replies; 28+ messages in thread
From: Antony Antony @ 2023-09-26 10:15 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu
  Cc: Eyal Birger, Eric Dumazet, devel, netdev, Antony Antony

From: Steffen Klassert <steffen.klassert@secunet.com>

This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 include/net/gro.h       |  2 +-
 include/net/xfrm.h      |  2 +
 net/ipv4/esp4_offload.c |  6 ++-
 net/ipv4/udp.c          | 14 ++++++
 net/ipv4/xfrm4_input.c  | 94 +++++++++++++++++++++++++++++++++--------
 5 files changed, 98 insertions(+), 20 deletions(-)

diff --git a/include/net/gro.h b/include/net/gro.h
index 88644b3ca660..b435f0ddbf64 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -41,7 +41,7 @@ struct napi_gro_cb {
 	/* Number of segments aggregated. */
 	u16	count;

-	/* Used in ipv6_gro_receive() and foo-over-udp */
+	/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
 	u16	proto;

 /* Used in napi_gro_cb::free */
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 98d7aa78adda..dd8b21cf62b4 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1712,6 +1712,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
 int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
 		     int optlen);
 #else
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 5b487d12d0cf..b3271957ad9a 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -33,6 +33,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
 	struct xfrm_state *x;
+	int encap_type = 0;
 	__be32 seq;
 	__be32 spi;

@@ -70,6 +71,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,

 	xo->flags |= XFRM_GRO;

+	if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+		encap_type = UDP_ENCAP_ESPINUDP;
+
 	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
 	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
@@ -77,7 +81,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,

 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, 0);
+	xfrm_input(skb, IPPROTO_ESP, spi, encap_type);

 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c3ff984b6354..b8d7c5e86d0d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2625,6 +2625,17 @@ void udp_destroy_sock(struct sock *sk)
 	}
 }

+static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
+				       struct sock *sk)
+{
+#ifdef CONFIG_XFRM
+	if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) {
+		if (family == AF_INET)
+			WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv);
+	}
+#endif
+}
+
 /*
  *	Socket option code for UDP
  */
@@ -2674,6 +2685,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		case 0:
 #ifdef CONFIG_XFRM
 		case UDP_ENCAP_ESPINUDP:
+			set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk);
+			fallthrough;
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
 #if IS_ENABLED(CONFIG_IPV6)
 			if (sk->sk_family == AF_INET6)
@@ -2716,6 +2729,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 			udp_tunnel_encap_enable(sk);
 		udp_assign_bit(GRO_ENABLED, sk, valbool);
 		udp_assign_bit(ACCEPT_L4, sk, valbool);
+		set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk);
 		break;

 	/*
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 183f6dc37242..42879c5e026a 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -17,6 +17,8 @@
 #include <linux/netfilter_ipv4.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>

 static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
 				   struct sk_buff *skb)
@@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
 	return 0;
 }

-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
 {
 	struct udp_sock *up = udp_sk(sk);
 	struct udphdr *uh;
@@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 			/* ESP Packet without Non-ESP header */
 			len = sizeof(struct udphdr);
@@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
 			   udpdata32[0] == 0 && udpdata32[1] == 0) {

@@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
 	if (skb_unclone(skb, GFP_ATOMIC))
-		goto drop;
+		return -EINVAL;

 	/* Now we can update and verify the packet length... */
 	iph = ip_hdr(skb);
@@ -147,25 +142,88 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	iph->tot_len = htons(ntohs(iph->tot_len) - len);
 	if (skb->len < iphlen + len) {
 		/* packet is too small!?! */
-		goto drop;
+		return -EINVAL;
 	}

 	/* pull the data buffer up to the ESP header and set the
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	__skb_pull(skb, len);
-	skb_reset_transport_header(skb);
+	if (pull) {
+		__skb_pull(skb, len);
+		skb_reset_transport_header(skb);
+	} else {
+		skb_set_transport_header(skb, len);
+	}

 	/* process ESP */
-	return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
-	kfree_skb(skb);
 	return 0;
 }
 EXPORT_SYMBOL(xfrm4_udp_encap_rcv);

+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = __xfrm4_udp_encap_rcv(sk, skb, true);
+	if (!ret)
+		return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
+				       udp_sk(sk)->encap_type);
+
+	if (ret < 0) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return ret;
+}
+
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb)
+{
+	int offset = skb_gro_offset(skb);
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
+	int ret;
+
+	offset = offset - sizeof(struct udphdr);
+
+	if (!pskb_pull(skb, offset))
+		return NULL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
+	if (!ops || !ops->callbacks.gro_receive)
+		goto out;
+
+	ret = __xfrm4_udp_encap_rcv(sk, skb, false);
+	if (ret)
+		goto out;
+
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	rcu_read_unlock();
+
+	return pp;
+
+out:
+	rcu_read_unlock();
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->same_flow = 0;
+	NAPI_GRO_CB(skb)->flush = 1;
+
+	return NULL;
+}
+
 int xfrm4_rcv(struct sk_buff *skb)
 {
 	return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
--
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v6 ipsec-next 3/3] xfrm: Support GRO for IPv6 ESP in UDP encapsulation
  2023-09-26 10:14 ` Antony Antony
  2023-09-26 10:15   ` [PATCH v6 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
  2023-09-26 10:15   ` [PATCH v6 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
@ 2023-09-26 10:15   ` Antony Antony
  2023-09-26 13:07   ` [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Eyal Birger
  3 siblings, 0 replies; 28+ messages in thread
From: Antony Antony @ 2023-09-26 10:15 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu
  Cc: Eyal Birger, Eric Dumazet, devel, netdev, Antony Antony

From: Steffen Klassert <steffen.klassert@secunet.com>

This patch enables the GRO codepath for IPv6 ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 include/net/ipv6_stubs.h |  3 ++
 include/net/xfrm.h       |  2 +
 net/ipv4/udp.c           |  2 +
 net/ipv6/af_inet6.c      |  1 +
 net/ipv6/esp6_offload.c  | 10 ++++-
 net/ipv6/xfrm6_input.c   | 94 ++++++++++++++++++++++++++++++++--------
 6 files changed, 92 insertions(+), 20 deletions(-)

diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index c48186bf4737..887d35f716c7 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -60,6 +60,9 @@ struct ipv6_stub {
 #if IS_ENABLED(CONFIG_XFRM)
 	void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
 	int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
+	struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk,
+						   struct list_head *head,
+						   struct sk_buff *skb);
 	int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
 			       int encap_type);
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index dd8b21cf62b4..4165ad01ef3b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1714,6 +1714,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
 					struct sk_buff *skb);
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
 		     int optlen);
 #else
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b8d7c5e86d0d..7fdc250e0679 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2632,6 +2632,8 @@ static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
 	if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) {
 		if (family == AF_INET)
 			WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv);
+		else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6)
+			WRITE_ONCE(udp_sk(sk)->gro_receive, ipv6_stub->xfrm6_gro_udp_encap_rcv);
 	}
 #endif
 }
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c6ad0d6e99b5..7dd8aeb555cf 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1049,6 +1049,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
 #if IS_ENABLED(CONFIG_XFRM)
 	.xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
 	.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
+	.xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv,
 	.xfrm6_rcv_encap = xfrm6_rcv_encap,
 #endif
 	.nd_tbl	= &nd_tbl,
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 19ff2bceb4e1..527b7caddbc6 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -34,7 +34,9 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
 	int off = sizeof(struct ipv6hdr);
 	struct ipv6_opt_hdr *exthdr;

-	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+	/* ESP or ESPINUDP */
+	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP ||
+		   ipv6_hdr->nexthdr == NEXTHDR_UDP))
 		return offsetof(struct ipv6hdr, nexthdr);

 	while (off < nhlen) {
@@ -54,10 +56,14 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
 	struct xfrm_state *x;
+	int encap_type = 0;
 	__be32 seq;
 	__be32 spi;
 	int nhoff;

+	if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+		encap_type = UDP_ENCAP_ESPINUDP;
+
 	if (!pskb_pull(skb, offset))
 		return NULL;

@@ -104,7 +110,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,

 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, 0);
+	xfrm_input(skb, IPPROTO_ESP, spi, encap_type);

 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 4156387248e4..ccf79b84c061 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,6 +16,8 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/ipv6.h>
 #include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>

 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
 		  struct ip6_tnl *t)
@@ -67,14 +69,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 	return 0;
 }

-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
 {
 	struct udp_sock *up = udp_sk(sk);
 	struct udphdr *uh;
@@ -109,7 +104,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 			/* ESP Packet without Non-ESP header */
 			len = sizeof(struct udphdr);
@@ -120,7 +115,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
 			   udpdata32[0] == 0 && udpdata32[1] == 0) {

@@ -138,31 +133,94 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
 	if (skb_unclone(skb, GFP_ATOMIC))
-		goto drop;
+		return -EINVAL;

 	/* Now we can update and verify the packet length... */
 	ip6h = ipv6_hdr(skb);
 	ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len);
 	if (skb->len < ip6hlen + len) {
 		/* packet is too small!?! */
-		goto drop;
+		return -EINVAL;
 	}

 	/* pull the data buffer up to the ESP header and set the
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	__skb_pull(skb, len);
-	skb_reset_transport_header(skb);
+	if (pull) {
+		__skb_pull(skb, len);
+		skb_reset_transport_header(skb);
+	} else {
+		skb_set_transport_header(skb, len);
+	}

 	/* process ESP */
-	return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
-	kfree_skb(skb);
 	return 0;
 }

+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = __xfrm6_udp_encap_rcv(sk, skb, true);
+	if (!ret)
+		return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0,
+				       udp_sk(sk)->encap_type);
+
+	if (ret < 0) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return ret;
+}
+
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb)
+{
+	int offset = skb_gro_offset(skb);
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
+	int ret;
+
+	offset = offset - sizeof(struct udphdr);
+
+	if (!pskb_pull(skb, offset))
+		return NULL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]);
+	if (!ops || !ops->callbacks.gro_receive)
+		goto out;
+
+	ret = __xfrm6_udp_encap_rcv(sk, skb, false);
+	if (ret)
+		goto out;
+
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	rcu_read_unlock();
+
+	return pp;
+
+out:
+	rcu_read_unlock();
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->same_flow = 0;
+	NAPI_GRO_CB(skb)->flush = 1;
+
+	return NULL;
+}
+
 int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
 {
 	return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
--
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for ESP in UDP encapsulation
  2023-09-26 10:14 ` Antony Antony
                     ` (2 preceding siblings ...)
  2023-09-26 10:15   ` [PATCH v6 ipsec-next 3/3] xfrm: Support GRO for IPv6 " Antony Antony
@ 2023-09-26 13:07   ` Eyal Birger
  3 siblings, 0 replies; 28+ messages in thread
From: Eyal Birger @ 2023-09-26 13:07 UTC (permalink / raw)
  To: antony.antony; +Cc: Steffen Klassert, Herbert Xu, devel, Eric Dumazet, netdev

Hi Antony,

On Tue, Sep 26, 2023 at 1:14 PM Antony Antony <antony.antony@secunet.com> wrote:
>
> Hi,
>
> I have rebased the patch set to latest ipsec-next. There was a big change to udp socket encapsulation data structure.
>
> Eyal, would please review patch set quickly? focus specifically chages due to
>
> 70a36f571362 ("udp: annotate data-races around udp->encap_type")
> ac9a7f4ce5dd ("udp: lockless UDP_ENCAP_L2TPINUDP / UDP_GRO")
> I hope I incorprated these changes correctly.

LGTM.

I think a cover letter explaining the feature, usage, performance,
caveats etc, would be helpful.

For the series:

Reviewed-by: Eyal Birger <eyal.birger@gmail.com>

>
> v1->v2 fixed error path added skb_push
>         use is_fou instead of holding sk in skb.
>         user configurable option to enable GRO; using UDP_GRO
>
> v2->v3 only support GRO for UDP_ENCAP_ESPINUDP and not
>         UDP_ENCAP_ESPINUDP_NON_IKE. The _NON_IKE is an IETF early draft
>         version and not widly used.
>
> v3->v4 removed refactoring since refactored function is only used once
>         removed refcount on sk, sk is not used any more.
>         fixed encap_type as Eyal recommended.
>         removed un-necessary else since there is a goto before that.
>
> v4->v5 removed extra code/checks that accidently got added.
>
> v5->v6 rebased to ipsec-next chages due lockless scket udp
>        encapsulation options
>
> Steffen Klassert (3):
>   xfrm: Use the XFRM_GRO to indicate a GRO call on input
>   xfrm: Support GRO for IPv4 ESP in UDP encapsulation
>   xfrm: Support GRO for IPv6 ESP in UDP encapsulation
>
>  include/net/gro.h        |  2 +-
>  include/net/ipv6_stubs.h |  3 ++
>  include/net/xfrm.h       |  4 ++
>  net/ipv4/esp4_offload.c  |  6 ++-
>  net/ipv4/udp.c           | 16 +++++++
>  net/ipv4/xfrm4_input.c   | 94 ++++++++++++++++++++++++++++++++--------
>  net/ipv6/af_inet6.c      |  1 +
>  net/ipv6/esp6_offload.c  | 10 ++++-
>  net/ipv6/xfrm6_input.c   | 94 ++++++++++++++++++++++++++++++++--------
>  net/xfrm/xfrm_input.c    |  6 +--
>  10 files changed, 192 insertions(+), 44 deletions(-)
>
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v6 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input
  2023-09-26 10:15   ` [PATCH v6 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
@ 2023-09-29  6:12     ` Steffen Klassert
  0 siblings, 0 replies; 28+ messages in thread
From: Steffen Klassert @ 2023-09-29  6:12 UTC (permalink / raw)
  To: Antony Antony; +Cc: Herbert Xu, Eyal Birger, Eric Dumazet, devel, netdev

On Tue, Sep 26, 2023 at 12:15:03PM +0200, Antony Antony wrote:
> From: Steffen Klassert <steffen.klassert@secunet.com>
> 
> This is needed to support GRO for ESP in UDP encapsulation.
> 
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> Co-developed-by: Antony Antony <antony.antony@secunet.com>
> Signed-off-by: Antony Antony <antony.antony@secunet.com>

Thanks for picking up that patchset!

Please provide a bit more information about what we are doing
with that patch in the commit message. Otherwise the
patchset looks OK and is ready to merge.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v7 ipsec-next 0/3] xfrm: Support GRO decapsulation for ESP in UDP encapsulation
  2023-01-19 19:33 [PATCH 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
                   ` (5 preceding siblings ...)
  2023-09-26 10:14 ` Antony Antony
@ 2023-10-04 13:04 ` Antony Antony
  2023-10-04 13:05   ` [PATCH v7 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
                     ` (3 more replies)
  6 siblings, 4 replies; 28+ messages in thread
From: Antony Antony @ 2023-10-04 13:04 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu
  Cc: Eyal Birger, devel, Eric Dumazet, netdev, Antony Antony

Hi,

I have added how to enable this feature, and more description to the second
patch. Here is copy of that.

xfrm: Support GRO for IPv4 & IPv6 ESP in UDP encapsulation

This patchset enables the GRO codepath for ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Enabling this would improve performance for the ESP in UDP datapath, i.e.
IPsec with NAT in between. Our initial tests show a 20% improvement.


By default, GRO for ESP-in-UDP is disabled for UDP sockets.
To enable this feature for an ESP socket, the following two options
need to be set:
 1. enable ESP-in-UDP: (this is already set by an IKE daemon).
    int type = UDP_ENCAP_ESPINUDP;
    setsockopt(fd, SOL_UDP, UDP_ENCAP, &type, sizeof(type));

 2. To enable GRO for ESP in UDP socket:
    type = true;
    setsockopt(fd, SOL_UDP, UDP_GRO, &type, sizeof(type));

Enabling ESP-in-UDP has the side effect of preventing the Linux stack from
seeing ESP packets at L3 (when ESP OFFLOAD is disabled), as packets are
immediately decapsulated from UDP and decrypted.
This change may affect nftables rules that match on ESP packets at L3.
Also, tcpdump won't see the ESP packets.

Developers/admins are advised to review and adapt any nftables rules
accordingly before enabling this feature to prevent potential rule breakage.
Also, tcpdump will not see ESP packets from an ESP in UDP flow when this
is enabled.

---

Initially, a quick test showed a performance improvement of about 20%
on the receiver when using iperf with a TCP flow over ESP in UDP.

Steffen Klassert (3):
  xfrm: Use the XFRM_GRO to indicate a GRO call on input
  xfrm: Support GRO for IPv4 ESP in UDP encapsulation
  xfrm: Support GRO for IPv6 ESP in UDP encapsulation

 include/net/gro.h        |  2 +-
 include/net/ipv6_stubs.h |  3 ++
 include/net/xfrm.h       |  4 ++
 net/ipv4/esp4_offload.c  |  6 ++-
 net/ipv4/udp.c           | 16 +++++++
 net/ipv4/xfrm4_input.c   | 94 ++++++++++++++++++++++++++++++++--------
 net/ipv6/af_inet6.c      |  1 +
 net/ipv6/esp6_offload.c  | 10 ++++-
 net/ipv6/xfrm6_input.c   | 94 ++++++++++++++++++++++++++++++++--------
 net/xfrm/xfrm_input.c    |  6 +--
 10 files changed, 192 insertions(+), 44 deletions(-)

--
2.30.2


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v7 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input
  2023-10-04 13:04 ` [PATCH v7 " Antony Antony
@ 2023-10-04 13:05   ` Antony Antony
  2023-10-04 13:05   ` [PATCH v7 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 28+ messages in thread
From: Antony Antony @ 2023-10-04 13:05 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu
  Cc: Eyal Birger, devel, Eric Dumazet, netdev, Antony Antony

From: Steffen Klassert <steffen.klassert@secunet.com>

This is needed to support GRO for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/ipv4/esp4_offload.c | 2 +-
 net/ipv6/esp6_offload.c | 2 +-
 net/xfrm/xfrm_input.c   | 6 ++----
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 10e96ed6c9e3..5b487d12d0cf 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -77,7 +77,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 
 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, -2);
+	xfrm_input(skb, IPPROTO_ESP, spi, 0);
 
 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index a189e08370a5..19ff2bceb4e1 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -104,7 +104,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
 
 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, -2);
+	xfrm_input(skb, IPPROTO_ESP, spi, 0);
 
 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index d5ee96789d4b..bd4ce21d76d7 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -462,7 +462,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	struct xfrm_offload *xo = xfrm_offload(skb);
 	struct sec_path *sp;
 
-	if (encap_type < 0) {
+	if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) {
 		x = xfrm_input_state(skb);
 
 		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
@@ -485,9 +485,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			seq = XFRM_SKB_CB(skb)->seq.input.low;
 			goto resume;
 		}
-
-		/* encap_type < -1 indicates a GRO call. */
-		encap_type = 0;
+		/* GRO call */
 		seq = XFRM_SPI_SKB_CB(skb)->seq;
 
 		if (xo && (xo->flags & CRYPTO_DONE)) {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v7 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation
  2023-10-04 13:04 ` [PATCH v7 " Antony Antony
  2023-10-04 13:05   ` [PATCH v7 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
@ 2023-10-04 13:05   ` Antony Antony
  2023-10-04 13:05   ` [PATCH v7 ipsec-next 3/3] xfrm: Support GRO for IPv6 " Antony Antony
  2023-10-06  5:49   ` [PATCH v7 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Steffen Klassert
  3 siblings, 0 replies; 28+ messages in thread
From: Antony Antony @ 2023-10-04 13:05 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu
  Cc: Eyal Birger, devel, Eric Dumazet, netdev, Antony Antony

From: Steffen Klassert <steffen.klassert@secunet.com>

This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Enabling this would improve performance for the ESP in UDP datapath, i.e.
IPsec with NAT in between.

By default, GRO for ESP-in-UDP is disabled for UDP sockets.
To enable this feature for an ESP socket, the following two options
need to be set:
1. enable ESP-in-UDP: (this is already set by an IKE daemon).
   int type = UDP_ENCAP_ESPINUDP;
   setsockopt(fd, SOL_UDP, UDP_ENCAP, &type, sizeof(type));

2. To enable GRO for ESP in UDP socket:
   type = true;
   setsockopt(fd, SOL_UDP, UDP_GRO, &type, sizeof(type));

Enabling ESP-in-UDP has the side effect of preventing the Linux stack from
seeing ESP packets at L3 (when ESP OFFLOAD is disabled), as packets are
immediately decapsulated from UDP and decrypted.
This change may affect nftables rules that match on ESP packets at L3.
Also, tcpdump won't see the ESP packets.

Developers/admins are advised to review and adapt any nftables rules
accordingly before enabling this feature to prevent potential rule breakage.
Also, tcpdump will not see ESP packets from an ESP in UDP flow when this
is enabled.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 include/net/gro.h       |  2 +-
 include/net/xfrm.h      |  2 +
 net/ipv4/esp4_offload.c |  6 ++-
 net/ipv4/udp.c          | 14 ++++++
 net/ipv4/xfrm4_input.c  | 94 +++++++++++++++++++++++++++++++++--------
 5 files changed, 98 insertions(+), 20 deletions(-)

diff --git a/include/net/gro.h b/include/net/gro.h
index 88644b3ca660..b435f0ddbf64 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -41,7 +41,7 @@ struct napi_gro_cb {
 	/* Number of segments aggregated. */
 	u16	count;

-	/* Used in ipv6_gro_receive() and foo-over-udp */
+	/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
 	u16	proto;

 /* Used in napi_gro_cb::free */
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 98d7aa78adda..dd8b21cf62b4 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1712,6 +1712,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
 int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
 		     int optlen);
 #else
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 5b487d12d0cf..b3271957ad9a 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -33,6 +33,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
 	struct xfrm_state *x;
+	int encap_type = 0;
 	__be32 seq;
 	__be32 spi;

@@ -70,6 +71,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,

 	xo->flags |= XFRM_GRO;

+	if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+		encap_type = UDP_ENCAP_ESPINUDP;
+
 	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
 	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
@@ -77,7 +81,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,

 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, 0);
+	xfrm_input(skb, IPPROTO_ESP, spi, encap_type);

 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c3ff984b6354..b8d7c5e86d0d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2625,6 +2625,17 @@ void udp_destroy_sock(struct sock *sk)
 	}
 }

+static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
+				       struct sock *sk)
+{
+#ifdef CONFIG_XFRM
+	if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) {
+		if (family == AF_INET)
+			WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv);
+	}
+#endif
+}
+
 /*
  *	Socket option code for UDP
  */
@@ -2674,6 +2685,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		case 0:
 #ifdef CONFIG_XFRM
 		case UDP_ENCAP_ESPINUDP:
+			set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk);
+			fallthrough;
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
 #if IS_ENABLED(CONFIG_IPV6)
 			if (sk->sk_family == AF_INET6)
@@ -2716,6 +2729,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 			udp_tunnel_encap_enable(sk);
 		udp_assign_bit(GRO_ENABLED, sk, valbool);
 		udp_assign_bit(ACCEPT_L4, sk, valbool);
+		set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk);
 		break;

 	/*
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 183f6dc37242..42879c5e026a 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -17,6 +17,8 @@
 #include <linux/netfilter_ipv4.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>

 static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
 				   struct sk_buff *skb)
@@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
 	return 0;
 }

-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
 {
 	struct udp_sock *up = udp_sk(sk);
 	struct udphdr *uh;
@@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 			/* ESP Packet without Non-ESP header */
 			len = sizeof(struct udphdr);
@@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
 			   udpdata32[0] == 0 && udpdata32[1] == 0) {

@@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
 	if (skb_unclone(skb, GFP_ATOMIC))
-		goto drop;
+		return -EINVAL;

 	/* Now we can update and verify the packet length... */
 	iph = ip_hdr(skb);
@@ -147,25 +142,88 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	iph->tot_len = htons(ntohs(iph->tot_len) - len);
 	if (skb->len < iphlen + len) {
 		/* packet is too small!?! */
-		goto drop;
+		return -EINVAL;
 	}

 	/* pull the data buffer up to the ESP header and set the
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	__skb_pull(skb, len);
-	skb_reset_transport_header(skb);
+	if (pull) {
+		__skb_pull(skb, len);
+		skb_reset_transport_header(skb);
+	} else {
+		skb_set_transport_header(skb, len);
+	}

 	/* process ESP */
-	return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
-	kfree_skb(skb);
 	return 0;
 }
 EXPORT_SYMBOL(xfrm4_udp_encap_rcv);

+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = __xfrm4_udp_encap_rcv(sk, skb, true);
+	if (!ret)
+		return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
+				       udp_sk(sk)->encap_type);
+
+	if (ret < 0) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return ret;
+}
+
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb)
+{
+	int offset = skb_gro_offset(skb);
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
+	int ret;
+
+	offset = offset - sizeof(struct udphdr);
+
+	if (!pskb_pull(skb, offset))
+		return NULL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
+	if (!ops || !ops->callbacks.gro_receive)
+		goto out;
+
+	ret = __xfrm4_udp_encap_rcv(sk, skb, false);
+	if (ret)
+		goto out;
+
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	rcu_read_unlock();
+
+	return pp;
+
+out:
+	rcu_read_unlock();
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->same_flow = 0;
+	NAPI_GRO_CB(skb)->flush = 1;
+
+	return NULL;
+}
+
 int xfrm4_rcv(struct sk_buff *skb)
 {
 	return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
--
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v7 ipsec-next 3/3] xfrm: Support GRO for IPv6 ESP in UDP encapsulation
  2023-10-04 13:04 ` [PATCH v7 " Antony Antony
  2023-10-04 13:05   ` [PATCH v7 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
  2023-10-04 13:05   ` [PATCH v7 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
@ 2023-10-04 13:05   ` Antony Antony
  2023-10-06  5:49   ` [PATCH v7 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Steffen Klassert
  3 siblings, 0 replies; 28+ messages in thread
From: Antony Antony @ 2023-10-04 13:05 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu
  Cc: Eyal Birger, devel, Eric Dumazet, netdev, Antony Antony

From: Steffen Klassert <steffen.klassert@secunet.com>

This patch enables the GRO codepath for IPv6 ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 include/net/ipv6_stubs.h |  3 ++
 include/net/xfrm.h       |  2 +
 net/ipv4/udp.c           |  2 +
 net/ipv6/af_inet6.c      |  1 +
 net/ipv6/esp6_offload.c  | 10 ++++-
 net/ipv6/xfrm6_input.c   | 94 ++++++++++++++++++++++++++++++++--------
 6 files changed, 92 insertions(+), 20 deletions(-)

diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index c48186bf4737..887d35f716c7 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -60,6 +60,9 @@ struct ipv6_stub {
 #if IS_ENABLED(CONFIG_XFRM)
 	void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
 	int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
+	struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk,
+						   struct list_head *head,
+						   struct sk_buff *skb);
 	int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
 			       int encap_type);
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index dd8b21cf62b4..4165ad01ef3b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1714,6 +1714,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
 					struct sk_buff *skb);
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
 		     int optlen);
 #else
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b8d7c5e86d0d..7fdc250e0679 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2632,6 +2632,8 @@ static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
 	if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) {
 		if (family == AF_INET)
 			WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv);
+		else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6)
+			WRITE_ONCE(udp_sk(sk)->gro_receive, ipv6_stub->xfrm6_gro_udp_encap_rcv);
 	}
 #endif
 }
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c6ad0d6e99b5..7dd8aeb555cf 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1049,6 +1049,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
 #if IS_ENABLED(CONFIG_XFRM)
 	.xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
 	.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
+	.xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv,
 	.xfrm6_rcv_encap = xfrm6_rcv_encap,
 #endif
 	.nd_tbl	= &nd_tbl,
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 19ff2bceb4e1..527b7caddbc6 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -34,7 +34,9 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
 	int off = sizeof(struct ipv6hdr);
 	struct ipv6_opt_hdr *exthdr;
 
-	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+	/* ESP or ESPINUDP */
+	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP ||
+		   ipv6_hdr->nexthdr == NEXTHDR_UDP))
 		return offsetof(struct ipv6hdr, nexthdr);
 
 	while (off < nhlen) {
@@ -54,10 +56,14 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
 	struct xfrm_state *x;
+	int encap_type = 0;
 	__be32 seq;
 	__be32 spi;
 	int nhoff;
 
+	if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+		encap_type = UDP_ENCAP_ESPINUDP;
+
 	if (!pskb_pull(skb, offset))
 		return NULL;
 
@@ -104,7 +110,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
 
 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, 0);
+	xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
 
 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 4156387248e4..ccf79b84c061 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,6 +16,8 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/ipv6.h>
 #include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>
 
 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
 		  struct ip6_tnl *t)
@@ -67,14 +69,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 	return 0;
 }
 
-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
 {
 	struct udp_sock *up = udp_sk(sk);
 	struct udphdr *uh;
@@ -109,7 +104,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 			/* ESP Packet without Non-ESP header */
 			len = sizeof(struct udphdr);
@@ -120,7 +115,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
 			   udpdata32[0] == 0 && udpdata32[1] == 0) {
 
@@ -138,31 +133,94 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
 	if (skb_unclone(skb, GFP_ATOMIC))
-		goto drop;
+		return -EINVAL;
 
 	/* Now we can update and verify the packet length... */
 	ip6h = ipv6_hdr(skb);
 	ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len);
 	if (skb->len < ip6hlen + len) {
 		/* packet is too small!?! */
-		goto drop;
+		return -EINVAL;
 	}
 
 	/* pull the data buffer up to the ESP header and set the
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	__skb_pull(skb, len);
-	skb_reset_transport_header(skb);
+	if (pull) {
+		__skb_pull(skb, len);
+		skb_reset_transport_header(skb);
+	} else {
+		skb_set_transport_header(skb, len);
+	}
 
 	/* process ESP */
-	return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
-	kfree_skb(skb);
 	return 0;
 }
 
+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = __xfrm6_udp_encap_rcv(sk, skb, true);
+	if (!ret)
+		return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0,
+				       udp_sk(sk)->encap_type);
+
+	if (ret < 0) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return ret;
+}
+
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb)
+{
+	int offset = skb_gro_offset(skb);
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
+	int ret;
+
+	offset = offset - sizeof(struct udphdr);
+
+	if (!pskb_pull(skb, offset))
+		return NULL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]);
+	if (!ops || !ops->callbacks.gro_receive)
+		goto out;
+
+	ret = __xfrm6_udp_encap_rcv(sk, skb, false);
+	if (ret)
+		goto out;
+
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	rcu_read_unlock();
+
+	return pp;
+
+out:
+	rcu_read_unlock();
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->same_flow = 0;
+	NAPI_GRO_CB(skb)->flush = 1;
+
+	return NULL;
+}
+
 int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
 {
 	return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v7 ipsec-next 0/3] xfrm: Support GRO decapsulation for ESP in UDP encapsulation
  2023-10-04 13:04 ` [PATCH v7 " Antony Antony
                     ` (2 preceding siblings ...)
  2023-10-04 13:05   ` [PATCH v7 ipsec-next 3/3] xfrm: Support GRO for IPv6 " Antony Antony
@ 2023-10-06  5:49   ` Steffen Klassert
  3 siblings, 0 replies; 28+ messages in thread
From: Steffen Klassert @ 2023-10-06  5:49 UTC (permalink / raw)
  To: Antony Antony; +Cc: Herbert Xu, Eyal Birger, devel, Eric Dumazet, netdev

On Wed, Oct 04, 2023 at 03:04:53PM +0200, Antony Antony wrote:
> Hi,
> 
> I have added how to enable this feature, and more description to the second
> patch. Here is copy of that.
> 
> xfrm: Support GRO for IPv4 & IPv6 ESP in UDP encapsulation
> 
> This patchset enables the GRO codepath for ESP in UDP encapsulated
> packets. Decapsulation happens at L2 and saves a full round through
> the stack for each packet. This is also needed to support HW offload
> for ESP in UDP encapsulation.
> 
> Enabling this would improve performance for the ESP in UDP datapath, i.e.
> IPsec with NAT in between. Our initial tests show 20% improvement.
> 
> 
> By default GRO for ESP-in-UDP is disabled for UDP sockets.
> To enable this feature for an ESP socket, the following two options
> need to be set:
>  1. enable ESP-in-UDP: (this is already set by an IKE daemon).
>     int type = UDP_ENCAP_ESPINUDP;
>     setsockopt(fd, SOL_UDP, UDP_ENCAP, &type, sizeof(type));
> 
>  2. To enable GRO for ESP in UDP socket:
>     type = true;
>     setsockopt(fd, SOL_UDP, UDP_GRO, &type, sizeof(type));
> 
> Enabling ESP-in-UDP has the side effect of preventing the Linux stack from
> seeing ESP packets at the L3 (when ESP OFFLOAD is disabled), as packets are
> immediately decapsulated from UDP and decrypted.
> This change may affect nftables rules that match on ESP packets at L3.
> Also tcpdump won't see the ESP packet.
> 
> Developers/admins are advised to review and adapt any nftables rules
> accordingly before enabling this feature to prevent potential rule breakage.
> Also tcpdump will not see ESP packets from an ESP in UDP flow when this
> is enabled.
> 
> ---
> 
> Initially, a quick test showed a performance improvement of about 20%
> on the receiver, when using iperf, tcp flow, over ESP in UDP.
> 
> Steffen Klassert (3):
>   xfrm: Use the XFRM_GRO to indicate a GRO call on input
>   xfrm: Support GRO for IPv4 ESP in UDP encapsulation
>   xfrm: Support GRO for IPv6 ESP in UDP encapsulation
> 

Series applied to ipsec-next.

Thanks so much for finalizing this work Antony!

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2023-10-06  5:49 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-01-19 19:33 [PATCH 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
2023-01-19 19:33 ` [PATCH 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
2023-01-20 11:04   ` Eyal Birger
2023-01-19 19:34 ` [PATCH 3/3] xfrm: Support GRO for IPv6 " Antony Antony
2023-02-06 17:18   ` Pablo Neira Ayuso
2023-01-20 11:05 ` [PATCH 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Eyal Birger
2023-08-16  9:57 ` [PATCH v4 ipsec-next 0/3] xfrm: Support GRO decapsulation for ESP in UDP encapsulation Antony Antony
2023-08-16  9:57   ` [PATCH v4 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
2023-08-16  9:57   ` [PATCH v4 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
2023-08-16 11:15     ` Eyal Birger
2023-08-16 16:59       ` Antony Antony
2023-08-16  9:57   ` [PATCH v4 ipsec-next 3/3] xfrm: Support GRO for IPv6 " Antony Antony
2023-08-16 13:12 ` [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Antony Antony
2023-08-16 13:12   ` [PATCH v5 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
2023-08-16 13:13   ` [PATCH v5 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
2023-08-16 13:13   ` [PATCH v5 ipsec-next 3/3] xfrm: Support GRO for IPv6 " Antony Antony
2023-08-17 11:52   ` [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Eyal Birger
2023-09-26 10:14 ` Antony Antony
2023-09-26 10:15   ` [PATCH v6 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
2023-09-29  6:12     ` Steffen Klassert
2023-09-26 10:15   ` [PATCH v6 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
2023-09-26 10:15   ` [PATCH v6 ipsec-next 3/3] xfrm: Support GRO for IPv6 " Antony Antony
2023-09-26 13:07   ` [PATCH v5 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Eyal Birger
2023-10-04 13:04 ` [PATCH v7 " Antony Antony
2023-10-04 13:05   ` [PATCH v7 ipsec-next 1/3] xfrm: Use the XFRM_GRO to indicate a GRO call on input Antony Antony
2023-10-04 13:05   ` [PATCH v7 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation Antony Antony
2023-10-04 13:05   ` [PATCH v7 ipsec-next 3/3] xfrm: Support GRO for IPv6 " Antony Antony
2023-10-06  5:49   ` [PATCH v7 ipsec-next 0/3] xfrm: Support GRO decapsulation for " Steffen Klassert

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).