* [PATCH net-next 1/7] net: Move fou_build_header into fou.c and refactor
2014-11-01 22:57 [PATCH net-next 0/7] gue: Remote checksum offload Tom Herbert
@ 2014-11-01 22:57 ` Tom Herbert
2014-11-01 22:57 ` [PATCH net-next 2/7] udp: Offload outer UDP tunnel csum if available Tom Herbert
` (6 subsequent siblings)
7 siblings, 0 replies; 16+ messages in thread
From: Tom Herbert @ 2014-11-01 22:57 UTC (permalink / raw)
To: davem, netdev
Move fou_build_header out of ip_tunnel.c and into fou.c splitting
it up into fou_build_header, gue_build_header, and fou_build_udp.
This allows for other users for TX of FOU or GUE. Change ip_tunnel_encap
to call fou_build_header or gue_build_header based on the tunnel
encapsulation type. Similarly, added fou_encap_hlen and gue_encap_hlen
functions which are called by ip_encap_hlen. New net/fou.h has
prototypes and defines for this.
Added NET_FOU_IP_TUNNELS configuration. When this is set, IP tunnels
can use FOU/GUE and fou module is also selected.
Signed-off-by: Tom Herbert <therbert@google.com>
---
include/net/fou.h | 26 +++++++++++++++++++
net/ipv4/Kconfig | 9 +++++++
net/ipv4/fou.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++
net/ipv4/ip_tunnel.c | 61 +++++++++----------------------------------
4 files changed, 120 insertions(+), 49 deletions(-)
create mode 100644 include/net/fou.h
diff --git a/include/net/fou.h b/include/net/fou.h
new file mode 100644
index 0000000..cf4ce88
--- /dev/null
+++ b/include/net/fou.h
@@ -0,0 +1,26 @@
+#ifndef __NET_FOU_H
+#define __NET_FOU_H
+
+#include <linux/skbuff.h>
+
+#include <net/flow.h>
+#include <net/gue.h>
+#include <net/ip_tunnels.h>
+#include <net/udp.h>
+
+int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4);
+int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4);
+
+static size_t fou_encap_hlen(struct ip_tunnel_encap *e)
+{
+ return sizeof(struct udphdr);
+}
+
+static size_t gue_encap_hlen(struct ip_tunnel_encap *e)
+{
+ return sizeof(struct udphdr) + sizeof(struct guehdr);
+}
+
+#endif
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e682b48..bd29016 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -322,6 +322,15 @@ config NET_FOU
network mechanisms and optimizations for UDP (such as ECMP
and RSS) can be leveraged to provide better service.
+config NET_FOU_IP_TUNNELS
+ bool "IP: FOU encapsulation of IP tunnels"
+ depends on NET_IPIP || NET_IPGRE || IPV6_SIT
+ select NET_FOU
+ ---help---
+ Allow configuration of FOU or GUE encapsulation for IP tunnels.
+ When this option is enabled IP tunnels can be configured to use
+ FOU or GUE encapsulation.
+
config GENEVE
tristate "Generic Network Virtualization Encapsulation (Geneve)"
depends on INET
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 32e7892..5446c1c 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -487,6 +487,79 @@ static const struct genl_ops fou_nl_ops[] = {
},
};
+static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ struct flowi4 *fl4, u8 *protocol, __be16 sport)
+{
+ struct udphdr *uh;
+
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+
+ uh = udp_hdr(skb);
+
+ uh->dest = e->dport;
+ uh->source = sport;
+ uh->len = htons(skb->len);
+ uh->check = 0;
+ udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
+ fl4->saddr, fl4->daddr, skb->len);
+
+ *protocol = IPPROTO_UDP;
+}
+
+int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4)
+{
+ bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
+ int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+ __be16 sport;
+
+ skb = iptunnel_handle_offloads(skb, csum, type);
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
+ skb, 0, 0, false);
+ fou_build_udp(skb, e, fl4, protocol, sport);
+
+ return 0;
+}
+EXPORT_SYMBOL(fou_build_header);
+
+int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4)
+{
+ bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
+ int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+ struct guehdr *guehdr;
+ size_t hdr_len = sizeof(struct guehdr);
+ __be16 sport;
+
+ skb = iptunnel_handle_offloads(skb, csum, type);
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ /* Get source port (based on flow hash) before skb_push */
+ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
+ skb, 0, 0, false);
+
+ skb_push(skb, hdr_len);
+
+ guehdr = (struct guehdr *)skb->data;
+
+ guehdr->version = 0;
+ guehdr->hlen = 0;
+ guehdr->flags = 0;
+ guehdr->next_hdr = *protocol;
+
+ fou_build_udp(skb, e, fl4, protocol, sport);
+
+ return 0;
+}
+EXPORT_SYMBOL(gue_build_header);
+
static int __init fou_init(void)
{
int ret;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 0bb8e14..c3587e1 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -56,7 +56,10 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
-#include <net/gue.h>
+
+#if IS_ENABLED(CONFIG_NET_FOU)
+#include <net/fou.h>
+#endif
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -494,10 +497,12 @@ static int ip_encap_hlen(struct ip_tunnel_encap *e)
switch (e->type) {
case TUNNEL_ENCAP_NONE:
return 0;
+#if IS_ENABLED(CONFIG_NET_FOU)
case TUNNEL_ENCAP_FOU:
- return sizeof(struct udphdr);
+ return fou_encap_hlen(e);
case TUNNEL_ENCAP_GUE:
- return sizeof(struct udphdr) + sizeof(struct guehdr);
+ return gue_encap_hlen(e);
+#endif
default:
return -EINVAL;
}
@@ -526,60 +531,18 @@ int ip_tunnel_encap_setup(struct ip_tunnel *t,
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
-static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
- size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
-{
- struct udphdr *uh;
- __be16 sport;
- bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
- int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
-
- skb = iptunnel_handle_offloads(skb, csum, type);
-
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- /* Get length and hash before making space in skb */
-
- sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
- skb, 0, 0, false);
-
- skb_push(skb, hdr_len);
-
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- if (e->type == TUNNEL_ENCAP_GUE) {
- struct guehdr *guehdr = (struct guehdr *)&uh[1];
-
- guehdr->version = 0;
- guehdr->hlen = 0;
- guehdr->flags = 0;
- guehdr->next_hdr = *protocol;
- }
-
- uh->dest = e->dport;
- uh->source = sport;
- uh->len = htons(skb->len);
- uh->check = 0;
- udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
- fl4->saddr, fl4->daddr, skb->len);
-
- *protocol = IPPROTO_UDP;
-
- return 0;
-}
-
int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
u8 *protocol, struct flowi4 *fl4)
{
switch (t->encap.type) {
case TUNNEL_ENCAP_NONE:
return 0;
+#if IS_ENABLED(CONFIG_NET_FOU)
case TUNNEL_ENCAP_FOU:
+ return fou_build_header(skb, &t->encap, protocol, fl4);
case TUNNEL_ENCAP_GUE:
- return fou_build_header(skb, &t->encap, t->encap_hlen,
- protocol, fl4);
+ return gue_build_header(skb, &t->encap, protocol, fl4);
+#endif
default:
return -EINVAL;
}
--
2.1.0.rc2.206.gedb03e5
^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next 2/7] udp: Offload outer UDP tunnel csum if available
2014-11-01 22:57 [PATCH net-next 0/7] gue: Remote checksum offload Tom Herbert
2014-11-01 22:57 ` [PATCH net-next 1/7] net: Move fou_build_header into fou.c and refactor Tom Herbert
@ 2014-11-01 22:57 ` Tom Herbert
2014-11-01 22:57 ` [PATCH net-next 3/7] gue: Add infrastructure for flags and options Tom Herbert
` (5 subsequent siblings)
7 siblings, 0 replies; 16+ messages in thread
From: Tom Herbert @ 2014-11-01 22:57 UTC (permalink / raw)
To: davem, netdev
In __skb_udp_tunnel_segment if outer UDP checksums are enabled and
ip_summed is not already CHECKSUM_PARTIAL, set up checksum offload
if device features allow it.
Signed-off-by: Tom Herbert <therbert@google.com>
---
net/ipv4/udp_offload.c | 52 ++++++++++++++++++++++++++++++++++----------------
1 file changed, 36 insertions(+), 16 deletions(-)
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 6480cea..a774711 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -29,7 +29,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
netdev_features_t features),
- __be16 new_protocol)
+ __be16 new_protocol, bool is_ipv6)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
@@ -39,7 +39,9 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t enc_features;
int udp_offset, outer_hlen;
unsigned int oldlen;
- bool need_csum;
+ bool need_csum = !!(skb_shinfo(skb)->gso_type &
+ SKB_GSO_UDP_TUNNEL_CSUM);
+ bool offload_csum = false, dont_encap = need_csum;
oldlen = (u16)~skb->len;
@@ -52,10 +54,12 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
skb_set_network_header(skb, skb_inner_network_offset(skb));
skb->mac_len = skb_inner_network_offset(skb);
skb->protocol = new_protocol;
+ skb->encap_hdr_csum = need_csum;
- need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
- if (need_csum)
- skb->encap_hdr_csum = 1;
+ /* Try to offload checksum if possible */
+ offload_csum = !!(need_csum &&
+ (skb->dev->features &
+ (is_ipv6 ? NETIF_F_V6_CSUM : NETIF_F_V4_CSUM)));
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & features;
@@ -72,11 +76,21 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
do {
struct udphdr *uh;
int len;
-
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
+ __be32 delta;
+
+ if (dont_encap) {
+ skb->encapsulation = 0;
+ skb->ip_summed = CHECKSUM_NONE;
+ } else {
+ /* Only set up inner headers if we might be offloading
+ * inner checksum.
+ */
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
skb->mac_len = mac_len;
+ skb->protocol = protocol;
skb_push(skb, outer_hlen);
skb_reset_mac_header(skb);
@@ -86,19 +100,25 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
uh = udp_hdr(skb);
uh->len = htons(len);
- if (need_csum) {
- __be32 delta = htonl(oldlen + len);
+ if (!need_csum)
+ continue;
+
+ delta = htonl(oldlen + len);
+
+ uh->check = ~csum_fold((__force __wsum)
+ ((__force u32)uh->check +
+ (__force u32)delta));
- uh->check = ~csum_fold((__force __wsum)
- ((__force u32)uh->check +
- (__force u32)delta));
+ if (offload_csum) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ } else {
uh->check = gso_make_checksum(skb, ~uh->check);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
}
-
- skb->protocol = protocol;
} while ((skb = skb->next));
out:
return segs;
@@ -134,7 +154,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
}
segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
- protocol);
+ protocol, is_ipv6);
out_unlock:
rcu_read_unlock();
--
2.1.0.rc2.206.gedb03e5
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH net-next 3/7] gue: Add infrastructure for flags and options
2014-11-01 22:57 [PATCH net-next 0/7] gue: Remote checksum offload Tom Herbert
2014-11-01 22:57 ` [PATCH net-next 1/7] net: Move fou_build_header into fou.c and refactor Tom Herbert
2014-11-01 22:57 ` [PATCH net-next 2/7] udp: Offload outer UDP tunnel csum if available Tom Herbert
@ 2014-11-01 22:57 ` Tom Herbert
2014-11-03 17:18 ` David Miller
2014-11-01 22:58 ` [PATCH net-next 4/7] udp: Changes to udp_offload to support remote checksum offload Tom Herbert
` (4 subsequent siblings)
7 siblings, 1 reply; 16+ messages in thread
From: Tom Herbert @ 2014-11-01 22:57 UTC (permalink / raw)
To: davem, netdev
Add functions and basic definitions for processing standard flags,
private flags, and control messages. This includes definitions
to compute length of optional fields corresponding to a set of flags.
Flag validation is in validate_gue_flags function. This checks for
unknown flags, and that length of optional fields is <= length
in guehdr hlen.
Signed-off-by: Tom Herbert <therbert@google.com>
---
include/net/fou.h | 11 ++++-
include/net/gue.h | 100 ++++++++++++++++++++++++++++++++++++--
net/ipv4/fou.c | 142 ++++++++++++++++++++++++++++++++++++------------------
3 files changed, 199 insertions(+), 54 deletions(-)
diff --git a/include/net/fou.h b/include/net/fou.h
index cf4ce88..d2d8055 100644
--- a/include/net/fou.h
+++ b/include/net/fou.h
@@ -20,7 +20,16 @@ static size_t fou_encap_hlen(struct ip_tunnel_encap *e)
static size_t gue_encap_hlen(struct ip_tunnel_encap *e)
{
- return sizeof(struct udphdr) + sizeof(struct guehdr);
+ size_t len;
+ bool need_priv = false;
+
+ len = sizeof(struct udphdr) + sizeof(struct guehdr);
+
+ /* Add in lengths flags */
+
+ len += need_priv ? GUE_LEN_PRIV : 0;
+
+ return len;
}
#endif
diff --git a/include/net/gue.h b/include/net/gue.h
index b6c3327..cb68ae8 100644
--- a/include/net/gue.h
+++ b/include/net/gue.h
@@ -1,23 +1,113 @@
#ifndef __NET_GUE_H
#define __NET_GUE_H
+/* Definitions for the GUE header, standard and private flags, lengths
+ * of optional fields are below.
+ *
+ * Diagram of GUE header:
+ *
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Ver|C| Hlen | Proto/ctype | Standard flags |P|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * ~ Fields (optional) ~
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Private flags (optional, P bit is set) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * ~ Private fields (optional) ~
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * C bit indicates control message when set, data message when unset.
+ * For a control message, proto/ctype is interpreted as a type of
+ * control message. For data messages, proto/ctype is the IP protocol
+ * of the next header.
+ *
+ * P bit indicates private flags field is present. The private flags
+ * may refer to options placed after this field.
+ */
+
struct guehdr {
union {
struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u8 hlen:4,
- version:4;
+ __u8 hlen:5,
+ control:1,
+ version:2;
#elif defined (__BIG_ENDIAN_BITFIELD)
- __u8 version:4,
- hlen:4;
+ __u8 version:2,
+ control:1,
+ hlen:5;
#else
#error "Please fix <asm/byteorder.h>"
#endif
- __u8 next_hdr;
+ __u8 proto_ctype;
__u16 flags;
};
__u32 word;
};
};
+/* Standard flags in GUE header */
+
+#define GUE_FLAG_PRIV htons(1<<0) /* Private flags are in options */
+#define GUE_LEN_PRIV 4
+
+#define GUE_FLAGS_ALL (GUE_FLAG_PRIV)
+
+/* Private flags in the private option extension */
+
+#define GUE_PFLAGS_ALL (0)
+
+/* Functions to compute options length corresponding to flags.
+ * If we ever have a lot of flags this can be potentially be
+ * converted to a more optimized algorithm (table lookup
+ * for instance).
+ */
+static inline size_t guehdr_flags_len(__be16 flags)
+{
+ return ((flags & GUE_FLAG_PRIV) ? GUE_LEN_PRIV : 0);
+}
+
+static inline size_t guehdr_priv_flags_len(__be32 flags)
+{
+ return 0;
+}
+
+/* Validate standard and private flags. Returns non-zero (meaning invalid)
+ * if there are unknown standard or private flags, or the options length for
+ * the flags exceeds the options length specified in hlen of the GUE header.
+ */
+static inline int validate_gue_flags(struct guehdr *guehdr,
+ size_t optlen)
+{
+ size_t len;
+ __be32 flags = guehdr->flags;
+
+ if (flags & ~GUE_FLAGS_ALL)
+ return 1;
+
+ len = guehdr_flags_len(flags);
+ if (len > optlen)
+ return 1;
+
+ if (flags & GUE_FLAG_PRIV) {
+ /* Private flags are last four bytes accounted in
+ * guehdr_flags_len
+ */
+ flags = *(__be32 *)((void *)&guehdr[1] + len - GUE_LEN_PRIV);
+
+ if (flags & ~GUE_PFLAGS_ALL)
+ return 1;
+
+ len += guehdr_priv_flags_len(flags);
+ if (len > optlen)
+ return 1;
+ }
+
+ return 0;
+}
+
#endif
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 5446c1c..a3b8c5b 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -38,21 +38,17 @@ static inline struct fou *fou_from_sock(struct sock *sk)
return sk->sk_user_data;
}
-static int fou_udp_encap_recv_deliver(struct sk_buff *skb,
- u8 protocol, size_t len)
+static void fou_recv_pull(struct sk_buff *skb, size_t len)
{
struct iphdr *iph = ip_hdr(skb);
/* Remove 'len' bytes from the packet (UDP header and
- * FOU header if present), modify the protocol to the one
- * we found, and then call rcv_encap.
+ * FOU header if present).
*/
iph->tot_len = htons(ntohs(iph->tot_len) - len);
__skb_pull(skb, len);
skb_postpull_rcsum(skb, udp_hdr(skb), len);
skb_reset_transport_header(skb);
-
- return -protocol;
}
static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
@@ -62,16 +58,24 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
if (!fou)
return 1;
- return fou_udp_encap_recv_deliver(skb, fou->protocol,
- sizeof(struct udphdr));
+ fou_recv_pull(skb, sizeof(struct udphdr));
+
+ return -fou->protocol;
+}
+
+static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
+{
+ /* No support yet */
+ kfree_skb(skb);
+ return 0;
}
static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
{
struct fou *fou = fou_from_sock(sk);
- size_t len;
+ size_t len, optlen, hdrlen;
struct guehdr *guehdr;
- struct udphdr *uh;
+ void *data;
if (!fou)
return 1;
@@ -80,25 +84,38 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
if (!pskb_may_pull(skb, len))
goto drop;
- uh = udp_hdr(skb);
- guehdr = (struct guehdr *)&uh[1];
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+
+ optlen = guehdr->hlen << 2;
+ len += optlen;
- len += guehdr->hlen << 2;
if (!pskb_may_pull(skb, len))
goto drop;
- uh = udp_hdr(skb);
- guehdr = (struct guehdr *)&uh[1];
+ /* guehdr may change after pull */
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
- if (guehdr->version != 0)
- goto drop;
+ hdrlen = sizeof(struct guehdr) + optlen;
- if (guehdr->flags) {
- /* No support yet */
+ if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
goto drop;
+
+ /* Pull UDP and GUE headers */
+ fou_recv_pull(skb, len);
+
+ data = &guehdr[1];
+
+ if (guehdr->flags & GUE_FLAG_PRIV) {
+ data += GUE_LEN_PRIV;
+
+ /* Process private flags */
}
- return fou_udp_encap_recv_deliver(skb, guehdr->next_hdr, len);
+ if (unlikely(guehdr->control))
+ return gue_control_message(skb, guehdr);
+
+ return -guehdr->proto_ctype;
+
drop:
kfree_skb(skb);
return 0;
@@ -154,36 +171,47 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
const struct net_offload *ops;
struct sk_buff **pp = NULL;
struct sk_buff *p;
- u8 proto;
struct guehdr *guehdr;
- unsigned int hlen, guehlen;
- unsigned int off;
+ size_t len, optlen, hdrlen, off;
+ void *data;
int flush = 1;
off = skb_gro_offset(skb);
- hlen = off + sizeof(*guehdr);
+ len = off + sizeof(*guehdr);
+
guehdr = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- guehdr = skb_gro_header_slow(skb, hlen, off);
+ if (skb_gro_header_hard(skb, len)) {
+ guehdr = skb_gro_header_slow(skb, len, off);
if (unlikely(!guehdr))
goto out;
}
- proto = guehdr->next_hdr;
+ optlen = guehdr->hlen << 2;
+ len += optlen;
- rcu_read_lock();
- offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
- ops = rcu_dereference(offloads[proto]);
- if (WARN_ON(!ops || !ops->callbacks.gro_receive))
- goto out_unlock;
+ if (skb_gro_header_hard(skb, len)) {
+ guehdr = skb_gro_header_slow(skb, len, off);
+ if (unlikely(!guehdr))
+ goto out;
+ }
- guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+ if (unlikely(guehdr->control) || guehdr->version != 0 ||
+ validate_gue_flags(guehdr, optlen))
+ goto out;
- hlen = off + guehlen;
- if (skb_gro_header_hard(skb, hlen)) {
- guehdr = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!guehdr))
- goto out_unlock;
+ hdrlen = sizeof(*guehdr) + optlen;
+
+ skb_gro_pull(skb, hdrlen);
+
+ /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
+ skb_gro_postpull_rcsum(skb, guehdr, hdrlen);
+
+ data = &guehdr[1];
+
+ if (guehdr->flags & GUE_FLAG_PRIV) {
+ data += GUE_LEN_PRIV;
+
+ /* Process private flags */
}
flush = 0;
@@ -197,7 +225,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
guehdr2 = (struct guehdr *)(p->data + off);
/* Compare base GUE header to be equal (covers
- * hlen, version, next_hdr, and flags.
+ * hlen, version, proto_ctype, and flags.
*/
if (guehdr->word != guehdr2->word) {
NAPI_GRO_CB(p)->same_flow = 0;
@@ -212,10 +240,11 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
}
}
- skb_gro_pull(skb, guehlen);
-
- /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
- skb_gro_postpull_rcsum(skb, guehdr, guehlen);
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[guehdr->proto_ctype]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_receive))
+ goto out_unlock;
pp = ops->callbacks.gro_receive(head, skb);
@@ -236,7 +265,7 @@ static int gue_gro_complete(struct sk_buff *skb, int nhoff)
u8 proto;
int err = -ENOENT;
- proto = guehdr->next_hdr;
+ proto = guehdr->proto_ctype;
guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
@@ -533,8 +562,12 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
struct guehdr *guehdr;
- size_t hdr_len = sizeof(struct guehdr);
+ size_t optlen = 0;
__be16 sport;
+ void *data;
+ bool need_priv = false;
+
+ optlen += need_priv ? GUE_LEN_PRIV : 0;
skb = iptunnel_handle_offloads(skb, csum, type);
@@ -545,14 +578,27 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
skb, 0, 0, false);
- skb_push(skb, hdr_len);
+ skb_push(skb, sizeof(struct guehdr) + optlen);
guehdr = (struct guehdr *)skb->data;
+ guehdr->control = 0;
guehdr->version = 0;
- guehdr->hlen = 0;
+ guehdr->hlen = optlen >> 2;
guehdr->flags = 0;
- guehdr->next_hdr = *protocol;
+ guehdr->proto_ctype = *protocol;
+
+ data = &guehdr[1];
+
+ if (need_priv) {
+ __be32 *flags = data;
+
+ guehdr->flags |= GUE_FLAG_PRIV;
+ *flags = 0;
+ data += GUE_LEN_PRIV;
+
+ /* Add private flags */
+ }
fou_build_udp(skb, e, fl4, protocol, sport);
--
2.1.0.rc2.206.gedb03e5
^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/7] gue: Add infrastructure for flags and options
2014-11-01 22:57 ` [PATCH net-next 3/7] gue: Add infrastructure for flags and options Tom Herbert
@ 2014-11-03 17:18 ` David Miller
2014-11-03 18:39 ` Tom Herbert
0 siblings, 1 reply; 16+ messages in thread
From: David Miller @ 2014-11-03 17:18 UTC (permalink / raw)
To: therbert; +Cc: netdev
From: Tom Herbert <therbert@google.com>
Date: Sat, 1 Nov 2014 15:57:59 -0700
> @@ -20,7 +20,16 @@ static size_t fou_encap_hlen(struct ip_tunnel_encap *e)
>
> static size_t gue_encap_hlen(struct ip_tunnel_encap *e)
> {
> - return sizeof(struct udphdr) + sizeof(struct guehdr);
> + size_t len;
> + bool need_priv = false;
> +
> + len = sizeof(struct udphdr) + sizeof(struct guehdr);
> +
> + /* Add in lengths flags */
> +
> + len += need_priv ? GUE_LEN_PRIV : 0;
Add this need_priv logic in patch #6, not here.
^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/7] gue: Add infrastructure for flags and options
2014-11-03 17:18 ` David Miller
@ 2014-11-03 18:39 ` Tom Herbert
2014-11-03 20:12 ` David Miller
0 siblings, 1 reply; 16+ messages in thread
From: Tom Herbert @ 2014-11-03 18:39 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List
On Mon, Nov 3, 2014 at 9:18 AM, David Miller <davem@davemloft.net> wrote:
> From: Tom Herbert <therbert@google.com>
> Date: Sat, 1 Nov 2014 15:57:59 -0700
>
>> @@ -20,7 +20,16 @@ static size_t fou_encap_hlen(struct ip_tunnel_encap *e)
>>
>> static size_t gue_encap_hlen(struct ip_tunnel_encap *e)
>> {
>> - return sizeof(struct udphdr) + sizeof(struct guehdr);
>> + size_t len;
>> + bool need_priv = false;
>> +
>> + len = sizeof(struct udphdr) + sizeof(struct guehdr);
>> +
>> + /* Add in lengths flags */
>> +
>> + len += need_priv ? GUE_LEN_PRIV : 0;
>
> Add this need_priv logic in patch #6, not here.
I would rather keep it in this patch. This is adding the common
infrastructure to support private option field, remote checksum
offload is an instance that uses that.
Tom
^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/7] gue: Add infrastructure for flags and options
2014-11-03 18:39 ` Tom Herbert
@ 2014-11-03 20:12 ` David Miller
0 siblings, 0 replies; 16+ messages in thread
From: David Miller @ 2014-11-03 20:12 UTC (permalink / raw)
To: therbert; +Cc: netdev
From: Tom Herbert <therbert@google.com>
Date: Mon, 3 Nov 2014 10:39:14 -0800
> On Mon, Nov 3, 2014 at 9:18 AM, David Miller <davem@davemloft.net> wrote:
>> From: Tom Herbert <therbert@google.com>
>> Date: Sat, 1 Nov 2014 15:57:59 -0700
>>
>>> @@ -20,7 +20,16 @@ static size_t fou_encap_hlen(struct ip_tunnel_encap *e)
>>>
>>> static size_t gue_encap_hlen(struct ip_tunnel_encap *e)
>>> {
>>> - return sizeof(struct udphdr) + sizeof(struct guehdr);
>>> + size_t len;
>>> + bool need_priv = false;
>>> +
>>> + len = sizeof(struct udphdr) + sizeof(struct guehdr);
>>> +
>>> + /* Add in lengths flags */
>>> +
>>> + len += need_priv ? GUE_LEN_PRIV : 0;
>>
>> Add this need_priv logic in patch #6, not here.
>
> I would rather keep it in this patch. This is adding the common
> infrastructure to support private option field, remote checksum
> offload is an instance that uses that.
Tom, it evaluates always to a constant boolean, and contextually makes
no sense to someone reviewing this change in isolation.
Please, as I have asked, put this in the patch where the logic
actually matters.
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH net-next 4/7] udp: Changes to udp_offload to support remote checksum offload
2014-11-01 22:57 [PATCH net-next 0/7] gue: Remote checksum offload Tom Herbert
` (2 preceding siblings ...)
2014-11-01 22:57 ` [PATCH net-next 3/7] gue: Add infrastructure for flags and options Tom Herbert
@ 2014-11-01 22:58 ` Tom Herbert
2014-11-01 22:58 ` [PATCH net-next 5/7] gue: Protocol constants for " Tom Herbert
` (3 subsequent siblings)
7 siblings, 0 replies; 16+ messages in thread
From: Tom Herbert @ 2014-11-01 22:58 UTC (permalink / raw)
To: davem, netdev
Add a new GSO type, SKB_GSO_TUNNEL_REMCSUM, which indicates remote
checksum offload being done (in this case inner checksum must not
be offloaded to the NIC).
Added logic in __skb_udp_tunnel_segment to handle remote checksum
offload case.
Signed-off-by: Tom Herbert <therbert@google.com>
---
include/linux/netdev_features.h | 4 +++-
include/linux/netdevice.h | 1 +
include/linux/skbuff.h | 4 +++-
net/core/skbuff.c | 4 ++--
net/ipv4/af_inet.c | 1 +
net/ipv4/tcp_offload.c | 1 +
net/ipv4/udp_offload.c | 18 ++++++++++++++++--
net/ipv6/ip6_offload.c | 1 +
net/ipv6/udp_offload.c | 1 +
9 files changed, 29 insertions(+), 6 deletions(-)
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index dcfdecb..8c94b07 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -48,8 +48,9 @@ enum {
NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */
NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */
NETIF_F_GSO_MPLS_BIT, /* ... MPLS segmentation */
+ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
- NETIF_F_GSO_MPLS_BIT,
+ NETIF_F_GSO_TUNNEL_REMCSUM_BIT,
NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
NETIF_F_SCTP_CSUM_BIT, /* SCTP checksum offload */
@@ -119,6 +120,7 @@ enum {
#define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL)
#define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM)
#define NETIF_F_GSO_MPLS __NETIF_F(GSO_MPLS)
+#define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
#define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
#define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX)
#define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c85e065..b2364f0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3583,6 +3583,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_MPLS != (NETIF_F_GSO_MPLS >> NETIF_F_GSO_SHIFT));
+ BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT));
return (features & feature) == feature;
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a59d934..a41e101 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -372,6 +372,7 @@ enum {
SKB_GSO_MPLS = 1 << 12,
+ SKB_GSO_TUNNEL_REMCSUM = 1 << 13,
};
#if BITS_PER_LONG > 32
@@ -595,7 +596,8 @@ struct sk_buff {
#endif
__u8 ipvs_property:1;
__u8 inner_protocol_type:1;
- /* 4 or 6 bit hole */
+ __u8 remcsum_offload:1;
+ /* 3 or 5 bit hole */
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e48e5c0..7001896 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3013,7 +3013,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
if (nskb->len == len + doffset)
goto perform_csum_check;
- if (!sg) {
+ if (!sg && !nskb->remcsum_offload) {
nskb->ip_summed = CHECKSUM_NONE;
nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
skb_put(nskb, len),
@@ -3085,7 +3085,7 @@ skip_fraglist:
nskb->truesize += nskb->data_len;
perform_csum_check:
- if (!csum) {
+ if (!csum && !nskb->remcsum_offload) {
nskb->csum = skb_checksum(nskb, doffset,
nskb->len - doffset, 0);
nskb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8b7fe5b..ed2c672 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1222,6 +1222,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_TCPV6 |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_MPLS |
0)))
goto out;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 5b90f2f..a1b2a56 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -97,6 +97,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
SKB_GSO_MPLS |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
0) ||
!(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
goto out;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index a774711..0a5a70d 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -41,7 +41,8 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
unsigned int oldlen;
bool need_csum = !!(skb_shinfo(skb)->gso_type &
SKB_GSO_UDP_TUNNEL_CSUM);
- bool offload_csum = false, dont_encap = need_csum;
+ bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
+ bool offload_csum = false, dont_encap = (need_csum || remcsum);
oldlen = (u16)~skb->len;
@@ -55,6 +56,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
skb->mac_len = skb_inner_network_offset(skb);
skb->protocol = new_protocol;
skb->encap_hdr_csum = need_csum;
+ skb->remcsum_offload = remcsum;
/* Try to offload checksum if possible */
offload_csum = !!(need_csum &&
@@ -108,11 +110,22 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
uh->check = ~csum_fold((__force __wsum)
((__force u32)uh->check +
(__force u32)delta));
-
if (offload_csum) {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
+ } else if (remcsum) {
+ /* Need to calculate checksum from scratch,
+ * inner checksums are never offloaded when doing
+ * remote checksum offload.
+ */
+
+ skb->csum = skb_checksum(skb, udp_offset,
+ skb->len - udp_offset,
+ 0);
+ uh->check = csum_fold(skb->csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
} else {
uh->check = gso_make_checksum(skb, ~uh->check);
@@ -192,6 +205,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_IPIP |
SKB_GSO_GRE | SKB_GSO_GRE_CSUM |
SKB_GSO_MPLS) ||
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index a071563..e976707 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -78,6 +78,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
SKB_GSO_SIT |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_MPLS |
SKB_GSO_TCPV6 |
0)))
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 6b8f543..637ba2e 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -42,6 +42,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_GRE |
SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
--
2.1.0.rc2.206.gedb03e5
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH net-next 5/7] gue: Protocol constants for remote checksum offload
2014-11-01 22:57 [PATCH net-next 0/7] gue: Remote checksum offload Tom Herbert
` (3 preceding siblings ...)
2014-11-01 22:58 ` [PATCH net-next 4/7] udp: Changes to udp_offload to support remote checksum offload Tom Herbert
@ 2014-11-01 22:58 ` Tom Herbert
2014-11-01 22:58 ` [PATCH net-next 6/7] gue: TX support for using remote checksum offload option Tom Herbert
` (2 subsequent siblings)
7 siblings, 0 replies; 16+ messages in thread
From: Tom Herbert @ 2014-11-01 22:58 UTC (permalink / raw)
To: davem, netdev
Define a private flag for remote checksum offload as well as a length
for the option.
Signed-off-by: Tom Herbert <therbert@google.com>
---
include/net/gue.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/include/net/gue.h b/include/net/gue.h
index cb68ae8..3f28ec7 100644
--- a/include/net/gue.h
+++ b/include/net/gue.h
@@ -59,7 +59,10 @@ struct guehdr {
/* Private flags in the private option extension */
-#define GUE_PFLAGS_ALL (0)
+#define GUE_PFLAG_REMCSUM htonl(1 << 31)
+#define GUE_PLEN_REMCSUM 4
+
+#define GUE_PFLAGS_ALL (GUE_PFLAG_REMCSUM)
/* Functions to compute options length corresponding to flags.
* If we ever have a lot of flags this can be potentially be
--
2.1.0.rc2.206.gedb03e5
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH net-next 6/7] gue: TX support for using remote checksum offload option
2014-11-01 22:57 [PATCH net-next 0/7] gue: Remote checksum offload Tom Herbert
` (4 preceding siblings ...)
2014-11-01 22:58 ` [PATCH net-next 5/7] gue: Protocol constants for " Tom Herbert
@ 2014-11-01 22:58 ` Tom Herbert
2014-11-01 22:58 ` [PATCH net-next 7/7] gue: Receive side of remote checksum offload Tom Herbert
2014-11-03 21:26 ` [PATCH net-next 0/7] gue: Remote " Jesse Gross
7 siblings, 0 replies; 16+ messages in thread
From: Tom Herbert @ 2014-11-01 22:58 UTC (permalink / raw)
To: davem, netdev
Add if_tunnel flag TUNNEL_ENCAP_FLAG_REMCSUM to configure
remote checksum offload on an IP tunnel. Add logic in gue_build_header
to insert remote checksum offload option.
Signed-off-by: Tom Herbert <therbert@google.com>
---
include/net/fou.h | 5 ++++-
include/uapi/linux/if_tunnel.h | 1 +
net/ipv4/fou.c | 35 ++++++++++++++++++++++++++++++++---
3 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/include/net/fou.h b/include/net/fou.h
index d2d8055..25b26ff 100644
--- a/include/net/fou.h
+++ b/include/net/fou.h
@@ -25,7 +25,10 @@ static size_t gue_encap_hlen(struct ip_tunnel_encap *e)
len = sizeof(struct udphdr) + sizeof(struct guehdr);
- /* Add in lengths flags */
+ if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) {
+ len += GUE_PLEN_REMCSUM;
+ need_priv = true;
+ }
len += need_priv ? GUE_LEN_PRIV : 0;
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 280d9e0..bd3cc11 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -69,6 +69,7 @@ enum tunnel_encap_types {
#define TUNNEL_ENCAP_FLAG_CSUM (1<<0)
#define TUNNEL_ENCAP_FLAG_CSUM6 (1<<1)
+#define TUNNEL_ENCAP_FLAG_REMCSUM (1<<2)
/* SIT-mode i_flags */
#define SIT_ISATAP 0x0001
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index a3b8c5b..fb0db99 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -562,11 +562,19 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
struct guehdr *guehdr;
- size_t optlen = 0;
+ size_t hdrlen, optlen = 0;
__be16 sport;
void *data;
bool need_priv = false;
+ if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ csum = false;
+ optlen += GUE_PLEN_REMCSUM;
+ type |= SKB_GSO_TUNNEL_REMCSUM;
+ need_priv = true;
+ }
+
optlen += need_priv ? GUE_LEN_PRIV : 0;
skb = iptunnel_handle_offloads(skb, csum, type);
@@ -578,7 +586,9 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
skb, 0, 0, false);
- skb_push(skb, sizeof(struct guehdr) + optlen);
+ hdrlen = sizeof(struct guehdr) + optlen;
+
+ skb_push(skb, hdrlen);
guehdr = (struct guehdr *)skb->data;
@@ -597,7 +607,26 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
*flags = 0;
data += GUE_LEN_PRIV;
- /* Add private flags */
+ if (type & SKB_GSO_TUNNEL_REMCSUM) {
+ u16 csum_start = skb_checksum_start_offset(skb);
+ __be16 *pd = data;
+
+ if (csum_start < hdrlen)
+ return -EINVAL;
+
+ csum_start -= hdrlen;
+ pd[0] = htons(csum_start);
+ pd[1] = htons(csum_start + skb->csum_offset);
+
+ if (!skb_is_gso(skb)) {
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->encapsulation = 0;
+ }
+
+ *flags |= GUE_PFLAG_REMCSUM;
+ data += GUE_PLEN_REMCSUM;
+ }
+
}
fou_build_udp(skb, e, fl4, protocol, sport);
--
2.1.0.rc2.206.gedb03e5
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH net-next 7/7] gue: Receive side of remote checksum offload
2014-11-01 22:57 [PATCH net-next 0/7] gue: Remote checksum offload Tom Herbert
` (5 preceding siblings ...)
2014-11-01 22:58 ` [PATCH net-next 6/7] gue: TX support for using remote checksum offload option Tom Herbert
@ 2014-11-01 22:58 ` Tom Herbert
2014-11-03 21:26 ` [PATCH net-next 0/7] gue: Remote " Jesse Gross
7 siblings, 0 replies; 16+ messages in thread
From: Tom Herbert @ 2014-11-01 22:58 UTC (permalink / raw)
To: davem, netdev
Add processing of the remote checksum offload option in both the normal
path as well as the GRO path. The implements patching the affected
checksum to derive the offloaded checksum.
Signed-off-by: Tom Herbert <therbert@google.com>
---
net/ipv4/fou.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 161 insertions(+), 9 deletions(-)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index fb0db99..740ae09 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -63,6 +63,59 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
return -fou->protocol;
}
+static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
+ void *data, int hdrlen, u8 ipproto)
+{
+ __be16 *pd = data;
+ u16 start = ntohs(pd[0]);
+ u16 offset = ntohs(pd[1]);
+ u16 poffset = 0;
+ u16 plen;
+ __wsum csum, delta;
+ __sum16 *psum;
+
+ if (skb->remcsum_offload) {
+ /* Already processed in GRO path */
+ skb->remcsum_offload = 0;
+ return guehdr;
+ }
+
+ if (start > skb->len - hdrlen ||
+ offset > skb->len - hdrlen - sizeof(u16))
+ return NULL;
+
+ if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE))
+ __skb_checksum_complete(skb);
+
+ plen = hdrlen + offset + sizeof(u16);
+ if (!pskb_may_pull(skb, plen))
+ return NULL;
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+
+ if (ipproto == IPPROTO_IP && sizeof(struct iphdr) < plen) {
+ struct iphdr *ip = (struct iphdr *)(skb->data + hdrlen);
+
+ /* If next header happens to be IP we can skip that for the
+ * checksum calculation since the IP header checksum is zero
+ * if correct.
+ */
+ poffset = ip->ihl * 4;
+ }
+
+ csum = csum_sub(skb->csum, skb_checksum(skb, poffset + hdrlen,
+ start - poffset - hdrlen, 0));
+
+ /* Set derived checksum in packet */
+ psum = (__sum16 *)(skb->data + hdrlen + offset);
+ delta = csum_sub(csum_fold(csum), *psum);
+ *psum = csum_fold(csum);
+
+ /* Adjust skb->csum since we changed the packet */
+ skb->csum = csum_add(skb->csum, delta);
+
+ return guehdr;
+}
+
static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
{
/* No support yet */
@@ -76,6 +129,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
size_t len, optlen, hdrlen;
struct guehdr *guehdr;
void *data;
+ u16 doffset = 0;
if (!fou)
return 1;
@@ -100,20 +154,43 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
goto drop;
- /* Pull UDP and GUE headers */
- fou_recv_pull(skb, len);
+ hdrlen = sizeof(struct guehdr) + optlen;
+
+ ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);
+
+ /* Pull UDP header now, skb->data points to guehdr */
+ __skb_pull(skb, sizeof(struct udphdr));
+
+ /* Pull csum through the guehdr now. This can be used if
+ * there is a remote checksum offload.
+ */
+ skb_postpull_rcsum(skb, udp_hdr(skb), len);
data = &guehdr[1];
if (guehdr->flags & GUE_FLAG_PRIV) {
- data += GUE_LEN_PRIV;
+ __be32 flags = *(__be32 *)(data + doffset);
+
+ doffset += GUE_LEN_PRIV;
- /* Process private flags */
+ if (flags & GUE_PFLAG_REMCSUM) {
+ guehdr = gue_remcsum(skb, guehdr, data + doffset,
+ hdrlen, guehdr->proto_ctype);
+ if (!guehdr)
+ goto drop;
+
+ data = &guehdr[1];
+
+ doffset += GUE_PLEN_REMCSUM;
+ }
}
if (unlikely(guehdr->control))
return gue_control_message(skb, guehdr);
+ __skb_pull(skb, hdrlen);
+ skb_reset_transport_header(skb);
+
return -guehdr->proto_ctype;
drop:
@@ -164,6 +241,66 @@ out_unlock:
return err;
}
+static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
+ struct guehdr *guehdr, void *data,
+ size_t hdrlen, u8 ipproto)
+{
+ __be16 *pd = data;
+ u16 start = ntohs(pd[0]);
+ u16 offset = ntohs(pd[1]);
+ u16 poffset = 0;
+ u16 plen;
+ void *ptr;
+ __wsum csum, delta;
+ __sum16 *psum;
+
+ if (skb->remcsum_offload)
+ return guehdr;
+
+ if (start > skb_gro_len(skb) - hdrlen ||
+ offset > skb_gro_len(skb) - hdrlen - sizeof(u16) ||
+ !NAPI_GRO_CB(skb)->csum_valid || skb->remcsum_offload)
+ return NULL;
+
+ plen = hdrlen + offset + sizeof(u16);
+
+ /* Pull checksum that will be written */
+ if (skb_gro_header_hard(skb, off + plen)) {
+ guehdr = skb_gro_header_slow(skb, off + plen, off);
+ if (!guehdr)
+ return NULL;
+ }
+
+ ptr = (void *)guehdr + hdrlen;
+
+ if (ipproto == IPPROTO_IP &&
+ (hdrlen + sizeof(struct iphdr) < plen)) {
+ struct iphdr *ip = (struct iphdr *)(ptr + hdrlen);
+
+ /* If next header happens to be IP we can skip
+ * that for the checksum calculation since the
+ * IP header checksum is zero if correct.
+ */
+ poffset = ip->ihl * 4;
+ }
+
+ csum = csum_sub(NAPI_GRO_CB(skb)->csum,
+ csum_partial(ptr + poffset, start - poffset, 0));
+
+ /* Set derived checksum in packet */
+ psum = (__sum16 *)(ptr + offset);
+ delta = csum_sub(csum_fold(csum), *psum);
+ *psum = csum_fold(csum);
+
+ /* Adjust skb->csum since we changed the packet */
+ skb->csum = csum_add(skb->csum, delta);
+ NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
+
+ skb->remcsum_offload = 1;
+
+ return guehdr;
+}
+
static struct sk_buff **gue_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
@@ -174,6 +311,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
struct guehdr *guehdr;
size_t len, optlen, hdrlen, off;
void *data;
+ u16 doffset = 0;
int flush = 1;
off = skb_gro_offset(skb);
@@ -201,19 +339,33 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
hdrlen = sizeof(*guehdr) + optlen;
- skb_gro_pull(skb, hdrlen);
-
- /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
+ /* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr,
+ * this is needed if there is a remote checksum offload.
+ */
skb_gro_postpull_rcsum(skb, guehdr, hdrlen);
data = &guehdr[1];
if (guehdr->flags & GUE_FLAG_PRIV) {
- data += GUE_LEN_PRIV;
+ __be32 flags = *(__be32 *)(data + doffset);
- /* Process private flags */
+ doffset += GUE_LEN_PRIV;
+
+ if (flags & GUE_PFLAG_REMCSUM) {
+ guehdr = gue_gro_remcsum(skb, off, guehdr,
+ data + doffset, hdrlen,
+ guehdr->proto_ctype);
+ if (!guehdr)
+ goto out;
+
+ data = &guehdr[1];
+
+ doffset += GUE_PLEN_REMCSUM;
+ }
}
+ skb_gro_pull(skb, hdrlen);
+
flush = 0;
for (p = *head; p; p = p->next) {
--
2.1.0.rc2.206.gedb03e5
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH net-next 0/7] gue: Remote checksum offload
2014-11-01 22:57 [PATCH net-next 0/7] gue: Remote checksum offload Tom Herbert
` (6 preceding siblings ...)
2014-11-01 22:58 ` [PATCH net-next 7/7] gue: Receive side of remote checksum offload Tom Herbert
@ 2014-11-03 21:26 ` Jesse Gross
2014-11-03 22:39 ` Tom Herbert
7 siblings, 1 reply; 16+ messages in thread
From: Jesse Gross @ 2014-11-03 21:26 UTC (permalink / raw)
To: Tom Herbert; +Cc: David Miller, netdev
On Sat, Nov 1, 2014 at 3:57 PM, Tom Herbert <therbert@google.com> wrote:
> This patch set implements remote checksum offload for
> GUE, which is a mechanism that provides checksum offload of
> encapsulated packets using rudimentary offload capabilities found in
> most Network Interface Card (NIC) devices. The outer header checksum
> for UDP is enabled in packets and, with some additional meta
> information in the GUE header, a receiver is able to deduce the
> checksum to be set for an inner encapsulated packet. Effectively this
> offloads the computation of the inner checksum. Enabling the outer
> checksum in encapsulation has the additional advantage that it covers
> more of the packet than the inner checksum including the encapsulation
> headers.
Tom, I have a pretty hard time squaring this with your previous
comments on hardware offload. This looks almost identical to a
protocol-specific hardware offload to me in terms of the implications
on the stack. It actually is more invasive and less likely to scale
across protocols, so the relative cost/benefit doesn't really add up
in my mind.
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH net-next 0/7] gue: Remote checksum offload
2014-11-03 21:26 ` [PATCH net-next 0/7] gue: Remote " Jesse Gross
@ 2014-11-03 22:39 ` Tom Herbert
2014-11-04 0:19 ` Jesse Gross
0 siblings, 1 reply; 16+ messages in thread
From: Tom Herbert @ 2014-11-03 22:39 UTC (permalink / raw)
To: Jesse Gross; +Cc: David Miller, netdev
On Mon, Nov 3, 2014 at 1:26 PM, Jesse Gross <jesse@nicira.com> wrote:
> On Sat, Nov 1, 2014 at 3:57 PM, Tom Herbert <therbert@google.com> wrote:
>> This patch set implements remote checksum offload for
>> GUE, which is a mechanism that provides checksum offload of
>> encapsulated packets using rudimentary offload capabilities found in
>> most Network Interface Card (NIC) devices. The outer header checksum
>> for UDP is enabled in packets and, with some additional meta
>> information in the GUE header, a receiver is able to deduce the
>> checksum to be set for an inner encapsulated packet. Effectively this
>> offloads the computation of the inner checksum. Enabling the outer
>> checksum in encapsulation has the additional advantage that it covers
>> more of the packet than the inner checksum including the encapsulation
>> headers.
>
> Tom, I have a pretty hard time squaring this with your previous
> comments on hardware offload. This looks almost identical to a
> protocol-specific hardware offload to me in terms of the implications
> on the stack. It actually is more invasive and less likely to scale
> across protocols, so the relative cost/benefit doesn't really add up
> in my mind.
With this patch and checksum-unnecessary conversion we can provide
checksum offload for encapsulation on millions of already deployed
NICs without any HW or FW change. Why do you think this is not a good
cost/benefit tradeoff?
Thanks,
Tom
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH net-next 0/7] gue: Remote checksum offload
2014-11-03 22:39 ` Tom Herbert
@ 2014-11-04 0:19 ` Jesse Gross
2014-11-04 0:59 ` Tom Herbert
0 siblings, 1 reply; 16+ messages in thread
From: Jesse Gross @ 2014-11-04 0:19 UTC (permalink / raw)
To: Tom Herbert; +Cc: David Miller, netdev
On Mon, Nov 3, 2014 at 2:39 PM, Tom Herbert <therbert@google.com> wrote:
> On Mon, Nov 3, 2014 at 1:26 PM, Jesse Gross <jesse@nicira.com> wrote:
>> On Sat, Nov 1, 2014 at 3:57 PM, Tom Herbert <therbert@google.com> wrote:
>>> This patch set implements remote checksum offload for
>>> GUE, which is a mechanism that provides checksum offload of
>>> encapsulated packets using rudimentary offload capabilities found in
>>> most Network Interface Card (NIC) devices. The outer header checksum
>>> for UDP is enabled in packets and, with some additional meta
>>> information in the GUE header, a receiver is able to deduce the
>>> checksum to be set for an inner encapsulated packet. Effectively this
>>> offloads the computation of the inner checksum. Enabling the outer
>>> checksum in encapsulation has the additional advantage that it covers
>>> more of the packet than the inner checksum including the encapsulation
>>> headers.
>>
>> Tom, I have a pretty hard time squaring this with your previous
>> comments on hardware offload. This looks almost identical to a
>> protocol-specific hardware offload to me in terms of the implications
>> on the stack. It actually is more invasive and less likely to scale
>> across protocols, so the relative cost/benefit doesn't really add up
>> in my mind.
>
> With this patch and checksum-unnecessary conversion we can provide
> checksum offload for encapsulation on millions of already deployed
> NICs without any HW or FW change. Why do you think this is not a good
> cost/benefit tradeoff?
I just don't see how this is consistent with your previously stated
goal of keeping protocol-specific offload code out of the core stack.
Can you explain how this is different?
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH net-next 0/7] gue: Remote checksum offload
2014-11-04 0:19 ` Jesse Gross
@ 2014-11-04 0:59 ` Tom Herbert
2014-11-04 17:33 ` Jesse Gross
0 siblings, 1 reply; 16+ messages in thread
From: Tom Herbert @ 2014-11-04 0:59 UTC (permalink / raw)
To: Jesse Gross; +Cc: David Miller, netdev
On Mon, Nov 3, 2014 at 4:19 PM, Jesse Gross <jesse@nicira.com> wrote:
> On Mon, Nov 3, 2014 at 2:39 PM, Tom Herbert <therbert@google.com> wrote:
>> On Mon, Nov 3, 2014 at 1:26 PM, Jesse Gross <jesse@nicira.com> wrote:
>>> On Sat, Nov 1, 2014 at 3:57 PM, Tom Herbert <therbert@google.com> wrote:
>>>> This patch set implements remote checksum offload for
>>>> GUE, which is a mechanism that provides checksum offload of
>>>> encapsulated packets using rudimentary offload capabilities found in
>>>> most Network Interface Card (NIC) devices. The outer header checksum
>>>> for UDP is enabled in packets and, with some additional meta
>>>> information in the GUE header, a receiver is able to deduce the
>>>> checksum to be set for an inner encapsulated packet. Effectively this
>>>> offloads the computation of the inner checksum. Enabling the outer
>>>> checksum in encapsulation has the additional advantage that it covers
>>>> more of the packet than the inner checksum including the encapsulation
>>>> headers.
>>>
>>> Tom, I have a pretty hard time squaring this with your previous
>>> comments on hardware offload. This looks almost identical to a
>>> protocol-specific hardware offload to me in terms of the implications
>>> on the stack. It actually is more invasive and less likely to scale
>>> across protocols, so the relative cost/benefit doesn't really add up
>>> in my mind.
>>
>> With this patch and checksum-unnecessary conversion we can provide
>> checksum offload for encapsulation on millions of already deployed
>> NICs without any HW or FW change. Why do you think this is not a good
>> cost/benefit tradeoff?
>
> I just don't see how this is consistent with your previously stated
> goal of keeping protocol-specific offload code out of the core stack.
> Can you explain how this is different?
I think my request was more to avoid putting protocol-specific HW
offload code in the core stack when existing mechanisms could be used.
For instance, ntuple filtering is a more generic interface to tell a
device about special processing for a UDP port than adding an port
registration mechanism that needs to account for each possible
encapsulation protocol.
In these patches we do modify __skb_udp_tunnel_segment which I assume
is what you're referring to in touching the core stack. There are two
parts to this: 1) Allowing checksum offload of outer UDP header is
applicable to any UDP encapsulation protocol 2) When doing remote
checksum we need to avoid touching the inner checksum. The latter is
indicated by SKB_GSO_TUNNEL_REMCSUM being set by the encapsulation
layer. As I mention in the I-D, remote checksum offload can be
implemented by any encapsulation protocol that supports some
reasonable extension (for instance, this is probably something that
could be implemented in geneve). SKB_GSO_TUNNEL_REMCSUM is a generic
interface by that definition.
But, if you really have a strong objection, I suppose we can start
using at gso_segment in udp_offloads and put the remote checksum
offload processing for GSO in a GUE specific segment function.
Thanks,
Tom
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH net-next 0/7] gue: Remote checksum offload
2014-11-04 0:59 ` Tom Herbert
@ 2014-11-04 17:33 ` Jesse Gross
0 siblings, 0 replies; 16+ messages in thread
From: Jesse Gross @ 2014-11-04 17:33 UTC (permalink / raw)
To: Tom Herbert; +Cc: David Miller, netdev
On Mon, Nov 3, 2014 at 4:59 PM, Tom Herbert <therbert@google.com> wrote:
> On Mon, Nov 3, 2014 at 4:19 PM, Jesse Gross <jesse@nicira.com> wrote:
>> On Mon, Nov 3, 2014 at 2:39 PM, Tom Herbert <therbert@google.com> wrote:
>>> On Mon, Nov 3, 2014 at 1:26 PM, Jesse Gross <jesse@nicira.com> wrote:
>>>> On Sat, Nov 1, 2014 at 3:57 PM, Tom Herbert <therbert@google.com> wrote:
>>>>> This patch set implements remote checksum offload for
>>>>> GUE, which is a mechanism that provides checksum offload of
>>>>> encapsulated packets using rudimentary offload capabilities found in
>>>>> most Network Interface Card (NIC) devices. The outer header checksum
>>>>> for UDP is enabled in packets and, with some additional meta
>>>>> information in the GUE header, a receiver is able to deduce the
>>>>> checksum to be set for an inner encapsulated packet. Effectively this
>>>>> offloads the computation of the inner checksum. Enabling the outer
>>>>> checksum in encapsulation has the additional advantage that it covers
>>>>> more of the packet than the inner checksum including the encapsulation
>>>>> headers.
>>>>
>>>> Tom, I have a pretty hard time squaring this with your previous
>>>> comments on hardware offload. This looks almost identical to a
>>>> protocol-specific hardware offload to me in terms of the implications
>>>> on the stack. It actually is more invasive and less likely to scale
>>>> across protocols, so the relative cost/benefit doesn't really add up
>>>> in my mind.
>>>
>>> With this patch and checksum-unnecessary conversion we can provide
>>> checksum offload for encapsulation on millions of already deployed
>>> NICs without any HW or FW change. Why do you think this is not a good
>>> cost/benefit tradeoff?
>>
>> I just don't see how this is consistent with your previously stated
>> goal of keeping protocol-specific offload code out of the core stack.
>> Can you explain how this is different?
>
> I think my request was more to avoid putting protocol-specific HW
> offload code in the core stack when existing mechanisms could be used.
> For instance, ntuple filtering is a more generic interface to tell a
> device about special processing for a UDP port than adding an port
> registration mechanism that needs to account for each possible
> encapsulation protocol.
Yes, haven't forgotten about the previous discussion. Hopefully, we'll
be able to spend some time working on this soon and see how it pans
out.
> In these patches we do modify __skb_udp_tunnel_segment which I assume
> is what you're referring to in touching the core stack. There are two
> parts to this: 1) Allowing checksum offload of outer UDP header is
> applicable to any UDP encapsulation protocol 2) When doing remote
> checksum we need to avoid touching the inner checksum. The latter is
> indicated by SKB_GSO_TUNNEL_REMCSUM being set by the encapsulation
> layer. As I mention in the I-D, remote checksum offload can be
> implemented by any encapsulation protocol that supports some
> reasonable extension (for instance, this is probably something that
> could be implemented in geneve). SKB_GSO_TUNNEL_REMCSUM is a generic
> interface by that definition.
I understand that in theory that this could be applied to other
protocols but in practice I think that is relatively unlikely in most
use cases. Optimizations that result in externally visible change are
usually a no-no (I know this is funny coming from me given STT but the
intention was always that it would be a stopgap until hardware support
was available.)
> But, if you really have a strong objection, I suppose we can start
> using at gso_segment in udp_offloads and put the remote checksum
> offload processing for GSO in a GUE specific segment function.
I thought about this and while it does seem a little unfortunate, I
think it may be the best solution since I suspect that this won't be
the last instance of something like this. It also mirrors what we have
on receive with GRO.
I think if we did that we could also change SKB_GSO_TUNNEL_REMCSUM to
SKB_GSO_SW_ONLY (maybe there is a better name) to indicate that this
is something don't expect hardware to implement. It seems like this
could be something that could be generally useful in the future as
well.
^ permalink raw reply [flat|nested] 16+ messages in thread