* [PATCH v2 net-next 1/5] net: local checksum offload for encapsulation
2016-01-07 17:10 [PATCH v2 net-next 0/5] Local Checksum Offload Edward Cree
@ 2016-01-07 17:12 ` Edward Cree
2016-01-07 17:22 ` David Laight
` (2 more replies)
2016-01-07 17:12 ` [PATCH v2 net-next 2/5] net: enable LCO for udp_tunnel_handle_offloads() users Edward Cree
` (4 subsequent siblings)
5 siblings, 3 replies; 31+ messages in thread
From: Edward Cree @ 2016-01-07 17:12 UTC (permalink / raw)
To: David Miller; +Cc: netdev, linux-net-drivers, Tom Herbert
The arithmetic properties of the ones-complement checksum mean that a
correctly checksummed inner packet, including its checksum, has a ones
complement sum depending only on whatever value was used to initialise
the checksum field before checksumming (in the case of TCP and UDP,
this is the ones complement sum of the pseudo header, complemented).
Consequently, if we are going to offload the inner checksum with
CHECKSUM_PARTIAL, we can compute the outer checksum based only on the
packed data not covered by the inner checksum, and the initial value of
the inner checksum field.
Signed-off-by: Edward Cree <ecree@solarflare.com>
---
include/linux/skbuff.h | 26 ++++++++++++++++++++++++++
net/ipv4/ip_tunnel_core.c | 4 ++++
net/ipv4/udp.c | 29 ++++++++++-------------------
net/ipv6/ip6_checksum.c | 24 ++++++++----------------
4 files changed, 48 insertions(+), 35 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6b6bd42..6590d08 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3665,5 +3665,31 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
return hdr_len + skb_gso_transport_seglen(skb);
}
+/* Local Checksum Offload.
+ * Compute outer checksum based on the assumption that the
+ * inner checksum will be offloaded later.
+ * See Documentation/networking/tx-offloads.txt for
+ * explanation of how this works.
+ * Fill in outer checksum adjustment (e.g. with sum of outer
+ * pseudo-header) before calling.
+ * Also ensure that inner checksum is in linear data area.
+ */
+static inline __wsum lco_csum(struct sk_buff *skb)
+{
+ char *inner_csum_field;
+ __wsum csum;
+
+ /* Start with complement of inner checksum adjustment */
+ inner_csum_field = skb->data + skb_checksum_start_offset(skb) +
+ skb->csum_offset;
+ csum = ~csum_unfold(*(__force __sum16 *)inner_csum_field);
+ /* Add in checksum of our headers (incl. outer checksum
+ * adjustment filled in by caller)
+ */
+ csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), csum);
+ /* The result is the outer checksum */
+ return csum;
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 1db8418..f39b064 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -146,6 +146,10 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
}
EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
+/* csum_help should only be ever true if the protocol doesn't support LCO.
+ * If the tunnel uses udp_tunnel_xmit_skb(), then it gets LCO for free, and
+ * should always set csum_help to false.
+ */
struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
bool csum_help,
int gso_type_mask)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8841e98..c1c73be 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -767,32 +767,23 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
{
struct udphdr *uh = udp_hdr(skb);
- if (nocheck)
+ if (nocheck) {
uh->check = 0;
- else if (skb_is_gso(skb))
+ } else if (skb_is_gso(skb)) {
uh->check = ~udp_v4_check(len, saddr, daddr, 0);
- else if (skb_dst(skb) && skb_dst(skb)->dev &&
- (skb_dst(skb)->dev->features &
- (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ __wsum csum;
+ uh->check = ~udp_v4_check(len, saddr, daddr, 0);
+ csum = lco_csum(skb);
+ uh->check = csum_fold(csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ } else {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~udp_v4_check(len, saddr, daddr, 0);
- } else {
- __wsum csum;
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
- uh->check = 0;
- csum = skb_checksum(skb, 0, len, 0);
- uh->check = udp_v4_check(len, saddr, daddr, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
}
}
EXPORT_SYMBOL(udp_set_csum);
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 9a4d732..19e564a 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -98,27 +98,19 @@ void udp6_set_csum(bool nocheck, struct sk_buff *skb,
uh->check = 0;
else if (skb_is_gso(skb))
uh->check = ~udp_v6_check(len, saddr, daddr, 0);
- else if (skb_dst(skb) && skb_dst(skb)->dev &&
- (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+ else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ __wsum csum;
+ uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+ csum = lco_csum(skb);
+ uh->check = csum_fold(csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ } else {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~udp_v6_check(len, saddr, daddr, 0);
- } else {
- __wsum csum;
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
- uh->check = 0;
- csum = skb_checksum(skb, 0, len, 0);
- uh->check = udp_v6_check(len, saddr, daddr, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
}
}
EXPORT_SYMBOL(udp6_set_csum);
^ permalink raw reply related [flat|nested] 31+ messages in thread* RE: [PATCH v2 net-next 1/5] net: local checksum offload for encapsulation
2016-01-07 17:12 ` [PATCH v2 net-next 1/5] net: local checksum offload for encapsulation Edward Cree
@ 2016-01-07 17:22 ` David Laight
2016-01-07 17:54 ` Edward Cree
2016-01-07 18:42 ` Tom Herbert
2016-01-07 22:53 ` Alexander Duyck
2 siblings, 1 reply; 31+ messages in thread
From: David Laight @ 2016-01-07 17:22 UTC (permalink / raw)
To: 'Edward Cree', David Miller
Cc: netdev@vger.kernel.org, linux-net-drivers@solarflare.com,
Tom Herbert
From: Edward Cree
> Sent: 07 January 2016 17:12
> The arithmetic properties of the ones-complement checksum mean that a
> correctly checksummed inner packet, including its checksum, has a ones
> complement sum depending only on whatever value was used to initialise
> the checksum field before checksumming (in the case of TCP and UDP,
> this is the ones complement sum of the pseudo header, complemented).
> Consequently, if we are going to offload the inner checksum with
> CHECKSUM_PARTIAL, we can compute the outer checksum based only on the
> packed data not covered by the inner checksum, and the initial value of
> the inner checksum field.
Isn't it even simpler than that?
The checksum of the inner packet (including its header) is ~0 (ie 0).
So the checksum of the whole packet (for the outer header) is the same
as that of the packet down to the start of the inner header.
David
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 net-next 1/5] net: local checksum offload for encapsulation
2016-01-07 17:22 ` David Laight
@ 2016-01-07 17:54 ` Edward Cree
0 siblings, 0 replies; 31+ messages in thread
From: Edward Cree @ 2016-01-07 17:54 UTC (permalink / raw)
To: David Laight, David Miller
Cc: netdev@vger.kernel.org, linux-net-drivers@solarflare.com,
Tom Herbert
On 07/01/16 17:22, David Laight wrote:
> Isn't it even simpler than that?
> The checksum of the inner packet (including its header) is ~0 (ie 0).
> So the checksum of the whole packet (for the outer header) is the same
> as that of the packet down to the start of the inner header.
Not quite. The inner pseudo-header is included in the inner checksum
calculation, but does not actually appear in the packet. Thus the checksum
of the inner packet is ~sum_of_inner_pseudo_header.
-ed
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 net-next 1/5] net: local checksum offload for encapsulation
2016-01-07 17:12 ` [PATCH v2 net-next 1/5] net: local checksum offload for encapsulation Edward Cree
2016-01-07 17:22 ` David Laight
@ 2016-01-07 18:42 ` Tom Herbert
2016-01-07 22:53 ` Alexander Duyck
2 siblings, 0 replies; 31+ messages in thread
From: Tom Herbert @ 2016-01-07 18:42 UTC (permalink / raw)
To: Edward Cree
Cc: David Miller, Linux Kernel Network Developers, linux-net-drivers
On Thu, Jan 7, 2016 at 9:12 AM, Edward Cree <ecree@solarflare.com> wrote:
> The arithmetic properties of the ones-complement checksum mean that a
> correctly checksummed inner packet, including its checksum, has a ones
> complement sum depending only on whatever value was used to initialise
> the checksum field before checksumming (in the case of TCP and UDP,
> this is the ones complement sum of the pseudo header, complemented).
> Consequently, if we are going to offload the inner checksum with
> CHECKSUM_PARTIAL, we can compute the outer checksum based only on the
> packed data not covered by the inner checksum, and the initial value of
> the inner checksum field.
>
> Signed-off-by: Edward Cree <ecree@solarflare.com>
> ---
> include/linux/skbuff.h | 26 ++++++++++++++++++++++++++
> net/ipv4/ip_tunnel_core.c | 4 ++++
> net/ipv4/udp.c | 29 ++++++++++-------------------
> net/ipv6/ip6_checksum.c | 24 ++++++++----------------
> 4 files changed, 48 insertions(+), 35 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 6b6bd42..6590d08 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -3665,5 +3665,31 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
> return hdr_len + skb_gso_transport_seglen(skb);
> }
>
> +/* Local Checksum Offload.
> + * Compute outer checksum based on the assumption that the
> + * inner checksum will be offloaded later.
> + * See Documentation/networking/tx-offloads.txt for
> + * explanation of how this works.
> + * Fill in outer checksum adjustment (e.g. with sum of outer
> + * pseudo-header) before calling.
> + * Also ensure that inner checksum is in linear data area.
> + */
> +static inline __wsum lco_csum(struct sk_buff *skb)
> +{
> + char *inner_csum_field;
> + __wsum csum;
> +
> + /* Start with complement of inner checksum adjustment */
> + inner_csum_field = skb->data + skb_checksum_start_offset(skb) +
> + skb->csum_offset;
> + csum = ~csum_unfold(*(__force __sum16 *)inner_csum_field);
> + /* Add in checksum of our headers (incl. outer checksum
> + * adjustment filled in by caller)
> + */
> + csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), csum);
> + /* The result is the outer checksum */
Specifically packet checksum starting from skb->data.
> + return csum;
> +}
> +
> #endif /* __KERNEL__ */
> #endif /* _LINUX_SKBUFF_H */
> diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
> index 1db8418..f39b064 100644
> --- a/net/ipv4/ip_tunnel_core.c
> +++ b/net/ipv4/ip_tunnel_core.c
> @@ -146,6 +146,10 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
> }
> EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
>
> +/* csum_help should only be ever true if the protocol doesn't support LCO.
> + * If the tunnel uses udp_tunnel_xmit_skb(), then it gets LCO for free, and
> + * should always set csum_help to false.
> + */
This comment probably isn't necessary.
> struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
> bool csum_help,
> int gso_type_mask)
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 8841e98..c1c73be 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -767,32 +767,23 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
> {
> struct udphdr *uh = udp_hdr(skb);
>
> - if (nocheck)
> + if (nocheck) {
> uh->check = 0;
> - else if (skb_is_gso(skb))
> + } else if (skb_is_gso(skb)) {
> uh->check = ~udp_v4_check(len, saddr, daddr, 0);
> - else if (skb_dst(skb) && skb_dst(skb)->dev &&
> - (skb_dst(skb)->dev->features &
> - (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
> -
I think getting rid of this check is correct, but maybe it should be
in a separate patch?
> - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
> + } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
> + __wsum csum;
>
> + uh->check = ~udp_v4_check(len, saddr, daddr, 0);
> + csum = lco_csum(skb);
> + uh->check = csum_fold(csum);
> + if (uh->check == 0)
> + uh->check = CSUM_MANGLED_0;
> + } else {
> skb->ip_summed = CHECKSUM_PARTIAL;
> skb->csum_start = skb_transport_header(skb) - skb->head;
> skb->csum_offset = offsetof(struct udphdr, check);
> uh->check = ~udp_v4_check(len, saddr, daddr, 0);
> - } else {
> - __wsum csum;
> -
> - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
> -
> - uh->check = 0;
> - csum = skb_checksum(skb, 0, len, 0);
> - uh->check = udp_v4_check(len, saddr, daddr, csum);
> - if (uh->check == 0)
> - uh->check = CSUM_MANGLED_0;
> -
> - skb->ip_summed = CHECKSUM_UNNECESSARY;
> }
> }
> EXPORT_SYMBOL(udp_set_csum);
> diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
> index 9a4d732..19e564a 100644
> --- a/net/ipv6/ip6_checksum.c
> +++ b/net/ipv6/ip6_checksum.c
> @@ -98,27 +98,19 @@ void udp6_set_csum(bool nocheck, struct sk_buff *skb,
> uh->check = 0;
> else if (skb_is_gso(skb))
> uh->check = ~udp_v6_check(len, saddr, daddr, 0);
> - else if (skb_dst(skb) && skb_dst(skb)->dev &&
> - (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
> -
> - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
> + else if (skb->ip_summed == CHECKSUM_PARTIAL) {
> + __wsum csum;
>
> + uh->check = ~udp_v6_check(len, saddr, daddr, 0);
> + csum = lco_csum(skb);
> + uh->check = csum_fold(csum);
Just do uh->check = csum_fold(loc)csum(skb)) to eliminate need for
local csum variable.
> + if (uh->check == 0)
> + uh->check = CSUM_MANGLED_0;
> + } else {
> skb->ip_summed = CHECKSUM_PARTIAL;
> skb->csum_start = skb_transport_header(skb) - skb->head;
> skb->csum_offset = offsetof(struct udphdr, check);
> uh->check = ~udp_v6_check(len, saddr, daddr, 0);
> - } else {
> - __wsum csum;
> -
> - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
> -
> - uh->check = 0;
> - csum = skb_checksum(skb, 0, len, 0);
> - uh->check = udp_v6_check(len, saddr, daddr, csum);
> - if (uh->check == 0)
> - uh->check = CSUM_MANGLED_0;
> -
> - skb->ip_summed = CHECKSUM_UNNECESSARY;
> }
> }
> EXPORT_SYMBOL(udp6_set_csum);
>
> The information contained in this message is confidential and is intended for the addressee(s) only. If you have received this message in error, please notify the sender immediately and delete the message. Unless you are an addressee (or authorized to receive for an addressee), you may not use, copy or disclose to anyone this message or any information contained in this message. The unauthorized use, disclosure, copying or alteration of this message is strictly prohibited.
^ permalink raw reply [flat|nested] 31+ messages in thread* Re: [PATCH v2 net-next 1/5] net: local checksum offload for encapsulation
2016-01-07 17:12 ` [PATCH v2 net-next 1/5] net: local checksum offload for encapsulation Edward Cree
2016-01-07 17:22 ` David Laight
2016-01-07 18:42 ` Tom Herbert
@ 2016-01-07 22:53 ` Alexander Duyck
2016-01-08 15:32 ` Edward Cree
2 siblings, 1 reply; 31+ messages in thread
From: Alexander Duyck @ 2016-01-07 22:53 UTC (permalink / raw)
To: Edward Cree; +Cc: David Miller, Netdev, linux-net-drivers, Tom Herbert
On Thu, Jan 7, 2016 at 9:12 AM, Edward Cree <ecree@solarflare.com> wrote:
>
> The arithmetic properties of the ones-complement checksum mean that a
> correctly checksummed inner packet, including its checksum, has a ones
> complement sum depending only on whatever value was used to initialise
> the checksum field before checksumming (in the case of TCP and UDP,
> this is the ones complement sum of the pseudo header, complemented).
> Consequently, if we are going to offload the inner checksum with
> CHECKSUM_PARTIAL, we can compute the outer checksum based only on the
> packed data not covered by the inner checksum, and the initial value of
> the inner checksum field.
>
> Signed-off-by: Edward Cree <ecree@solarflare.com>
> ---
> include/linux/skbuff.h | 26 ++++++++++++++++++++++++++
> net/ipv4/ip_tunnel_core.c | 4 ++++
> net/ipv4/udp.c | 29 ++++++++++-------------------
> net/ipv6/ip6_checksum.c | 24 ++++++++----------------
> 4 files changed, 48 insertions(+), 35 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 6b6bd42..6590d08 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -3665,5 +3665,31 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
> return hdr_len + skb_gso_transport_seglen(skb);
> }
>
> +/* Local Checksum Offload.
> + * Compute outer checksum based on the assumption that the
> + * inner checksum will be offloaded later.
> + * See Documentation/networking/tx-offloads.txt for
> + * explanation of how this works.
> + * Fill in outer checksum adjustment (e.g. with sum of outer
> + * pseudo-header) before calling.
> + * Also ensure that inner checksum is in linear data area.
> + */
> +static inline __wsum lco_csum(struct sk_buff *skb)
> +{
> + char *inner_csum_field;
> + __wsum csum;
> +
> + /* Start with complement of inner checksum adjustment */
> + inner_csum_field = skb->data + skb_checksum_start_offset(skb) +
> + skb->csum_offset;
You would probably benefit from caching off the result of
skb_checksum_start_offset into a local variable so the compiler
doesn't go through and recompute it when you call it again below.
> + csum = ~csum_unfold(*(__force __sum16 *)inner_csum_field);
This seems like a lot of work, couldn't you get away with just
bit-flipping this and moving it into uh->check on the outer header?
> + /* Add in checksum of our headers (incl. outer checksum
> + * adjustment filled in by caller)
> + */
> + csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), csum);
> + /* The result is the outer checksum */
> + return csum;
> +}
> +
The more I think about it I am not sure how much there is to be gained
by having this as a separate function anyway since I think you might
be able to better exploit things with a few changes to the ordering of
operations. See my notes below in the IPv4 section.
> #endif /* __KERNEL__ */
> #endif /* _LINUX_SKBUFF_H */
> diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
> index 1db8418..f39b064 100644
> --- a/net/ipv4/ip_tunnel_core.c
> +++ b/net/ipv4/ip_tunnel_core.c
> @@ -146,6 +146,10 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
> }
> EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
>
> +/* csum_help should only be ever true if the protocol doesn't support LCO.
> + * If the tunnel uses udp_tunnel_xmit_skb(), then it gets LCO for free, and
> + * should always set csum_help to false.
> + */
> struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
> bool csum_help,
> int gso_type_mask)
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 8841e98..c1c73be 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -767,32 +767,23 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
> {
> struct udphdr *uh = udp_hdr(skb);
>
> - if (nocheck)
> + if (nocheck) {
> uh->check = 0;
> - else if (skb_is_gso(skb))
> + } else if (skb_is_gso(skb)) {
> uh->check = ~udp_v4_check(len, saddr, daddr, 0);
> - else if (skb_dst(skb) && skb_dst(skb)->dev &&
> - (skb_dst(skb)->dev->features &
> - (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
> -
> - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
> + } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
> + __wsum csum;
>
I wonder if this shouldn't be made a check that is in addition to the
two options below instead of completely replacing them. The question
I would have is if there are any cases where we need to follow the
path that results in the CHECKSUM_UNNECESSARY being set.
>
> + uh->check = ~udp_v4_check(len, saddr, daddr, 0);
> + csum = lco_csum(skb);
> + uh->check = csum_fold(csum);
> + if (uh->check == 0)
> + uh->check = CSUM_MANGLED_0;
You would probably benefit from reordering this to something like what
we have in the last block below this one. The idea is then you only
halve to fold things once and can avoid some unnecessary duplication.
So you could code it up with something like:
__wsum csum;
int start_offset;
start_offset = skb_checksum_start_offset(skb);
uh->check = ~(*(__sum16 *)(skb->data + start_offset + skb->csum_offset));
csum = skb_checksum(skb, 0, start_offset, 0);
uh->check = udp_v4_check(len, saddr, daddr, csum);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
Forgive the formatting, my email client mangles tabs badly. By using
the pseudo header checksum from the inner header for the starting
value and then computing the udp_v4_check for the outer header last
you save yourself a few cycles since you only have to fold the
checksum once instead of once for the pseudo-header and again for the
final result.
>
> + } else {
> skb->ip_summed = CHECKSUM_PARTIAL;
> skb->csum_start = skb_transport_header(skb) - skb->head;
> skb->csum_offset = offsetof(struct udphdr, check);
> uh->check = ~udp_v4_check(len, saddr, daddr, 0);
> - } else {
> - __wsum csum;
> -
> - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
> -
> - uh->check = 0;
> - csum = skb_checksum(skb, 0, len, 0);
> - uh->check = udp_v4_check(len, saddr, daddr, csum);
> - if (uh->check == 0)
> - uh->check = CSUM_MANGLED_0;
> -
> - skb->ip_summed = CHECKSUM_UNNECESSARY;
> }
> }
> EXPORT_SYMBOL(udp_set_csum);
> diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
> index 9a4d732..19e564a 100644
> --- a/net/ipv6/ip6_checksum.c
> +++ b/net/ipv6/ip6_checksum.c
> @@ -98,27 +98,19 @@ void udp6_set_csum(bool nocheck, struct sk_buff *skb,
> uh->check = 0;
> else if (skb_is_gso(skb))
> uh->check = ~udp_v6_check(len, saddr, daddr, 0);
> - else if (skb_dst(skb) && skb_dst(skb)->dev &&
> - (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
> -
> - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
> + else if (skb->ip_summed == CHECKSUM_PARTIAL) {
> + __wsum csum;
>
> + uh->check = ~udp_v6_check(len, saddr, daddr, 0);
> + csum = lco_csum(skb);
> + uh->check = csum_fold(csum);
> + if (uh->check == 0)
> + uh->check = CSUM_MANGLED_0;
Same here for the IPv6 block. Instead of initializing uh->check using
the outer pseudo-header checksum you would probably be better off
using the inner header and then folding that into the outer
pseudo-header checksum last.
>
> + } else {
> skb->ip_summed = CHECKSUM_PARTIAL;
> skb->csum_start = skb_transport_header(skb) - skb->head;
> skb->csum_offset = offsetof(struct udphdr, check);
> uh->check = ~udp_v6_check(len, saddr, daddr, 0);
> - } else {
> - __wsum csum;
> -
> - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
> -
> - uh->check = 0;
> - csum = skb_checksum(skb, 0, len, 0);
> - uh->check = udp_v6_check(len, saddr, daddr, csum);
> - if (uh->check == 0)
> - uh->check = CSUM_MANGLED_0;
> -
> - skb->ip_summed = CHECKSUM_UNNECESSARY;
> }
> }
> EXPORT_SYMBOL(udp6_set_csum);
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 31+ messages in thread* Re: [PATCH v2 net-next 1/5] net: local checksum offload for encapsulation
2016-01-07 22:53 ` Alexander Duyck
@ 2016-01-08 15:32 ` Edward Cree
2016-01-08 17:30 ` Alexander Duyck
0 siblings, 1 reply; 31+ messages in thread
From: Edward Cree @ 2016-01-08 15:32 UTC (permalink / raw)
To: Alexander Duyck; +Cc: David Miller, Netdev, linux-net-drivers, Tom Herbert
On 07/01/16 22:53, Alexander Duyck wrote:
> On Thu, Jan 7, 2016 at 9:12 AM, Edward Cree <ecree@solarflare.com> wrote:
>> The arithmetic properties of the ones-complement checksum mean that a
>> correctly checksummed inner packet, including its checksum, has a ones
>> complement sum depending only on whatever value was used to initialise
>> the checksum field before checksumming (in the case of TCP and UDP,
>> this is the ones complement sum of the pseudo header, complemented).
>> Consequently, if we are going to offload the inner checksum with
>> CHECKSUM_PARTIAL, we can compute the outer checksum based only on the
>> packed data not covered by the inner checksum, and the initial value of
>> the inner checksum field.
>>
>> Signed-off-by: Edward Cree <ecree@solarflare.com>
>> ---
>> include/linux/skbuff.h | 26 ++++++++++++++++++++++++++
>> net/ipv4/ip_tunnel_core.c | 4 ++++
>> net/ipv4/udp.c | 29 ++++++++++-------------------
>> net/ipv6/ip6_checksum.c | 24 ++++++++----------------
>> 4 files changed, 48 insertions(+), 35 deletions(-)
>>
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index 6b6bd42..6590d08 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -3665,5 +3665,31 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
>> return hdr_len + skb_gso_transport_seglen(skb);
>> }
>>
>> +/* Local Checksum Offload.
>> + * Compute outer checksum based on the assumption that the
>> + * inner checksum will be offloaded later.
>> + * See Documentation/networking/tx-offloads.txt for
>> + * explanation of how this works.
>> + * Fill in outer checksum adjustment (e.g. with sum of outer
>> + * pseudo-header) before calling.
>> + * Also ensure that inner checksum is in linear data area.
>> + */
>> +static inline __wsum lco_csum(struct sk_buff *skb)
>> +{
>> + char *inner_csum_field;
>> + __wsum csum;
>> +
>> + /* Start with complement of inner checksum adjustment */
>> + inner_csum_field = skb->data + skb_checksum_start_offset(skb) +
>> + skb->csum_offset;
> You would probably benefit from caching off the result of
> skb_checksum_start_offset into a local variable so the compiler
> doesn't go through and recompute it when you call it again below.
It's a nearly-trivial inline function; won't the compiler be smart enough to
cache that result itself?
>> + csum = ~csum_unfold(*(__force __sum16 *)inner_csum_field);
> This seems like a lot of work, couldn't you get away with just
> bit-flipping this and moving it into uh->check on the outer header?
It's not a lot of work: all this does is zero-extend to 32 bits and flip.
It looks like more, but most of it is just a cast; it's written in this way
to pacify sparse while using as little __force as possible.
lco_csum can't move it into uh->check, because it doesn't have uh. In fact,
the skb might not even be UDP - this function is intended to be used also
for GRE, which has an IP-style checksum but in a different place. (Next
version of the patch series will implement that btw)
>> + /* Add in checksum of our headers (incl. outer checksum
>> + * adjustment filled in by caller)
>> + */
>> + csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), csum);
>> + /* The result is the outer checksum */
>> + return csum;
>> +}
>> +
> The more I think about it I am not sure how much there is to be gained
> by having this as a separate function anyway since I think you might
> be able to better exploit things with a few changes to the ordering of
> operations. See my notes below in the IPv4 section.
>
>> #endif /* __KERNEL__ */
>> #endif /* _LINUX_SKBUFF_H */
>> diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
>> index 1db8418..f39b064 100644
>> --- a/net/ipv4/ip_tunnel_core.c
>> +++ b/net/ipv4/ip_tunnel_core.c
>> @@ -146,6 +146,10 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
>> }
>> EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
>>
>> +/* csum_help should only be ever true if the protocol doesn't support LCO.
>> + * If the tunnel uses udp_tunnel_xmit_skb(), then it gets LCO for free, and
>> + * should always set csum_help to false.
>> + */
>> struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
>> bool csum_help,
>> int gso_type_mask)
>> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
>> index 8841e98..c1c73be 100644
>> --- a/net/ipv4/udp.c
>> +++ b/net/ipv4/udp.c
>> @@ -767,32 +767,23 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
>> {
>> struct udphdr *uh = udp_hdr(skb);
>>
>> - if (nocheck)
>> + if (nocheck) {
>> uh->check = 0;
>> - else if (skb_is_gso(skb))
>> + } else if (skb_is_gso(skb)) {
>> uh->check = ~udp_v4_check(len, saddr, daddr, 0);
>> - else if (skb_dst(skb) && skb_dst(skb)->dev &&
>> - (skb_dst(skb)->dev->features &
>> - (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
>> -
>> - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
>> + } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
>> + __wsum csum;
>>
> I wonder if this shouldn't be made a check that is in addition to the
> two options below instead of completely replacing them. The question
> I would have is if there are any cases where we need to follow the
> path that results in the CHECKSUM_UNNECESSARY being set.
I don't think there can be such a case.
Either: we've already set up PARTIAL for an inner header, so we can
definitely do LCO.
Or: we haven't set up PARTIAL yet, so we can use that now. If the device
doesn't support it, it'll get fixed up later when we validate_xmit_skb().
So there's no way (AFAICT) that we'd ever not be able to use PARTIAL.
Unless - hmmm - what happens if we've set up PARTIAL for a CRC rather than
an IP checksum? However, it looks to me as if in that case the old code
would have screwed up when iptunnel_handle_offloads() would do the inner
csum in skb_checksum_help() and would do it as an IP checksum. So I'm
guessing this probably can't happen. Or it's already broken and so my
patch won't make it any worse ;)
However, the next version of the patch series will split this change out
from the rest of the patch, as Tom Herbert suggested.
>
>> + uh->check = ~udp_v4_check(len, saddr, daddr, 0);
>> + csum = lco_csum(skb);
>> + uh->check = csum_fold(csum);
>> + if (uh->check == 0)
>> + uh->check = CSUM_MANGLED_0;
>
> You would probably benefit from reordering this to something like what
> we have in the last block below this one. The idea is then you only
> halve to fold things once and can avoid some unnecessary duplication.
>
> So you could code it up with something like:
> __wsum csum;
> int start_offset;
>
> start_offset = skb_checksum_start_offset(skb);
> uh->check = ~(*(__sum16 *)(skb->data + start_offset + skb->csum_offset));
> csum = skb_checksum(skb, 0, start_offset, 0);
> uh->check = udp_v4_check(len, saddr, daddr, csum);
> if (uh->check == 0)
> uh->check = CSUM_MANGLED_0;
>
> Forgive the formatting, my email client mangles tabs badly. By using
> the pseudo header checksum from the inner header for the starting
> value and then computing the udp_v4_check for the outer header last
> you save yourself a few cycles since you only have to fold the
> checksum once instead of once for the pseudo-header and again for the
> final result.
Hmm, I think we can do this without losing the helper function (which will
be shared not just by UDPv4 and UDPv6 but also GRE).
Something like this:
uh->check = 0;
uh->check = ~udp_v4_check(len, saddr, daddr, lco_csum(skb));
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
Now the only fold is the one udp_v4_check() does.
Would that shave off enough cycles to satisfy?
^ permalink raw reply [flat|nested] 31+ messages in thread* Re: [PATCH v2 net-next 1/5] net: local checksum offload for encapsulation
2016-01-08 15:32 ` Edward Cree
@ 2016-01-08 17:30 ` Alexander Duyck
0 siblings, 0 replies; 31+ messages in thread
From: Alexander Duyck @ 2016-01-08 17:30 UTC (permalink / raw)
To: Edward Cree; +Cc: David Miller, Netdev, linux-net-drivers, Tom Herbert
On Fri, Jan 8, 2016 at 7:32 AM, Edward Cree <ecree@solarflare.com> wrote:
> On 07/01/16 22:53, Alexander Duyck wrote:
>> On Thu, Jan 7, 2016 at 9:12 AM, Edward Cree <ecree@solarflare.com> wrote:
>>> The arithmetic properties of the ones-complement checksum mean that a
>>> correctly checksummed inner packet, including its checksum, has a ones
>>> complement sum depending only on whatever value was used to initialise
>>> the checksum field before checksumming (in the case of TCP and UDP,
>>> this is the ones complement sum of the pseudo header, complemented).
>>> Consequently, if we are going to offload the inner checksum with
>>> CHECKSUM_PARTIAL, we can compute the outer checksum based only on the
>>> packed data not covered by the inner checksum, and the initial value of
>>> the inner checksum field.
>>>
>>> Signed-off-by: Edward Cree <ecree@solarflare.com>
>>> ---
>>> include/linux/skbuff.h | 26 ++++++++++++++++++++++++++
>>> net/ipv4/ip_tunnel_core.c | 4 ++++
>>> net/ipv4/udp.c | 29 ++++++++++-------------------
>>> net/ipv6/ip6_checksum.c | 24 ++++++++----------------
>>> 4 files changed, 48 insertions(+), 35 deletions(-)
>>>
>>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>>> index 6b6bd42..6590d08 100644
>>> --- a/include/linux/skbuff.h
>>> +++ b/include/linux/skbuff.h
>>> @@ -3665,5 +3665,31 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
>>> return hdr_len + skb_gso_transport_seglen(skb);
>>> }
>>>
>>> +/* Local Checksum Offload.
>>> + * Compute outer checksum based on the assumption that the
>>> + * inner checksum will be offloaded later.
>>> + * See Documentation/networking/tx-offloads.txt for
>>> + * explanation of how this works.
>>> + * Fill in outer checksum adjustment (e.g. with sum of outer
>>> + * pseudo-header) before calling.
>>> + * Also ensure that inner checksum is in linear data area.
>>> + */
>>> +static inline __wsum lco_csum(struct sk_buff *skb)
>>> +{
>>> + char *inner_csum_field;
>>> + __wsum csum;
>>> +
>>> + /* Start with complement of inner checksum adjustment */
>>> + inner_csum_field = skb->data + skb_checksum_start_offset(skb) +
>>> + skb->csum_offset;
>> You would probably benefit from caching off the result of
>> skb_checksum_start_offset into a local variable so the compiler
>> doesn't go through and recompute it when you call it again below.
> It's a nearly-trivial inline function; won't the compiler be smart enough to
> cache that result itself?
Maybe, but maybe not. I can't say for sure. The issue is that you
are reading memory locations of a possibly shared structure so it
could do one of several things. From my perspective it is just kind
of pain to see since the function name is so long and you are having
to call it a couple times. One thing that might almost be useful
would be to consider looking at adding a shortcut for getting the
pointer. Maybe a function like skb_csum_start_ptr(skb), that could
just give you the result of "skb->head + skb->csum_start". Then it
saves you the trouble of having to rely on the compiler to cancel out
skb->data in an expression such as this one.
For now I am probably just engaging in a bit of premature
optimization. I can probably go over the code and clean it up once
the patches are actually in net-next.
>>> + csum = ~csum_unfold(*(__force __sum16 *)inner_csum_field);
>> This seems like a lot of work, couldn't you get away with just
>> bit-flipping this and moving it into uh->check on the outer header?
> It's not a lot of work: all this does is zero-extend to 32 bits and flip.
> It looks like more, but most of it is just a cast; it's written in this way
> to pacify sparse while using as little __force as possible.
> lco_csum can't move it into uh->check, because it doesn't have uh. In fact,
> the skb might not even be UDP - this function is intended to be used also
> for GRE, which has an IP-style checksum but in a different place. (Next
> version of the patch series will implement that btw)
Actually one minor thing you might want to change here is to do the
bit-flipping first, and then zero extend the value instead of the
other way around. That way it makes this a bit easier to use the
resultant value since you could use simple addition on it with any
other 16 bit checksum without having to worry about needing to carry
anything over from the addition.
>>> + /* Add in checksum of our headers (incl. outer checksum
>>> + * adjustment filled in by caller)
>>> + */
>>> + csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), csum);
>>> + /* The result is the outer checksum */
>>> + return csum;
>>> +}
>>> +
>> The more I think about it I am not sure how much there is to be gained
>> by having this as a separate function anyway since I think you might
>> be able to better exploit things with a few changes to the ordering of
>> operations. See my notes below in the IPv4 section.
>>
>>> #endif /* __KERNEL__ */
>>> #endif /* _LINUX_SKBUFF_H */
>>> diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
>>> index 1db8418..f39b064 100644
>>> --- a/net/ipv4/ip_tunnel_core.c
>>> +++ b/net/ipv4/ip_tunnel_core.c
>>> @@ -146,6 +146,10 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
>>> }
>>> EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
>>>
>>> +/* csum_help should only be ever true if the protocol doesn't support LCO.
>>> + * If the tunnel uses udp_tunnel_xmit_skb(), then it gets LCO for free, and
>>> + * should always set csum_help to false.
>>> + */
>>> struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
>>> bool csum_help,
>>> int gso_type_mask)
>>> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
>>> index 8841e98..c1c73be 100644
>>> --- a/net/ipv4/udp.c
>>> +++ b/net/ipv4/udp.c
>>> @@ -767,32 +767,23 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
>>> {
>>> struct udphdr *uh = udp_hdr(skb);
>>>
>>> - if (nocheck)
>>> + if (nocheck) {
>>> uh->check = 0;
>>> - else if (skb_is_gso(skb))
>>> + } else if (skb_is_gso(skb)) {
>>> uh->check = ~udp_v4_check(len, saddr, daddr, 0);
>>> - else if (skb_dst(skb) && skb_dst(skb)->dev &&
>>> - (skb_dst(skb)->dev->features &
>>> - (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
>>> -
>>> - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
>>> + } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
>>> + __wsum csum;
>>>
>> I wonder if this shouldn't be made a check that is in addition to the
>> two options below instead of completely replacing them. The question
>> I would have is if there are any cases where we need to follow the
>> path that results in the CHECKSUM_UNNECESSARY being set.
> I don't think there can be such a case.
> Either: we've already set up PARTIAL for an inner header, so we can
> definitely do LCO.
> Or: we haven't set up PARTIAL yet, so we can use that now. If the device
> doesn't support it, it'll get fixed up later when we validate_xmit_skb().
> So there's no way (AFAICT) that we'd ever not be able to use PARTIAL.
> Unless - hmmm - what happens if we've set up PARTIAL for a CRC rather than
> an IP checksum? However, it looks to me as if in that case the old code
> would have screwed up when iptunnel_handle_offloads() would do the inner
> csum in skb_checksum_help() and would do it as an IP checksum. So I'm
> guessing this probably can't happen. Or it's already broken and so my
> patch won't make it any worse ;)
>
> However, the next version of the patch series will split this change out
> from the rest of the patch, as Tom Herbert suggested.
Works for me.
>>
>>> + uh->check = ~udp_v4_check(len, saddr, daddr, 0);
>>> + csum = lco_csum(skb);
>>> + uh->check = csum_fold(csum);
>>> + if (uh->check == 0)
>>> + uh->check = CSUM_MANGLED_0;
>>
>> You would probably benefit from reordering this to something like what
>> we have in the last block below this one. The idea is then you only
>> halve to fold things once and can avoid some unnecessary duplication.
>>
>> So you could code it up with something like:
>> __wsum csum;
>> int start_offset;
>>
>> start_offset = skb_checksum_start_offset(skb);
>> uh->check = ~(*(__sum16 *)(skb->data + start_offset + skb->csum_offset));
>> csum = skb_checksum(skb, 0, start_offset, 0);
>> uh->check = udp_v4_check(len, saddr, daddr, csum);
>> if (uh->check == 0)
>> uh->check = CSUM_MANGLED_0;
>>
>> Forgive the formatting, my email client mangles tabs badly. By using
>> the pseudo header checksum from the inner header for the starting
>> value and then computing the udp_v4_check for the outer header last
>> you save yourself a few cycles since you only have to fold the
>> checksum once instead of once for the pseudo-header and again for the
>> final result.
> Hmm, I think we can do this without losing the helper function (which will
> be shared not just by UDPv4 and UDPv6 but also GRE).
> Something like this:
> uh->check = 0;
> uh->check = ~udp_v4_check(len, saddr, daddr, lco_csum(skb));
> if (uh->check == 0)
> uh->check = CSUM_MANGLED_0;
> Now the only fold is the one udp_v4_check() does.
> Would that shave off enough cycles to satisfy?
Yeah, that should work. When I was messing around with the code I
realized we cannot assume uh->check to contain the inner checksum
anyway since the GSO path has already populated uh->check with the
partial checksum for the outer headers. This would mean we can reuse
the lco_csum function in the GSO path.
- Alex
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v2 net-next 2/5] net: enable LCO for udp_tunnel_handle_offloads() users
2016-01-07 17:10 [PATCH v2 net-next 0/5] Local Checksum Offload Edward Cree
2016-01-07 17:12 ` [PATCH v2 net-next 1/5] net: local checksum offload for encapsulation Edward Cree
@ 2016-01-07 17:12 ` Edward Cree
2016-01-07 17:12 ` [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload Edward Cree
` (3 subsequent siblings)
5 siblings, 0 replies; 31+ messages in thread
From: Edward Cree @ 2016-01-07 17:12 UTC (permalink / raw)
To: David Miller; +Cc: netdev, linux-net-drivers, Tom Herbert
The only protocol affected at present is Geneve.
Signed-off-by: Edward Cree <ecree@solarflare.com>
---
include/net/udp_tunnel.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index cb2f89f..210eb90 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -103,7 +103,8 @@ static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
{
int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
- return iptunnel_handle_offloads(skb, udp_csum, type);
+ /* As we're a UDP tunnel, we support LCO, so don't need csum_help */
+ return iptunnel_handle_offloads(skb, false, type);
}
static inline void udp_tunnel_gro_complete(struct sk_buff *skb, int nhoff)
^ permalink raw reply related [flat|nested] 31+ messages in thread* [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-07 17:10 [PATCH v2 net-next 0/5] Local Checksum Offload Edward Cree
2016-01-07 17:12 ` [PATCH v2 net-next 1/5] net: local checksum offload for encapsulation Edward Cree
2016-01-07 17:12 ` [PATCH v2 net-next 2/5] net: enable LCO for udp_tunnel_handle_offloads() users Edward Cree
@ 2016-01-07 17:12 ` Edward Cree
2016-01-08 0:15 ` Alexander Duyck
2016-01-08 3:46 ` Alexander Duyck
2016-01-07 17:13 ` [PATCH v2 net-next 4/5] fou: enable LCO in FOU and GUE Edward Cree
` (2 subsequent siblings)
5 siblings, 2 replies; 31+ messages in thread
From: Edward Cree @ 2016-01-07 17:12 UTC (permalink / raw)
To: David Miller; +Cc: netdev, linux-net-drivers, Tom Herbert
Signed-off-by: Edward Cree <ecree@solarflare.com>
---
drivers/net/vxlan.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 6369a57..d4acbc9 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1733,7 +1733,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
goto err;
}
- skb = iptunnel_handle_offloads(skb, udp_sum, type);
+ skb = iptunnel_handle_offloads(skb, false, type);
if (IS_ERR(skb)) {
err = -EINVAL;
goto err;
@@ -1814,7 +1814,7 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk
if (WARN_ON(!skb))
return -ENOMEM;
- skb = iptunnel_handle_offloads(skb, udp_sum, type);
+ skb = iptunnel_handle_offloads(skb, false, type);
if (IS_ERR(skb))
return PTR_ERR(skb);
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-07 17:12 ` [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload Edward Cree
@ 2016-01-08 0:15 ` Alexander Duyck
2016-01-08 15:33 ` Edward Cree
2016-01-08 3:46 ` Alexander Duyck
1 sibling, 1 reply; 31+ messages in thread
From: Alexander Duyck @ 2016-01-08 0:15 UTC (permalink / raw)
To: Edward Cree; +Cc: David Miller, Netdev, linux-net-drivers, Tom Herbert
On Thu, Jan 7, 2016 at 9:12 AM, Edward Cree <ecree@solarflare.com> wrote:
> Signed-off-by: Edward Cree <ecree@solarflare.com>
> ---
> drivers/net/vxlan.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 6369a57..d4acbc9 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -1733,7 +1733,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
> goto err;
> }
>
A few lines up from here there is a "udp_sum = false" that can be
dropped for the remote checksum stuff.
> - skb = iptunnel_handle_offloads(skb, udp_sum, type);
> + skb = iptunnel_handle_offloads(skb, false, type);
> if (IS_ERR(skb)) {
> err = -EINVAL;
> goto err;
> @@ -1814,7 +1814,7 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk
> if (WARN_ON(!skb))
> return -ENOMEM;
>
Same thing for this one. If you aren't passing it there is no point
in updating it a few lines above here.
> - skb = iptunnel_handle_offloads(skb, udp_sum, type);
> + skb = iptunnel_handle_offloads(skb, false, type);
> if (IS_ERR(skb))
> return PTR_ERR(skb);
>
^ permalink raw reply [flat|nested] 31+ messages in thread* Re: [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-08 0:15 ` Alexander Duyck
@ 2016-01-08 15:33 ` Edward Cree
0 siblings, 0 replies; 31+ messages in thread
From: Edward Cree @ 2016-01-08 15:33 UTC (permalink / raw)
To: Alexander Duyck; +Cc: David Miller, Netdev, linux-net-drivers, Tom Herbert
On 08/01/16 00:15, Alexander Duyck wrote:
> On Thu, Jan 7, 2016 at 9:12 AM, Edward Cree <ecree@solarflare.com> wrote:
>> Signed-off-by: Edward Cree <ecree@solarflare.com>
>> ---
>> drivers/net/vxlan.c | 4 ++--
>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
>> index 6369a57..d4acbc9 100644
>> --- a/drivers/net/vxlan.c
>> +++ b/drivers/net/vxlan.c
>> @@ -1733,7 +1733,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
>> goto err;
>> }
>>
> A few lines up from here there is a "udp_sum = false" that can be
> dropped for the remote checksum stuff.
Good catch. Fixed up these (and equivalents in the FOU/GUE patch) for the
nextversion of the patch series.
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-07 17:12 ` [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload Edward Cree
2016-01-08 0:15 ` Alexander Duyck
@ 2016-01-08 3:46 ` Alexander Duyck
2016-01-08 15:39 ` Edward Cree
1 sibling, 1 reply; 31+ messages in thread
From: Alexander Duyck @ 2016-01-08 3:46 UTC (permalink / raw)
To: Edward Cree; +Cc: David Miller, Netdev, linux-net-drivers, Tom Herbert
On Thu, Jan 7, 2016 at 9:12 AM, Edward Cree <ecree@solarflare.com> wrote:
> Signed-off-by: Edward Cree <ecree@solarflare.com>
> ---
> drivers/net/vxlan.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 6369a57..d4acbc9 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -1733,7 +1733,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
> goto err;
> }
>
> - skb = iptunnel_handle_offloads(skb, udp_sum, type);
> + skb = iptunnel_handle_offloads(skb, false, type);
> if (IS_ERR(skb)) {
> err = -EINVAL;
> goto err;
> @@ -1814,7 +1814,7 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk
> if (WARN_ON(!skb))
> return -ENOMEM;
>
> - skb = iptunnel_handle_offloads(skb, udp_sum, type);
> + skb = iptunnel_handle_offloads(skb, false, type);
> if (IS_ERR(skb))
> return PTR_ERR(skb);
>
>
So I just tried testing your patches and as it turns out you are still
missing some bits. In this patch for instance the calls to
udp_tunnel_xmit_skb and udp_tunnel6_xmit skb need to be updated so
that you pass false for the last parameter and allow the use of your
udp_set_csum modifications.
- Alex
^ permalink raw reply [flat|nested] 31+ messages in thread* Re: [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-08 3:46 ` Alexander Duyck
@ 2016-01-08 15:39 ` Edward Cree
2016-01-08 18:03 ` Alexander Duyck
0 siblings, 1 reply; 31+ messages in thread
From: Edward Cree @ 2016-01-08 15:39 UTC (permalink / raw)
To: Alexander Duyck; +Cc: David Miller, Netdev, linux-net-drivers, Tom Herbert
On 08/01/16 03:46, Alexander Duyck wrote:
> So I just tried testing your patches and as it turns out you are still
> missing some bits. In this patch for instance the calls to
> udp_tunnel_xmit_skb and udp_tunnel6_xmit skb need to be updated so
> that you pass false for the last parameter and allow the use of your
> udp_set_csum modifications.
>
> - Alex
Are you remembering to enable outer checksums on your vxlan tunnel? It's
the vxflags & VXLAN_F_UDP_CSUM check, and to enable it you need a recent
iproute2 so you can use the 'udpcsum' option when creating the vxlan device.
It would be nice if that could just default to enabled now that we have LCO,
but I don't think we can change that now (ABIs set in stone and all that),
so I think it would have to be iproute2 that turned it on by default.
-ed
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-08 15:39 ` Edward Cree
@ 2016-01-08 18:03 ` Alexander Duyck
2016-01-08 19:40 ` Jesse Gross
0 siblings, 1 reply; 31+ messages in thread
From: Alexander Duyck @ 2016-01-08 18:03 UTC (permalink / raw)
To: Edward Cree; +Cc: David Miller, Netdev, linux-net-drivers, Tom Herbert
On Fri, Jan 8, 2016 at 7:39 AM, Edward Cree <ecree@solarflare.com> wrote:
> On 08/01/16 03:46, Alexander Duyck wrote:
>> So I just tried testing your patches and as it turns out you are still
>> missing some bits. In this patch for instance the calls to
>> udp_tunnel_xmit_skb and udp_tunnel6_xmit skb need to be updated so
>> that you pass false for the last parameter and allow the use of your
>> udp_set_csum modifications.
>>
>> - Alex
> Are you remembering to enable outer checksums on your vxlan tunnel? It's
> the vxflags & VXLAN_F_UDP_CSUM check, and to enable it you need a recent
> iproute2 so you can use the 'udpcsum' option when creating the vxlan device.
No I hadn't done that. I kind of assumed that the checksums were just
being enabled by default. That might explain the some of the issues I
had.. ;-)
> It would be nice if that could just default to enabled now that we have LCO,
> but I don't think we can change that now (ABIs set in stone and all that),
> so I think it would have to be iproute2 that turned it on by default.
If anything we might want to expose the capabilities of the kernel so
that iproute2 could make an informed decision on if it want to enable
the outer checksum by default or not.
- Alex
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-08 18:03 ` Alexander Duyck
@ 2016-01-08 19:40 ` Jesse Gross
2016-01-08 21:22 ` Alexander Duyck
0 siblings, 1 reply; 31+ messages in thread
From: Jesse Gross @ 2016-01-08 19:40 UTC (permalink / raw)
To: Alexander Duyck
Cc: Edward Cree, David Miller, Netdev, linux-net-drivers, Tom Herbert
On Fri, Jan 8, 2016 at 10:03 AM, Alexander Duyck
<alexander.duyck@gmail.com> wrote:
> On Fri, Jan 8, 2016 at 7:39 AM, Edward Cree <ecree@solarflare.com> wrote:
>> On 08/01/16 03:46, Alexander Duyck wrote:
>>> So I just tried testing your patches and as it turns out you are still
>>> missing some bits. In this patch for instance the calls to
>>> udp_tunnel_xmit_skb and udp_tunnel6_xmit skb need to be updated so
>>> that you pass false for the last parameter and allow the use of your
>>> udp_set_csum modifications.
>>>
>>> - Alex
>> Are you remembering to enable outer checksums on your vxlan tunnel? It's
>> the vxflags & VXLAN_F_UDP_CSUM check, and to enable it you need a recent
>> iproute2 so you can use the 'udpcsum' option when creating the vxlan device.
>
> No I hadn't done that. I kind of assumed that the checksums were just
> being enabled by default. That might explain the some of the issues I
> had.. ;-)
>
>> It would be nice if that could just default to enabled now that we have LCO,
>> but I don't think we can change that now (ABIs set in stone and all that),
>> so I think it would have to be iproute2 that turned it on by default.
>
> If anything we might want to expose the capabilities of the kernel so
> that iproute2 could make an informed decision on if it want to enable
> the outer checksum by default or not.
Keep in mind that protocols like VXLAN specify that the UDP checksum
should be set to zero. As a result, enabling the checksum is not a
purely local decision and it probably shouldn't be on by default.
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-08 19:40 ` Jesse Gross
@ 2016-01-08 21:22 ` Alexander Duyck
2016-01-08 21:36 ` Rick Jones
2016-01-11 17:24 ` Jesse Gross
0 siblings, 2 replies; 31+ messages in thread
From: Alexander Duyck @ 2016-01-08 21:22 UTC (permalink / raw)
To: Jesse Gross
Cc: Edward Cree, David Miller, Netdev, linux-net-drivers, Tom Herbert
On Fri, Jan 8, 2016 at 11:40 AM, Jesse Gross <jesse@kernel.org> wrote:
> On Fri, Jan 8, 2016 at 10:03 AM, Alexander Duyck
> <alexander.duyck@gmail.com> wrote:
>> On Fri, Jan 8, 2016 at 7:39 AM, Edward Cree <ecree@solarflare.com> wrote:
>>> On 08/01/16 03:46, Alexander Duyck wrote:
>>>> So I just tried testing your patches and as it turns out you are still
>>>> missing some bits. In this patch for instance the calls to
>>>> udp_tunnel_xmit_skb and udp_tunnel6_xmit skb need to be updated so
>>>> that you pass false for the last parameter and allow the use of your
>>>> udp_set_csum modifications.
>>>>
>>>> - Alex
>>> Are you remembering to enable outer checksums on your vxlan tunnel? It's
>>> the vxflags & VXLAN_F_UDP_CSUM check, and to enable it you need a recent
>>> iproute2 so you can use the 'udpcsum' option when creating the vxlan device.
>>
>> No I hadn't done that. I kind of assumed that the checksums were just
>> being enabled by default. That might explain the some of the issues I
>> had.. ;-)
>>
>>> It would be nice if that could just default to enabled now that we have LCO,
>>> but I don't think we can change that now (ABIs set in stone and all that),
>>> so I think it would have to be iproute2 that turned it on by default.
>>
>> If anything we might want to expose the capabilities of the kernel so
>> that iproute2 could make an informed decision on if it want to enable
>> the outer checksum by default or not.
>
> Keep in mind that protocols like VXLAN specify that the UDP checksum
> should be set to zero. As a result, enabling the checksum is not a
> purely local decision and it probably shouldn't be on by default.
Yes, but the RFC allows for a non-zero checksum. The key difference
here is the wording between SHOULD and MUST. We aren't requiring the
other endpoint to enable outer checksum on its end, and we don't
require the remote end to validate the outer checksum. We have made
that purely optional. If however the other end is also a Linux client
it can make use of the outer checksum and hardware offloads to greatly
improve the performance.
It is essentially all the goodness of remote checksum offload without
the compatibility negatives, assuming of course everyone implemented
things according to the RFC and didn't make the 0 checksum mandatory..
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-08 21:22 ` Alexander Duyck
@ 2016-01-08 21:36 ` Rick Jones
2016-01-08 22:07 ` Tom Herbert
2016-01-11 17:24 ` Jesse Gross
1 sibling, 1 reply; 31+ messages in thread
From: Rick Jones @ 2016-01-08 21:36 UTC (permalink / raw)
To: Alexander Duyck, Jesse Gross
Cc: Edward Cree, David Miller, Netdev, linux-net-drivers, Tom Herbert
On 01/08/2016 01:22 PM, Alexander Duyck wrote:
> and we don't require the remote end to validate the outer checksum.
Doesn't the UDP specification require validating a non-zero checksum on
receipt?
rick jones
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-08 21:36 ` Rick Jones
@ 2016-01-08 22:07 ` Tom Herbert
0 siblings, 0 replies; 31+ messages in thread
From: Tom Herbert @ 2016-01-08 22:07 UTC (permalink / raw)
To: Rick Jones
Cc: Alexander Duyck, Jesse Gross, Edward Cree, David Miller, Netdev,
linux-net-drivers
On Fri, Jan 8, 2016 at 1:36 PM, Rick Jones <rick.jones2@hpe.com> wrote:
> On 01/08/2016 01:22 PM, Alexander Duyck wrote:
>>
>> and we don't require the remote end to validate the outer checksum.
>
>
> Doesn't the UDP specification require validating a non-zero checksum on
> receipt?
>
Yes, VXLAN RFC is broken in this regard since it allows a receiver to
ignore a non-zero checksums. In Linux we always validate non-zero
checksums.
For VXLAN, UDP checksums are not enabled for IPv4 by default, but are
enabled by default for IPv6. Checksums are required for UDP/IPv6,
however RFC6935 and RFC6936 relax this requirement a bit for tunnels,
although IMO not enough to make disabling UDP checksums for VXLAN/IPv6
the default.
Tom
> rick jones
>
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-08 21:22 ` Alexander Duyck
2016-01-08 21:36 ` Rick Jones
@ 2016-01-11 17:24 ` Jesse Gross
2016-01-11 17:55 ` Tom Herbert
1 sibling, 1 reply; 31+ messages in thread
From: Jesse Gross @ 2016-01-11 17:24 UTC (permalink / raw)
To: Alexander Duyck
Cc: Edward Cree, David Miller, Netdev, linux-net-drivers, Tom Herbert
On Fri, Jan 8, 2016 at 1:22 PM, Alexander Duyck
<alexander.duyck@gmail.com> wrote:
> On Fri, Jan 8, 2016 at 11:40 AM, Jesse Gross <jesse@kernel.org> wrote:
>> On Fri, Jan 8, 2016 at 10:03 AM, Alexander Duyck
>> <alexander.duyck@gmail.com> wrote:
>>> On Fri, Jan 8, 2016 at 7:39 AM, Edward Cree <ecree@solarflare.com> wrote:
>>>> On 08/01/16 03:46, Alexander Duyck wrote:
>>>>> So I just tried testing your patches and as it turns out you are still
>>>>> missing some bits. In this patch for instance the calls to
>>>>> udp_tunnel_xmit_skb and udp_tunnel6_xmit skb need to be updated so
>>>>> that you pass false for the last parameter and allow the use of your
>>>>> udp_set_csum modifications.
>>>>>
>>>>> - Alex
>>>> Are you remembering to enable outer checksums on your vxlan tunnel? It's
>>>> the vxflags & VXLAN_F_UDP_CSUM check, and to enable it you need a recent
>>>> iproute2 so you can use the 'udpcsum' option when creating the vxlan device.
>>>
>>> No I hadn't done that. I kind of assumed that the checksums were just
>>> being enabled by default. That might explain the some of the issues I
>>> had.. ;-)
>>>
>>>> It would be nice if that could just default to enabled now that we have LCO,
>>>> but I don't think we can change that now (ABIs set in stone and all that),
>>>> so I think it would have to be iproute2 that turned it on by default.
>>>
>>> If anything we might want to expose the capabilities of the kernel so
>>> that iproute2 could make an informed decision on if it want to enable
>>> the outer checksum by default or not.
>>
>> Keep in mind that protocols like VXLAN specify that the UDP checksum
>> should be set to zero. As a result, enabling the checksum is not a
>> purely local decision and it probably shouldn't be on by default.
>
> Yes, but the RFC allows for a non-zero checksum. The key difference
> here is the wording between SHOULD and MUST. We aren't requiring the
> other endpoint to enable outer checksum on its end, and we don't
> require the remote end to validate the outer checksum. We have made
> that purely optional. If however the other end is also a Linux client
> it can make use of the outer checksum and hardware offloads to greatly
> improve the performance.
>
> It is essentially all the goodness of remote checksum offload without
> the compatibility negatives, assuming of course everyone implemented
> things according to the RFC and didn't make the 0 checksum mandatory..
It's hard to say what other types of endpoints might do if they
receive a VXLAN packet with a checksum set and, of course, there are
other protocols that don't specify that received UDP checksums can be
ignored.
I'm all in favor of enabling support for the checksum but it doesn't
seem right to make the default behavior go against the RFC
recommendation and common implementation.
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-11 17:24 ` Jesse Gross
@ 2016-01-11 17:55 ` Tom Herbert
2016-01-11 18:27 ` Edward Cree
0 siblings, 1 reply; 31+ messages in thread
From: Tom Herbert @ 2016-01-11 17:55 UTC (permalink / raw)
To: Jesse Gross
Cc: Alexander Duyck, Edward Cree, David Miller, Netdev,
linux-net-drivers
On Mon, Jan 11, 2016 at 9:24 AM, Jesse Gross <jesse@kernel.org> wrote:
> On Fri, Jan 8, 2016 at 1:22 PM, Alexander Duyck
> <alexander.duyck@gmail.com> wrote:
>> On Fri, Jan 8, 2016 at 11:40 AM, Jesse Gross <jesse@kernel.org> wrote:
>>> On Fri, Jan 8, 2016 at 10:03 AM, Alexander Duyck
>>> <alexander.duyck@gmail.com> wrote:
>>>> On Fri, Jan 8, 2016 at 7:39 AM, Edward Cree <ecree@solarflare.com> wrote:
>>>>> On 08/01/16 03:46, Alexander Duyck wrote:
>>>>>> So I just tried testing your patches and as it turns out you are still
>>>>>> missing some bits. In this patch for instance the calls to
>>>>>> udp_tunnel_xmit_skb and udp_tunnel6_xmit skb need to be updated so
>>>>>> that you pass false for the last parameter and allow the use of your
>>>>>> udp_set_csum modifications.
>>>>>>
>>>>>> - Alex
>>>>> Are you remembering to enable outer checksums on your vxlan tunnel? It's
>>>>> the vxflags & VXLAN_F_UDP_CSUM check, and to enable it you need a recent
>>>>> iproute2 so you can use the 'udpcsum' option when creating the vxlan device.
>>>>
>>>> No I hadn't done that. I kind of assumed that the checksums were just
>>>> being enabled by default. That might explain the some of the issues I
>>>> had.. ;-)
>>>>
>>>>> It would be nice if that could just default to enabled now that we have LCO,
>>>>> but I don't think we can change that now (ABIs set in stone and all that),
>>>>> so I think it would have to be iproute2 that turned it on by default.
>>>>
>>>> If anything we might want to expose the capabilities of the kernel so
>>>> that iproute2 could make an informed decision on if it want to enable
>>>> the outer checksum by default or not.
>>>
>>> Keep in mind that protocols like VXLAN specify that the UDP checksum
>>> should be set to zero. As a result, enabling the checksum is not a
>>> purely local decision and it probably shouldn't be on by default.
>>
>> Yes, but the RFC allows for a non-zero checksum. The key difference
>> here is the wording between SHOULD and MUST. We aren't requiring the
>> other endpoint to enable outer checksum on its end, and we don't
>> require the remote end to validate the outer checksum. We have made
>> that purely optional. If however the other end is also a Linux client
>> it can make use of the outer checksum and hardware offloads to greatly
>> improve the performance.
>>
>> It is essentially all the goodness of remote checksum offload without
>> the compatibility negatives, assuming of course everyone implemented
>> things according to the RFC and didn't make the 0 checksum mandatory..
>
> It's hard to say what other types of endpoints might do if they
> receive a VXLAN packet with a checksum set and, of course, there are
> other protocols that don't specify that received UDP checksums can be
> ignored.
>
RFC1122 is very clear on the subject of UDP checksums:
"If a UDP datagram is received with a checksum that is non- zero and
invalid, UDP MUST silently discard the datagram."
So if a VXLAN receiver accepts a UDP packet without checking a
non-zero UDP checksum for validity they are breaking the UDP protocol.
If they blow up because they don't expect to receive non-zero
checksums that is entirely their problem.
> I'm all in favor of enabling support for the checksum but it doesn't
> seem right to make the default behavior go against the RFC
> recommendation and common implementation.
RFC2460 requires UDP checksums for IPv6 which IMO supersedes what is
recommended in RFC7348 which says that the checksum SHOULD be
transmitted as zero. RFC6935 and RFC 6936 permit zero checksums for
tunneled packets over IPv6 under specific conditions, but we cannot
say that those conditions are meant in every deployment to make that a
default. This is interpretation is consistent with other foo/UDP, like
MPLS/UDP RFC7510 which states:
"When UDP is used over IPv6, the UDP checksum is relied upon to
protect both the IPv6 and UDP headers from corruption and MUST be used
unless the requirements in [RFC6935] and [RFC6936] for use of UDP
zero-checksum mode with a tunnel protocol are satisfied."
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-11 17:55 ` Tom Herbert
@ 2016-01-11 18:27 ` Edward Cree
2016-01-11 18:43 ` Tom Herbert
0 siblings, 1 reply; 31+ messages in thread
From: Edward Cree @ 2016-01-11 18:27 UTC (permalink / raw)
To: Tom Herbert, Jesse Gross
Cc: Alexander Duyck, David Miller, Netdev, linux-net-drivers
On 11/01/16 17:55, Tom Herbert wrote:
> On Mon, Jan 11, 2016 at 9:24 AM, Jesse Gross <jesse@kernel.org> wrote:
>> It's hard to say what other types of endpoints might do if they
>> receive a VXLAN packet with a checksum set and, of course, there are
>> other protocols that don't specify that received UDP checksums can be
>> ignored.
>>
> RFC1122 is very clear on the subject of UDP checksums:
>
> "If a UDP datagram is received with a checksum that is non- zero and
> invalid, UDP MUST silently discard the datagram."
>
> So if a VXLAN receiver accepts a UDP packet without checking a
> non-zero UDP checksum for validity they are breaking the UDP protocol.
> If they blow up because they don't expect to receive non-zero
> checksums that is entirely their problem.
More to the point, RFC7348 says that if the outer checksum isvalid, the
receiver MUST accept it.
"If the decapsulating destination chooses not to perform the verification,
or performs it successfully, the packet MUST be accepted for decapsulation."
Therefore, if they blow up because they don't expect to receive non-zero
checksums, they're not compliant with the spec.
On the other hand, there is always 'be conservative in what you emit'... how
far should you go to interoperate with broken implementations.
On the gripping hand, I suspect RFC7348 only recommends against outer csums
in the first place because the authors believed it was expensive.
-ed
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload
2016-01-11 18:27 ` Edward Cree
@ 2016-01-11 18:43 ` Tom Herbert
0 siblings, 0 replies; 31+ messages in thread
From: Tom Herbert @ 2016-01-11 18:43 UTC (permalink / raw)
To: Edward Cree
Cc: Jesse Gross, Alexander Duyck, David Miller, Netdev,
linux-net-drivers
On Mon, Jan 11, 2016 at 10:27 AM, Edward Cree <ecree@solarflare.com> wrote:
> On 11/01/16 17:55, Tom Herbert wrote:
>> On Mon, Jan 11, 2016 at 9:24 AM, Jesse Gross <jesse@kernel.org> wrote:
>>> It's hard to say what other types of endpoints might do if they
>>> receive a VXLAN packet with a checksum set and, of course, there are
>>> other protocols that don't specify that received UDP checksums can be
>>> ignored.
>>>
>> RFC1122 is very clear on the subject of UDP checksums:
>>
>> "If a UDP datagram is received with a checksum that is non- zero and
>> invalid, UDP MUST silently discard the datagram."
>>
>> So if a VXLAN receiver accepts a UDP packet without checking a
>> non-zero UDP checksum for validity they are breaking the UDP protocol.
>> If they blow up because they don't expect to receive non-zero
>> checksums that is entirely their problem.
> More to the point, RFC7348 says that if the outer checksum isvalid, the
> receiver MUST accept it.
But the only way to know if a checksum is valid is to actually do the
computation. :-)
> "If the decapsulating destination chooses not to perform the verification,
> or performs it successfully, the packet MUST be accepted for decapsulation."
> Therefore, if they blow up because they don't expect to receive non-zero
> checksums, they're not compliant with the spec.
> On the other hand, there is always 'be conservative in what you emit'... how
> far should you go to interoperate with broken implementations.
> On the gripping hand, I suspect RFC7348 only recommends against outer csums
> in the first place because the authors believed it was expensive.
>
Right, I've probably mentioned it at least ten times in nvo3 that we
usually get *better* performance by enabling the outer checksums due
to use of checksum offload. The reason Cisco doesn't want payload
checksums is because switch hardware typically doesn't support it, but
this doesn't justify changing fundamental UDP requirements for their
purposes. Making enabling of transmitting UDP checksums per tunnel is
sufficient to allow interoperability with such devices.
> -ed
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v2 net-next 4/5] fou: enable LCO in FOU and GUE
2016-01-07 17:10 [PATCH v2 net-next 0/5] Local Checksum Offload Edward Cree
` (2 preceding siblings ...)
2016-01-07 17:12 ` [PATCH v2 net-next 3/5] net: vxlan: enable local checksum offload Edward Cree
@ 2016-01-07 17:13 ` Edward Cree
2016-01-07 18:51 ` Tom Herbert
2016-01-07 17:14 ` [PATCH v2 net-next 5/5] Documentation/networking: add tx-offloads.txt to explain LCO Edward Cree
2016-01-11 17:05 ` [RFC PATCH 0/2] Rework of "net: local checksum offload for encapsulation" Alexander Duyck
5 siblings, 1 reply; 31+ messages in thread
From: Edward Cree @ 2016-01-07 17:13 UTC (permalink / raw)
To: David Miller; +Cc: netdev, linux-net-drivers, Tom Herbert
Signed-off-by: Edward Cree <ecree@solarflare.com>
---
net/ipv4/fou.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index e0fcbbb..2b0002f 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -773,7 +773,6 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
uh->dest = e->dport;
uh->source = sport;
uh->len = htons(skb->len);
- uh->check = 0;
udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
fl4->saddr, fl4->daddr, skb->len);
@@ -787,7 +786,7 @@ int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
__be16 sport;
- skb = iptunnel_handle_offloads(skb, csum, type);
+ skb = iptunnel_handle_offloads(skb, false, type);
if (IS_ERR(skb))
return PTR_ERR(skb);
@@ -821,7 +820,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
optlen += need_priv ? GUE_LEN_PRIV : 0;
- skb = iptunnel_handle_offloads(skb, csum, type);
+ skb = iptunnel_handle_offloads(skb, false, type);
if (IS_ERR(skb))
return PTR_ERR(skb);
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v2 net-next 4/5] fou: enable LCO in FOU and GUE
2016-01-07 17:13 ` [PATCH v2 net-next 4/5] fou: enable LCO in FOU and GUE Edward Cree
@ 2016-01-07 18:51 ` Tom Herbert
2016-01-07 19:00 ` Edward Cree
0 siblings, 1 reply; 31+ messages in thread
From: Tom Herbert @ 2016-01-07 18:51 UTC (permalink / raw)
To: Edward Cree
Cc: David Miller, Linux Kernel Network Developers, linux-net-drivers
On Thu, Jan 7, 2016 at 9:13 AM, Edward Cree <ecree@solarflare.com> wrote:
> Signed-off-by: Edward Cree <ecree@solarflare.com>
> ---
> net/ipv4/fou.c | 5 ++---
> 1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
> index e0fcbbb..2b0002f 100644
> --- a/net/ipv4/fou.c
> +++ b/net/ipv4/fou.c
> @@ -773,7 +773,6 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
> uh->dest = e->dport;
> uh->source = sport;
> uh->len = htons(skb->len);
> - uh->check = 0;
> udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
> fl4->saddr, fl4->daddr, skb->len);
>
> @@ -787,7 +786,7 @@ int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
> int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
> __be16 sport;
>
> - skb = iptunnel_handle_offloads(skb, csum, type);
> + skb = iptunnel_handle_offloads(skb, false, type);
It might good to have another iptunnel_handle_offloads (lifke
iptunnel_handle_offloads_nocsum) function that doesn't have csum
parameter for readability.
>
> if (IS_ERR(skb))
> return PTR_ERR(skb);
> @@ -821,7 +820,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
>
> optlen += need_priv ? GUE_LEN_PRIV : 0;
>
> - skb = iptunnel_handle_offloads(skb, csum, type);
> + skb = iptunnel_handle_offloads(skb, false, type);
>
> if (IS_ERR(skb))
> return PTR_ERR(skb);
>
> The information contained in this message is confidential and is intended for the addressee(s) only. If you have received this message in error, please notify the sender immediately and delete the message. Unless you are an addressee (or authorized to receive for an addressee), you may not use, copy or disclose to anyone this message or any information contained in this message. The unauthorized use, disclosure, copying or alteration of this message is strictly prohibited.
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 net-next 4/5] fou: enable LCO in FOU and GUE
2016-01-07 18:51 ` Tom Herbert
@ 2016-01-07 19:00 ` Edward Cree
0 siblings, 0 replies; 31+ messages in thread
From: Edward Cree @ 2016-01-07 19:00 UTC (permalink / raw)
To: Tom Herbert
Cc: David Miller, Linux Kernel Network Developers, linux-net-drivers
On 07/01/16 18:51, Tom Herbert wrote:
> On Thu, Jan 7, 2016 at 9:13 AM, Edward Cree <ecree@solarflare.com> wrote:
>> @@ -787,7 +786,7 @@ int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
>> int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
>> __be16 sport;
>>
>> - skb = iptunnel_handle_offloads(skb, csum, type);
>> + skb = iptunnel_handle_offloads(skb, false, type);
> It might good to have another iptunnel_handle_offloads (lifke
> iptunnel_handle_offloads_nocsum) function that doesn't have csum
> parameter for readability.
I hope to follow up soon with a patch to use LCO for GRE, at which point I
believe that all callers will be passing false anyway, so the csum_help
parameter can be removed from the function entirely.
-ed
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v2 net-next 5/5] Documentation/networking: add tx-offloads.txt to explain LCO
2016-01-07 17:10 [PATCH v2 net-next 0/5] Local Checksum Offload Edward Cree
` (3 preceding siblings ...)
2016-01-07 17:13 ` [PATCH v2 net-next 4/5] fou: enable LCO in FOU and GUE Edward Cree
@ 2016-01-07 17:14 ` Edward Cree
2016-01-07 18:58 ` Tom Herbert
2016-01-11 17:05 ` [RFC PATCH 0/2] Rework of "net: local checksum offload for encapsulation" Alexander Duyck
5 siblings, 1 reply; 31+ messages in thread
From: Edward Cree @ 2016-01-07 17:14 UTC (permalink / raw)
To: David Miller; +Cc: netdev, linux-net-drivers, Tom Herbert
It would also be a good place to explain GSO if someone wants to do that...
Signed-off-by: Edward Cree <ecree@solarflare.com>
---
Documentation/networking/00-INDEX | 2 +
Documentation/networking/tx-offloads.txt | 122 +++++++++++++++++++++++++++++++
2 files changed, 124 insertions(+)
create mode 100644 Documentation/networking/tx-offloads.txt
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index df27a1a..1fcf432 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -216,6 +216,8 @@ tproxy.txt
- Transparent proxy support user guide.
tuntap.txt
- TUN/TAP device driver, allowing user space Rx/Tx of packets.
+tx-offloads.txt
+ - Explanation of Transmit Offloads: Checksums, LCO, RCO
udplite.txt
- UDP-Lite protocol (RFC 3828) introduction.
vortex.txt
diff --git a/Documentation/networking/tx-offloads.txt b/Documentation/networking/tx-offloads.txt
new file mode 100644
index 0000000..1a51993
--- /dev/null
+++ b/Documentation/networking/tx-offloads.txt
@@ -0,0 +1,122 @@
+Transmit Offloads in the Linux Networking Stack
+
+
+Introduction
+============
+
+This document describes a set of techniques in the Linux networking stack
+ to take advantage of offload capabilities of various NICs.
+
+The following technologies are described:
+ * TX Checksum Offload
+ * LCO: Local Checksum Offload
+ * RCO: Remote Checksum Offload
+
+Things that should be documented here but aren't yet:
+ * Encapsulation offloads
+ - hardware VLAN acceleration
+ - hardware VXLAN/GRE/GENEVE/etc. encapsulation
+ * Segmentation offloads
+ - GSO: Generic Segmentation Offload
+ - TSO: TCP Segmentation Offload
+
+
+TX Checksum Offload
+===================
+
+The interface for offloading a transmit checksum to a device is explained
+ in detail in comments near the top of include/linux/skbuff.h.
+In brief, it allows to request the device fill in a single ones-complement
+ checksum defined by the sk_buff fields skb->csum_start and
+ skb->csum_offset. The device should compute the 16-bit ones-complement
+ checksum (i.e. the 'IP-style' checksum) from csum_start to the end of the
+ packet, and fill in the result at (csum_start + csum_offset).
+Because csum_offset cannot be negative, this ensures that the previous
+ value of the checksum field is included in the checksum computation, thus
+ it can be used to supply any needed corrections to the checksum (such as
+ the sum of the pseudo-header for UDP or TCP).
+This interface only allows a single checksum to be offloaded. Where
+ encapsulation is used, the packet may have multiple checksum fields in
+ different header layers, and the rest will have to be handled by another
+ mechanism such as LCO or RCO.
+No offloading of the IP header checksum is performed; it is always done in
+ software. This is OK because when we build the IP header, we obviously
+ have it in cache, so summing it isn't expensive. It's also rather short.
+The requirements for GSO are more complicated, because when segmenting an
+ encapsulated packet both the inner and outer checksums may need to be
+ edited or recomputed for each resulting segment. See the skbuff.h comment
+ (section 'E') for more details.
+
+A driver declares its offload capabilities in netdev->hw_features; see
+ Documentation/networking/netdev-features for more. Note that a device
+ which only advertises NETIF_F_IP[V6]_CSUM must still obey the csum_start
+ and csum_offset given in the SKB; if it tries to deduce these itself in
+ hardware (as some NICs do) the driver should check that the values in the
+ SKB match those which the hardware will deduce, and if not, fall back to
+ checksumming in software instead (with skb_checksum_help or one of the
+ skb_csum_off_chk* functions as mentioned in include/linux/skbuff.h). This
+ is a pain, but that's what you get when hardware tries to be clever.
+
+The stack should, for the most part, assume that checksum offload is
+ supported by the underlying device. The only place that should check is
+ validate_xmit_skb(), and the functions it calls directly or indirectly.
+ That function compares the offload features requested by the SKB (which
+ may include other offloads besides TX Checksum Offload) and, if they are
+ not supported or enabled on the device (determined by netdev->features),
+ performs the corresponding offload in software. In the case of TX
+ Checksum Offload, that means calling skb_checksum_help(skb).
+
+
+LCO: Local Checksum Offload
+===========================
+
+LCO is a technique for efficiently computing the outer checksum of an
+ encapsulated datagram when the inner checksum is due to be offloaded.
+The ones-complement sum of a correctly checksummed TCP or UDP packet is
+ equal to the sum of the pseudo header, because everything else gets
+ 'cancelled out' by the checksum field. This is because the sum was
+ complemented before being written to the checksum field.
+More generally, this holds in any case where the 'IP-style' ones complement
+ checksum is used, and thus any checksum that TX Checksum Offload supports.
+That is, if we have set up TX Checksum Offload with a start/offset pair, we
+ know that _after the device has filled in that checksum_, the ones
+ complement sum from csum_start to the end of the packet will be equal to
+ _whatever value we put in the checksum field beforehand_. This allows us
+ to compute the outer checksum without looking at the payload: we simply
+ stop summing when we get to csum_start, then add the 16-bit word at
+ (csum_start + csum_offset).
+Then, when the true inner checksum is filled in (either by hardware or by
+ skb_checksum_help()), the outer checksum will become correct by virtue of
+ the arithmetic.
+
+LCO is performed by the stack when constructing an outer UDP header for an
+ encapsulation such as VXLAN or GENEVE, in udp_set_csum(). Similarly for
+ the IPv6 equivalents, in udp6_set_csum().
+It is *not* currently performed when constructing a GRE header; the
+ GRE checksum is computed over the whole packet in either
+ net/ipv4/ip_gre.c:build_header() or net/ipv6/ip6_gre.c:ip6gre_xmit2(), but
+ it should be possible to use LCO here as GRE uses an IP-style checksum.
+There is a helper function lco_csum(), in include/linux/skbuff.h, which may
+ be useful for this.
+
+LCO can safely be used for nested encapsulations; in this case, the outer
+ encapsulation layer will sum over both its own header and the 'middle'
+ header. This does mean that the 'middle' header will get summed multiple
+ times, but there doesn't seem to be a way to avoid that without incurring
+ bigger costs (e.g. in SKB bloat).
+
+
+RCO: Remote Checksum Offload
+============================
+
+RCO is a technique for eliding the inner checksum of an encapsulated
+ datagram, allowing the outer checksum to be offloaded. It does, however,
+ involve a change to the encapsulation protocols, which the receiver must
+ also support. For this reason, it is disabled by default.
+RCO is detailed in the following Internet-Drafts:
+https://tools.ietf.org/html/draft-herbert-remotecsumoffload-00
+https://tools.ietf.org/html/draft-herbert-vxlan-rco-00
+In Linux, RCO is implemented individually in each encapsulation protocol,
+ and most tunnel types have flags controlling its use. For instance, VXLAN
+ has the flag VXLAN_F_REMCSUM_TX (per struct vxlan_rdst) to indicate that
+ RCO should be used when transmitting to a given remote destination.
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v2 net-next 5/5] Documentation/networking: add tx-offloads.txt to explain LCO
2016-01-07 17:14 ` [PATCH v2 net-next 5/5] Documentation/networking: add tx-offloads.txt to explain LCO Edward Cree
@ 2016-01-07 18:58 ` Tom Herbert
0 siblings, 0 replies; 31+ messages in thread
From: Tom Herbert @ 2016-01-07 18:58 UTC (permalink / raw)
To: Edward Cree
Cc: David Miller, Linux Kernel Network Developers, linux-net-drivers
On Thu, Jan 7, 2016 at 9:14 AM, Edward Cree <ecree@solarflare.com> wrote:
> It would also be a good place to explain GSO if someone wants to do that...
>
This looks great! Maybe the title should be checksum-offload.txt
though. We can add the material from skbuff.h to describe all about RX
and TX checksum later on. Segmentation offload could be another doc in
itself.
Tom
> Signed-off-by: Edward Cree <ecree@solarflare.com>
> ---
> Documentation/networking/00-INDEX | 2 +
> Documentation/networking/tx-offloads.txt | 122 +++++++++++++++++++++++++++++++
> 2 files changed, 124 insertions(+)
> create mode 100644 Documentation/networking/tx-offloads.txt
>
> diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
> index df27a1a..1fcf432 100644
> --- a/Documentation/networking/00-INDEX
> +++ b/Documentation/networking/00-INDEX
> @@ -216,6 +216,8 @@ tproxy.txt
> - Transparent proxy support user guide.
> tuntap.txt
> - TUN/TAP device driver, allowing user space Rx/Tx of packets.
> +tx-offloads.txt
> + - Explanation of Transmit Offloads: Checksums, LCO, RCO
> udplite.txt
> - UDP-Lite protocol (RFC 3828) introduction.
> vortex.txt
> diff --git a/Documentation/networking/tx-offloads.txt b/Documentation/networking/tx-offloads.txt
> new file mode 100644
> index 0000000..1a51993
> --- /dev/null
> +++ b/Documentation/networking/tx-offloads.txt
> @@ -0,0 +1,122 @@
> +Transmit Offloads in the Linux Networking Stack
> +
> +
> +Introduction
> +============
> +
> +This document describes a set of techniques in the Linux networking stack
> + to take advantage of offload capabilities of various NICs.
> +
> +The following technologies are described:
> + * TX Checksum Offload
> + * LCO: Local Checksum Offload
> + * RCO: Remote Checksum Offload
> +
> +Things that should be documented here but aren't yet:
> + * Encapsulation offloads
> + - hardware VLAN acceleration
> + - hardware VXLAN/GRE/GENEVE/etc. encapsulation
> + * Segmentation offloads
> + - GSO: Generic Segmentation Offload
> + - TSO: TCP Segmentation Offload
> +
> +
> +TX Checksum Offload
> +===================
> +
> +The interface for offloading a transmit checksum to a device is explained
> + in detail in comments near the top of include/linux/skbuff.h.
> +In brief, it allows to request the device fill in a single ones-complement
> + checksum defined by the sk_buff fields skb->csum_start and
> + skb->csum_offset. The device should compute the 16-bit ones-complement
> + checksum (i.e. the 'IP-style' checksum) from csum_start to the end of the
> + packet, and fill in the result at (csum_start + csum_offset).
> +Because csum_offset cannot be negative, this ensures that the previous
> + value of the checksum field is included in the checksum computation, thus
> + it can be used to supply any needed corrections to the checksum (such as
> + the sum of the pseudo-header for UDP or TCP).
> +This interface only allows a single checksum to be offloaded. Where
> + encapsulation is used, the packet may have multiple checksum fields in
> + different header layers, and the rest will have to be handled by another
> + mechanism such as LCO or RCO.
> +No offloading of the IP header checksum is performed; it is always done in
> + software. This is OK because when we build the IP header, we obviously
> + have it in cache, so summing it isn't expensive. It's also rather short.
> +The requirements for GSO are more complicated, because when segmenting an
> + encapsulated packet both the inner and outer checksums may need to be
> + edited or recomputed for each resulting segment. See the skbuff.h comment
> + (section 'E') for more details.
> +
> +A driver declares its offload capabilities in netdev->hw_features; see
> + Documentation/networking/netdev-features for more. Note that a device
> + which only advertises NETIF_F_IP[V6]_CSUM must still obey the csum_start
> + and csum_offset given in the SKB; if it tries to deduce these itself in
> + hardware (as some NICs do) the driver should check that the values in the
> + SKB match those which the hardware will deduce, and if not, fall back to
> + checksumming in software instead (with skb_checksum_help or one of the
> + skb_csum_off_chk* functions as mentioned in include/linux/skbuff.h). This
> + is a pain, but that's what you get when hardware tries to be clever.
> +
> +The stack should, for the most part, assume that checksum offload is
> + supported by the underlying device. The only place that should check is
> + validate_xmit_skb(), and the functions it calls directly or indirectly.
> + That function compares the offload features requested by the SKB (which
> + may include other offloads besides TX Checksum Offload) and, if they are
> + not supported or enabled on the device (determined by netdev->features),
> + performs the corresponding offload in software. In the case of TX
> + Checksum Offload, that means calling skb_checksum_help(skb).
> +
> +
> +LCO: Local Checksum Offload
> +===========================
> +
> +LCO is a technique for efficiently computing the outer checksum of an
> + encapsulated datagram when the inner checksum is due to be offloaded.
> +The ones-complement sum of a correctly checksummed TCP or UDP packet is
> + equal to the sum of the pseudo header, because everything else gets
> + 'cancelled out' by the checksum field. This is because the sum was
> + complemented before being written to the checksum field.
> +More generally, this holds in any case where the 'IP-style' ones complement
> + checksum is used, and thus any checksum that TX Checksum Offload supports.
> +That is, if we have set up TX Checksum Offload with a start/offset pair, we
> + know that _after the device has filled in that checksum_, the ones
> + complement sum from csum_start to the end of the packet will be equal to
> + _whatever value we put in the checksum field beforehand_. This allows us
> + to compute the outer checksum without looking at the payload: we simply
> + stop summing when we get to csum_start, then add the 16-bit word at
> + (csum_start + csum_offset).
> +Then, when the true inner checksum is filled in (either by hardware or by
> + skb_checksum_help()), the outer checksum will become correct by virtue of
> + the arithmetic.
> +
> +LCO is performed by the stack when constructing an outer UDP header for an
> + encapsulation such as VXLAN or GENEVE, in udp_set_csum(). Similarly for
> + the IPv6 equivalents, in udp6_set_csum().
> +It is *not* currently performed when constructing a GRE header; the
> + GRE checksum is computed over the whole packet in either
> + net/ipv4/ip_gre.c:build_header() or net/ipv6/ip6_gre.c:ip6gre_xmit2(), but
> + it should be possible to use LCO here as GRE uses an IP-style checksum.
> +There is a helper function lco_csum(), in include/linux/skbuff.h, which may
> + be useful for this.
> +
> +LCO can safely be used for nested encapsulations; in this case, the outer
> + encapsulation layer will sum over both its own header and the 'middle'
> + header. This does mean that the 'middle' header will get summed multiple
> + times, but there doesn't seem to be a way to avoid that without incurring
> + bigger costs (e.g. in SKB bloat).
> +
> +
> +RCO: Remote Checksum Offload
> +============================
> +
> +RCO is a technique for eliding the inner checksum of an encapsulated
> + datagram, allowing the outer checksum to be offloaded. It does, however,
> + involve a change to the encapsulation protocols, which the receiver must
> + also support. For this reason, it is disabled by default.
> +RCO is detailed in the following Internet-Drafts:
> +https://tools.ietf.org/html/draft-herbert-remotecsumoffload-00
> +https://tools.ietf.org/html/draft-herbert-vxlan-rco-00
> +In Linux, RCO is implemented individually in each encapsulation protocol,
> + and most tunnel types have flags controlling its use. For instance, VXLAN
> + has the flag VXLAN_F_REMCSUM_TX (per struct vxlan_rdst) to indicate that
> + RCO should be used when transmitting to a given remote destination.
> The information contained in this message is confidential and is intended for the addressee(s) only. If you have received this message in error, please notify the sender immediately and delete the message. Unless you are an addressee (or authorized to receive for an addressee), you may not use, copy or disclose to anyone this message or any information contained in this message. The unauthorized use, disclosure, copying or alteration of this message is strictly prohibited.
^ permalink raw reply [flat|nested] 31+ messages in thread
* [RFC PATCH 0/2] Rework of "net: local checksum offload for encapsulation"
2016-01-07 17:10 [PATCH v2 net-next 0/5] Local Checksum Offload Edward Cree
` (4 preceding siblings ...)
2016-01-07 17:14 ` [PATCH v2 net-next 5/5] Documentation/networking: add tx-offloads.txt to explain LCO Edward Cree
@ 2016-01-11 17:05 ` Alexander Duyck
2016-01-11 17:06 ` [RFC PATCH 1/2] net: local checksum offload for encapsulation Alexander Duyck
2016-01-11 17:06 ` [RFC PATCH 2/2] net: Add support for UDP local checksum offload as a part of tunnel segmentation Alexander Duyck
5 siblings, 2 replies; 31+ messages in thread
From: Alexander Duyck @ 2016-01-11 17:05 UTC (permalink / raw)
To: ecree, netdev; +Cc: tom, alexander.duyck
Edward,
Attached is your first patch with a couple of modifications in order to
improve performance and enable the use of local checksum offloag for GSO.
The second patch is my UDP local checksum offload patch for UDP tunnel
segmentation offload.
Feel free to incorporate the GSO patch into your series if you want, or
borrow liberally if you can apply some of this to GRE as I haven't gotten
around to working on that yet as I started doing a clean-up on the Intel
drivers to enable NETIF_F_HW_CSUM for a number of parts.
- Alex
---
Alexander Duyck (1):
net: Add support for UDP local checksum offload as a part of tunnel segmentation
Edward Cree (1):
net: local checksum offload for encapsulation
include/linux/skbuff.h | 30 ++++++++++++++++++++++++++++++
net/ipv4/ip_tunnel_core.c | 4 +++-
net/ipv4/udp.c | 24 ++++++------------------
net/ipv4/udp_offload.c | 38 +++++++++++++++++++++++---------------
net/ipv6/ip6_checksum.c | 23 ++++++-----------------
5 files changed, 68 insertions(+), 51 deletions(-)
^ permalink raw reply [flat|nested] 31+ messages in thread* [RFC PATCH 1/2] net: local checksum offload for encapsulation
2016-01-11 17:05 ` [RFC PATCH 0/2] Rework of "net: local checksum offload for encapsulation" Alexander Duyck
@ 2016-01-11 17:06 ` Alexander Duyck
2016-01-11 17:06 ` [RFC PATCH 2/2] net: Add support for UDP local checksum offload as a part of tunnel segmentation Alexander Duyck
1 sibling, 0 replies; 31+ messages in thread
From: Alexander Duyck @ 2016-01-11 17:06 UTC (permalink / raw)
To: ecree, netdev; +Cc: tom, alexander.duyck
From: Edward Cree <ecree@solarflare.com>
The arithmetic properties of the ones-complement checksum mean that a
correctly checksummed inner packet, including its checksum, has a ones
complement sum depending only on whatever value was used to initialise
the checksum field before checksumming (in the case of TCP and UDP,
this is the ones complement sum of the pseudo header, complemented).
Consequently, if we are going to offload the inner checksum with
CHECKSUM_PARTIAL, we can compute the outer checksum based only on the
packed data not covered by the inner checksum, and the initial value of
the inner checksum field.
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
---
include/linux/skbuff.h | 30 ++++++++++++++++++++++++++++++
net/ipv4/ip_tunnel_core.c | 4 +++-
net/ipv4/udp.c | 24 ++++++------------------
net/ipv6/ip6_checksum.c | 23 ++++++-----------------
4 files changed, 45 insertions(+), 36 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6b6bd42d6134..538cee5b8eb6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2161,6 +2161,11 @@ static inline int skb_checksum_start_offset(const struct sk_buff *skb)
return skb->csum_start - skb_headroom(skb);
}
+static inline unsigned char *skb_checksum_start(const struct sk_buff *skb)
+{
+ return skb->head + skb->csum_start;
+}
+
static inline int skb_transport_offset(const struct sk_buff *skb)
{
return skb_transport_header(skb) - skb->data;
@@ -3578,6 +3583,31 @@ static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res)
return csum_fold(partial);
}
+/* Local Checksum Offload.
+ * Compute outer checksum based on the assumption that the
+ * inner checksum will be offloaded later.
+ * See Documentation/networking/tx-offloads.txt for
+ * explanation of how this works.
+ * Fill in outer checksum adjustment (e.g. with sum of outer
+ * pseudo-header) before calling.
+ * Also ensure that inner checksum is in linear data area.
+ */
+static inline __wsum lco_csum(struct sk_buff *skb)
+{
+ unsigned char *csum_start = skb_checksum_start(skb);
+ unsigned char *l4_hdr = skb_transport_header(skb);
+ __wsum partial;
+
+ /* Start with complement of inner checksum adjustment */
+ partial = ~csum_unfold(*(__force __sum16 *)(csum_start +
+ skb->csum_offset));
+
+ /* Add in checksum of our headers (incl. outer checksum
+ * adjustment filled in by caller) and return result.
+ */
+ return csum_partial(l4_hdr, csum_start - l4_hdr, partial);
+}
+
static inline bool skb_is_gso(const struct sk_buff *skb)
{
return skb_shinfo(skb)->gso_size;
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index eb52ce950c27..ab10c1ed70a6 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -178,8 +178,10 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
err = skb_checksum_help(skb);
if (unlikely(err))
goto error;
- } else if (skb->ip_summed != CHECKSUM_PARTIAL)
+ } else if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ skb->encapsulation = 0;
skb->ip_summed = CHECKSUM_NONE;
+ }
return skb;
error:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3a66731e3af6..71e4a6d70a49 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -842,28 +842,16 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
uh->check = 0;
else if (skb_is_gso(skb))
uh->check = ~udp_v4_check(len, saddr, daddr, 0);
- else if (skb_dst(skb) && skb_dst(skb)->dev &&
- (skb_dst(skb)->dev->features &
- (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
+ else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ uh->check = 0;
+ uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb));
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ } else {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~udp_v4_check(len, saddr, daddr, 0);
- } else {
- __wsum csum;
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
- uh->check = 0;
- csum = skb_checksum(skb, 0, len, 0);
- uh->check = udp_v4_check(len, saddr, daddr, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
}
}
EXPORT_SYMBOL(udp_set_csum);
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 9a4d7322fb22..8f920580976f 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -98,27 +98,16 @@ void udp6_set_csum(bool nocheck, struct sk_buff *skb,
uh->check = 0;
else if (skb_is_gso(skb))
uh->check = ~udp_v6_check(len, saddr, daddr, 0);
- else if (skb_dst(skb) && skb_dst(skb)->dev &&
- (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
+ else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ uh->check = 0;
+ uh->check = udp_v6_check(len, saddr, daddr, lco_csum(skb));
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ } else {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~udp_v6_check(len, saddr, daddr, 0);
- } else {
- __wsum csum;
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
- uh->check = 0;
- csum = skb_checksum(skb, 0, len, 0);
- uh->check = udp_v6_check(len, saddr, daddr, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
}
}
EXPORT_SYMBOL(udp6_set_csum);
^ permalink raw reply related [flat|nested] 31+ messages in thread* [RFC PATCH 2/2] net: Add support for UDP local checksum offload as a part of tunnel segmentation
2016-01-11 17:05 ` [RFC PATCH 0/2] Rework of "net: local checksum offload for encapsulation" Alexander Duyck
2016-01-11 17:06 ` [RFC PATCH 1/2] net: local checksum offload for encapsulation Alexander Duyck
@ 2016-01-11 17:06 ` Alexander Duyck
1 sibling, 0 replies; 31+ messages in thread
From: Alexander Duyck @ 2016-01-11 17:06 UTC (permalink / raw)
To: ecree, netdev; +Cc: tom, alexander.duyck
This change makes it so that we can use the local checksum offload as a
part of UDP tunnel segmentation offload. The advantage to this is
significant as we can get both inner and outer checksum offloads on hardware
that supports inner checksum offloads. This allows us to make use of the
UDP Rx checksum offload available on most hardware to validate the outer
and inner headers via the code that converts CHECKSUM_UNNECESSARY into
CHECKSUM_COMPLETE for UDP tunnels.
Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
---
net/ipv4/udp_offload.c | 38 +++++++++++++++++++++++---------------
1 file changed, 23 insertions(+), 15 deletions(-)
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 130042660181..9543f800763f 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -42,28 +42,28 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
bool need_csum = !!(skb_shinfo(skb)->gso_type &
SKB_GSO_UDP_TUNNEL_CSUM);
bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
- bool offload_csum = false, dont_encap = (need_csum || remcsum);
+ bool offload_csum;
oldlen = (u16)~skb->len;
if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
goto out;
+ /* Try to offload checksum if possible */
+ offload_csum = !!(need_csum &&
+ ((skb->dev->features & NETIF_F_HW_CSUM) ||
+ (skb->dev->features & (is_ipv6 ?
+ NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM))));
+
skb->encapsulation = 0;
__skb_pull(skb, tnl_hlen);
skb_reset_mac_header(skb);
skb_set_network_header(skb, skb_inner_network_offset(skb));
skb->mac_len = skb_inner_network_offset(skb);
skb->protocol = new_protocol;
- skb->encap_hdr_csum = need_csum;
+ skb->encap_hdr_csum = need_csum && !offload_csum;
skb->remcsum_offload = remcsum;
- /* Try to offload checksum if possible */
- offload_csum = !!(need_csum &&
- ((skb->dev->features & NETIF_F_HW_CSUM) ||
- (skb->dev->features & (is_ipv6 ?
- NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM))));
-
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & features;
segs = gso_inner_segment(skb, enc_features);
@@ -81,13 +81,10 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
int len;
__be32 delta;
- if (dont_encap) {
- skb->encapsulation = 0;
- skb->ip_summed = CHECKSUM_NONE;
- } else {
- /* Only set up inner headers if we might be offloading
- * inner checksum.
- */
+ /* Only set up inner headers if we might be offloading
+ * inner checksum.
+ */
+ if (!remcsum) {
skb_reset_inner_headers(skb);
skb->encapsulation = 1;
}
@@ -111,6 +108,17 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
uh->check = ~csum_fold((__force __wsum)
((__force u32)uh->check +
(__force u32)delta));
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ uh->check = csum_fold(lco_csum(skb));
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ continue;
+ }
+
+ skb->encapsulation = 0;
+ skb->ip_summed = CHECKSUM_NONE;
+
if (offload_csum) {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head;
^ permalink raw reply related [flat|nested] 31+ messages in thread