From: Eric Dumazet <eric.dumazet@gmail.com>
To: Bhaskar Dutta <bhaskie@gmail.com>
Cc: Stephen Hemminger <shemminger@vyatta.com>,
Ben Hutchings <bhutchings@solarflare.com>,
netdev@vger.kernel.org, David Miller <davem@davemloft.net>
Subject: Re: TCP-MD5 checksum failure on x86_64 SMP
Date: Fri, 07 May 2010 23:18:57 +0200 [thread overview]
Message-ID: <1273267137.2325.31.camel@edumazet-laptop> (raw)
In-Reply-To: <1273247090.2261.81.camel@edumazet-laptop>
Le vendredi 07 mai 2010 à 17:44 +0200, Eric Dumazet a écrit :
> Le vendredi 07 mai 2010 à 17:18 +0200, Eric Dumazet a écrit :
> > OK, I found the second problem.
> >
> > if/when IP route cache is invalidated, ip_queue_xmit() has to refetch a
> > route and calls sk_setup_caps(sk, &rt->u.dst), destroying the
> >
> > sk->sk_route_caps &= ~NETIF_F_GSO_MASK
> >
> > that MD5 desperately tries to make all along its way (from
> > tcp_transmit_skb() for example)
> >
> > So we send few bad packets, and everything is fine when
> > tcp_transmit_skb() is called again.
> >
> > You get many errors on remote peer if you do
> >
> > ip route flush cache
> >
The patch solves the problem for me. I tested it with 200 MD5 sockets
established between two 16-CPU machines, with a multiqueue NIC, traffic
of 100,000 packets per second, and "ip route flush cache" every minute on
both machines. After five hours, not a single frame had a bad hash
value.
Here is the official submission.
[PATCH] net: Introduce sk_route_nocaps
TCP-MD5 sessions have intermittent failures when the route cache is
invalidated. ip_queue_xmit() has to find a new route and calls
sk_setup_caps(sk, &rt->u.dst), destroying the
sk->sk_route_caps &= ~NETIF_F_GSO_MASK
that MD5 desperately tries to make all along its way (from
tcp_transmit_skb() for example).
So we send a few bad packets, and everything is fine when
tcp_transmit_skb() is called again for this socket.
Since ip_queue_xmit() is at a lower level than TCP-MD5, I chose to use a
socket field, sk_route_nocaps, containing bits to mask on sk_route_caps.
Reported-by: Bhaskar Dutta <bhaskie@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
include/net/sock.h | 8 ++++++++
net/core/sock.c | 1 +
net/ipv4/tcp_ipv4.c | 6 +++---
net/ipv4/tcp_output.c | 2 +-
net/ipv6/tcp_ipv6.c | 4 ++--
5 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 1ad6435..abfadfe 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -177,6 +177,7 @@ struct sock_common {
* %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
* @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
* @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
+ * @sk_route_nocaps: forbidden route capabilities (e.g. %NETIF_F_GSO_MASK)
* @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
* @sk_gso_max_size: Maximum GSO segment size to build
* @sk_lingertime: %SO_LINGER l_linger setting
@@ -276,6 +277,7 @@ struct sock {
int sk_forward_alloc;
gfp_t sk_allocation;
int sk_route_caps;
+ int sk_route_nocaps;
int sk_gso_type;
unsigned int sk_gso_max_size;
int sk_rcvlowat;
@@ -1257,6 +1259,12 @@ static inline int sk_can_gso(const struct sock *sk)
extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
+static inline void sk_nocaps_add(struct sock *sk, int flags)
+{
+ sk->sk_route_nocaps |= flags;
+ sk->sk_route_caps &= ~flags;
+}
+
static inline int skb_copy_to_page(struct sock *sk, char __user *from,
struct sk_buff *skb, struct page *page,
int off, int copy)
diff --git a/net/core/sock.c b/net/core/sock.c
index c5812bb..5056a6a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1227,6 +1227,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps = dst->dev->features;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
+ sk->sk_route_caps &= ~sk->sk_route_nocaps;
if (sk_can_gso(sk)) {
if (dst->header_len) {
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3c23e70..f1a1dd9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -894,7 +894,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
kfree(newkey);
return -ENOMEM;
}
- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
}
if (tcp_alloc_md5sig_pool(sk) == NULL) {
kfree(newkey);
@@ -1024,7 +1024,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
return -EINVAL;
tp->md5sig_info = p;
- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
}
newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
@@ -1465,7 +1465,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
if (newkey != NULL)
tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
newkey, key->keylen);
- newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
}
#endif
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0dda86e..0193a39 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -872,7 +872,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
if (md5) {
- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
tp->af_specific->calc_md5_hash(opts.hash_location,
md5, sk, NULL, skb);
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 075f540..bf34893 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -600,7 +600,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
kfree(newkey);
return -ENOMEM;
}
- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
}
if (tcp_alloc_md5sig_pool(sk) == NULL) {
kfree(newkey);
@@ -737,7 +737,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
return -ENOMEM;
tp->md5sig_info = p;
- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
}
newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
next prev parent reply other threads:[~2010-05-07 21:19 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <i2h571fb4001005031027y4a58c4dtfd28ddcdc08d8401@mail.gmail.com>
2010-05-04 3:30 ` TCP-MD5 checksum failure on x86_64 SMP Bhaskar Dutta
2010-05-04 11:32 ` Ben Hutchings
2010-05-04 14:28 ` Bhaskar Dutta
2010-05-04 16:12 ` Stephen Hemminger
2010-05-04 17:08 ` Bhaskar Dutta
2010-05-04 17:13 ` Stephen Hemminger
2010-05-05 18:03 ` Bhaskar Dutta
2010-05-05 18:53 ` Eric Dumazet
2010-05-06 11:55 ` Bhaskar Dutta
2010-05-06 12:06 ` Eric Dumazet
2010-05-07 5:04 ` David Miller
2010-05-07 5:32 ` Eric Dumazet
2010-05-07 17:14 ` Stephen Hemminger
2010-05-07 17:21 ` Eric Dumazet
2010-05-07 17:36 ` Stephen Hemminger
2010-05-07 21:40 ` Eric Dumazet
2010-05-10 14:55 ` Bijay Singh
2010-05-10 15:18 ` Eric Dumazet
2010-05-10 17:27 ` Bijay Singh
2010-05-11 4:08 ` Bijay Singh
2010-05-11 6:27 ` Eric Dumazet
2010-05-11 8:23 ` Bijay Singh
2010-05-11 20:50 ` Eric Dumazet
2010-05-12 3:20 ` Eric Dumazet
2010-05-12 22:22 ` Stephen Hemminger
2010-05-12 22:24 ` David Miller
2010-05-16 19:53 ` Eric Dumazet
2010-05-16 20:48 ` Eric Dumazet
2010-05-17 3:49 ` Bijay Singh
2010-05-17 5:03 ` Eric Dumazet
2010-05-17 17:22 ` Stephen Hemminger
2010-05-17 20:42 ` Stephen Hemminger
2010-05-17 21:04 ` [PATCH] tcp: tcp_synack_options() fix Eric Dumazet
2010-05-18 5:35 ` David Miller
2010-05-16 7:30 ` TCP-MD5 checksum failure on x86_64 SMP David Miller
2010-05-07 8:46 ` Lars Eggert
2010-05-07 8:55 ` Eric Dumazet
2010-05-07 9:12 ` David Miller
2010-05-07 5:39 ` Eric Dumazet
2010-05-07 8:00 ` Eric Dumazet
2010-05-07 8:59 ` Bhaskar Dutta
2010-05-07 9:37 ` Eric Dumazet
2010-05-07 10:50 ` Bhaskar Dutta
2010-05-07 15:18 ` Eric Dumazet
2010-05-07 15:44 ` Eric Dumazet
2010-05-07 21:18 ` Eric Dumazet [this message]
2010-05-16 7:37 ` David Miller
2010-05-16 7:35 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1273267137.2325.31.camel@edumazet-laptop \
--to=eric.dumazet@gmail.com \
--cc=bhaskie@gmail.com \
--cc=bhutchings@solarflare.com \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=shemminger@vyatta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox