From: Eric Dumazet <eric.dumazet@gmail.com>
To: unlisted-recipients:; (no To-header on input)
Cc: Julian Anastasov <ja@ssi.bg>, Willy Tarreau <w@1wt.eu>,
David Miller <davem@davemloft.net>,
netdev@vger.kernel.org
Subject: Re: TCP_DEFER_ACCEPT is missing counter update
Date: Fri, 16 Oct 2009 07:00:49 +0200 [thread overview]
Message-ID: <4AD7FE01.1010805@gmail.com> (raw)
In-Reply-To: <4AD7EDB4.7060907@gmail.com>
Eric Dumazet a écrit :
>
>
> So, it appears defer_accept value is not an inherited attribute,
> but shared by all embryos. Therefore we should not touch it.
>
> Of course it should be done, or add a new connection field to count number
> of pure ACKs received on each SYN_RECV embryo.
>
Could it be something like this? (On top of net-next-2.6, of course.)
7 bits is more than enough; we could take 5 bits IMHO.
Thanks
[PATCH net-next-2.6] tcp: TCP_DEFER_ACCEPT fix
Julian Anastasov pointed out that defer_accept is an attribute of the listening
socket, shared by all embryos. We therefore need a new per-connection attribute.
We can split the current u8 used by cookie_ts into 7 bits used to store the
number of pure ACKs received by the embryo, and 1 bit to store cookie_ts.
Note: I use an unsigned int bitfield so that kmemcheck doesn't complain. Since
mss thereby becomes a bitfield member whose address cannot be taken, the patch
also touches the cookie_v4_init_sequence()/cookie_v6_init_sequence()
implementations, which now take the request_sock instead of a pointer to mss.
Reported-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
include/net/request_sock.h | 9 ++++++---
include/net/tcp.h | 6 ++++--
net/ipv4/syncookies.c | 7 ++++---
net/ipv4/tcp_ipv4.c | 2 +-
net/ipv4/tcp_minisocks.c | 4 ++--
net/ipv6/syncookies.c | 6 +++---
net/ipv6/tcp_ipv6.c | 2 +-
7 files changed, 21 insertions(+), 15 deletions(-)
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index c719084..3464d90 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -45,9 +45,12 @@ struct request_sock_ops {
*/
struct request_sock {
struct request_sock *dl_next; /* Must be first member! */
- u16 mss;
- u8 retrans;
- u8 cookie_ts; /* syncookie: encode tcpopts in timestamp */
+ kmemcheck_bitfield_begin(flags);
+ unsigned int mss : 16,
+ retrans : 8,
+ req_acks : 7,
+ cookie_ts : 1; /* syncookie: encode tcpopts in timestamp */
+ kmemcheck_bitfield_end(flags);
/* The following two fields can be easily recomputed I think -AK */
u32 window_clamp; /* window clamp at creation time */
u32 rcv_wnd; /* rcv_wnd offered first time */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 03a49c7..a2c0439 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -453,7 +453,7 @@ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct ip_options *opt);
extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
- __u16 *mss);
+ struct request_sock *req);
extern __u32 cookie_init_timestamp(struct request_sock *req);
extern void cookie_check_timestamp(struct tcp_options_received *tcp_opt);
@@ -461,7 +461,7 @@ extern void cookie_check_timestamp(struct tcp_options_received *tcp_opt);
/* From net/ipv6/syncookies.c */
extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
extern __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb,
- __u16 *mss);
+ struct request_sock *req);
/* tcp_output.c */
@@ -1000,7 +1000,9 @@ static inline void tcp_openreq_init(struct request_sock *req,
struct inet_request_sock *ireq = inet_rsk(req);
req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
+ kmemcheck_annotate_bitfield(req, flags);
req->cookie_ts = 0;
+ req->req_acks = 0;
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
req->mss = rx_opt->mss_clamp;
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 5ec678a..8af9261 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -160,19 +160,20 @@ static __u16 const msstab[] = {
* Generate a syncookie. mssp points to the mss, which is returned
* rounded down to the value encoded in the cookie.
*/
-__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req)
{
const struct iphdr *iph = ip_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
int mssind;
- const __u16 mss = *mssp;
+ const __u16 mss = req->mss;
tcp_synq_overflow(sk);
/* XXX sort msstab[] by probability? Binary search? */
for (mssind = 0; mss > msstab[mssind + 1]; mssind++)
;
- *mssp = msstab[mssind] + 1;
+ req->mss = msstab[mssind] + 1;
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9971870..3a2dfc5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1286,7 +1286,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
syn_flood_warning(skb);
req->cookie_ts = tmp_opt.tstamp_ok;
#endif
- isn = cookie_v4_init_sequence(sk, skb, &req->mss);
+ isn = cookie_v4_init_sequence(sk, skb, req);
} else if (!isn) {
struct inet_peer *peer = NULL;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e320afe..92778e6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -642,9 +642,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
return NULL;
/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
- if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+ if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept > req->req_acks &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
- inet_csk(sk)->icsk_accept_queue.rskq_defer_accept--;
+ req->req_acks++;
inet_rsk(req)->acked = 1;
return NULL;
}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index cbe55e5..60d024d 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -125,18 +125,18 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, struct in6_addr *saddr,
& COOKIEMASK;
}
-__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, struct request_sock *req)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
int mssind;
- const __u16 mss = *mssp;
+ const __u16 mss = req->mss;
tcp_synq_overflow(sk);
for (mssind = 0; mss > msstab[mssind + 1]; mssind++)
;
- *mssp = msstab[mssind] + 1;
+ req->mss = msstab[mssind] + 1;
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4517630..2717bcb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1219,7 +1219,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
TCP_ECN_create_request(req, tcp_hdr(skb));
if (want_cookie) {
- isn = cookie_v6_init_sequence(sk, skb, &req->mss);
+ isn = cookie_v6_init_sequence(sk, skb, req);
req->cookie_ts = tmp_opt.tstamp_ok;
} else if (!isn) {
if (ipv6_opt_accepted(sk, skb) ||
next prev parent reply other threads:[~2009-10-16 5:02 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-10-13 5:07 TCP_DEFER_ACCEPT is missing counter update Willy Tarreau
2009-10-13 7:11 ` David Miller
2009-10-13 7:19 ` Willy Tarreau
2009-10-13 7:27 ` David Miller
2009-10-13 21:27 ` Julian Anastasov
2009-10-14 4:52 ` Willy Tarreau
2009-10-14 7:27 ` Julian Anastasov
2009-10-14 20:17 ` Willy Tarreau
2009-10-14 21:12 ` Olaf van der Spek
2009-10-14 22:43 ` David Miller
2009-10-15 6:08 ` Willy Tarreau
2009-10-15 8:47 ` Julian Anastasov
2009-10-15 12:41 ` Willy Tarreau
2009-10-15 22:44 ` Julian Anastasov
2009-10-16 3:51 ` Eric Dumazet
2009-10-16 5:00 ` Eric Dumazet [this message]
2009-10-16 5:29 ` Willy Tarreau
2009-10-16 6:05 ` Eric Dumazet
2009-10-16 6:18 ` Willy Tarreau
2009-10-16 7:08 ` Eric Dumazet
2009-10-16 7:19 ` Willy Tarreau
2009-10-16 5:03 ` Willy Tarreau
2009-10-16 8:49 ` Julian Anastasov
2009-10-16 10:40 ` Eric Dumazet
2009-10-16 19:27 ` Willy Tarreau
2009-10-17 11:48 ` Julian Anastasov
2009-10-17 12:07 ` Eric Dumazet
2009-10-17 14:20 ` Julian Anastasov
2009-10-19 20:01 ` Eric Dumazet
2009-10-19 20:11 ` Willy Tarreau
2009-10-19 20:17 ` Eric Dumazet
2009-10-20 2:23 ` David Miller
2009-10-15 7:59 ` Julian Anastasov
2009-10-16 10:08 ` Ilpo Järvinen
2009-10-13 7:23 ` Eric Dumazet
2009-10-13 7:34 ` Willy Tarreau
2009-10-13 8:08 ` Olaf van der Spek
2009-10-13 8:29 ` Eric Dumazet
2009-10-13 8:35 ` David Miller
2009-10-13 7:35 ` David Miller
2009-10-13 8:12 ` Willy Tarreau
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4AD7FE01.1010805@gmail.com \
--to=eric.dumazet@gmail.com \
--cc=davem@davemloft.net \
--cc=ja@ssi.bg \
--cc=netdev@vger.kernel.org \
--cc=w@1wt.eu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).