From: Andreas Petlund <apetlund@simula.no>
To: "netdev@vger.kernel.org" <netdev@vger.kernel.org>
Cc: "Ilpo Järvinen" <ilpo.jarvinen@helsinki.fi>,
"Eric Dumazet" <eric.dumazet@gmail.com>,
"Arnd Hannemann" <hannemann@nets.rwth-aachen.de>,
LKML <linux-kernel@vger.kernel.org>,
shemminger@vyatta.com, "David Miller" <davem@davemloft.net>,
william.allen.simpson@gmail.com, damian@tvk.rwth-aachen.de
Subject: [net-next PATCH v3 2/3] net: TCP thin linear timeouts
Date: Thu, 11 Feb 2010 13:07:41 +0100 [thread overview]
Message-ID: <4B73F30D.6040205@simula.no> (raw)
Major changes:
- The mechanisms can be disabled through the socket option
- The socket option value is bounds-checked
Signed-off-by: Andreas Petlund <apetlund@simula.no>
---
include/linux/sysctl.h | 1 +
include/linux/tcp.h | 3 +++
include/net/tcp.h | 4 ++++
net/ipv4/sysctl_net_ipv4.c | 7 +++++++
net/ipv4/tcp.c | 7 +++++++
net/ipv4/tcp_timer.c | 19 ++++++++++++++++++-
6 files changed, 40 insertions(+), 1 deletions(-)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 9f236cd..d840d75 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -425,6 +425,7 @@ enum
NET_TCP_ALLOWED_CONG_CONTROL=123,
NET_TCP_MAX_SSTHRESH=124,
NET_TCP_FRTO_RESPONSE=125,
+ NET_TCP_FORCE_THIN_LINEAR_TIMEOUTS=126,
};
enum {
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7fee8a4..67da706 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -103,6 +103,7 @@ enum {
#define TCP_CONGESTION 13 /* Congestion control algorithm */
#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */
#define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */
+#define TCP_THIN_LT 16 /* Use linear timeouts for thin streams*/
/* for TCP_INFO socket option */
#define TCPI_OPT_TIMESTAMPS 1
@@ -341,6 +342,8 @@ struct tcp_sock {
u16 advmss; /* Advertised MSS */
u8 frto_counter; /* Number of new acks after RTO */
u8 nonagle; /* Disable Nagle algorithm? */
+ u8 thin_lt : 1,/* Use linear timeouts for thin streams */
+ thin_undef : 7;
/* RTT measurement */
u32 srtt; /* smoothed round trip time << 3 */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e5e2056..bc5856a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -196,6 +196,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCP_NAGLE_CORK 2 /* Socket is corked */
#define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */
+/* TCP thin-stream limits */
+#define TCP_THIN_LT_RETRIES 6 /* After 6 linear retries, do exp. backoff */
+
extern struct inet_timewait_death_row tcp_death_row;
/* sysctl variables for tcp */
@@ -241,6 +244,7 @@ extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_max_ssthresh;
extern int sysctl_tcp_cookie_size;
+extern int sysctl_tcp_force_thin_linear_timeouts;
extern atomic_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7e3712c..cb2ed35 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -576,6 +576,13 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "tcp_force_thin_linear_timeouts",
+ .data = &sysctl_tcp_force_thin_linear_timeouts,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d5d69ea..ce9aeb0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2229,6 +2229,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
}
break;
+ case TCP_THIN_LT:
+ if (val < 0 || val > 1)
+ err = -EINVAL;
+ else
+ tp->thin_lt = val;
+ break;
+
case TCP_CORK:
/* When set indicates to always queue non-full frames.
* Later the user clears this option and we transmit
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index de7d1bf..a682479 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
int sysctl_tcp_orphan_retries __read_mostly;
+int sysctl_tcp_force_thin_linear_timeouts __read_mostly;
static void tcp_write_timer(unsigned long);
static void tcp_delack_timer(unsigned long);
@@ -415,7 +416,23 @@ void tcp_retransmit_timer(struct sock *sk)
icsk->icsk_retransmits++;
out_reset_timer:
- icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+ /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
+ * used to reset the timer, set it to 0. Recalculate 'icsk_rto', as it
+ * may have been increased while the stream oscillated between thin and
+ * thick; the old value might thus already be too high compared to the
+ * value set by 'tcp_set_rto' in tcp_input.c, which resets the rto
+ * without backoff. Limit to TCP_THIN_LT_RETRIES before initiating
+ * exponential backoff behaviour, to avoid continuing to hammer
+ * linear-timeout retransmissions into a black hole. */
+ if ((tp->thin_lt || sysctl_tcp_force_thin_linear_timeouts) &&
+ tcp_stream_is_thin(sk) && sk->sk_state == TCP_ESTABLISHED &&
+ icsk->icsk_retransmits <= TCP_THIN_LT_RETRIES) {
+ icsk->icsk_backoff = 0;
+ icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
+ } else {
+ /* Use normal (exponential) backoff */
+ icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+ }
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
__sk_dst_reset(sk);
--
1.6.3.3
next reply other threads:[~2010-02-11 12:07 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-02-11 12:07 Andreas Petlund [this message]
2010-02-12 3:52 ` [net-next PATCH v3 2/3] net: TCP thin linear timeouts Eric Dumazet
2010-02-13 15:49 ` Andreas Petlund
2010-02-12 11:19 ` William Allen Simpson
2010-02-13 15:50 ` Andreas Petlund
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4B73F30D.6040205@simula.no \
--to=apetlund@simula.no \
--cc=damian@tvk.rwth-aachen.de \
--cc=davem@davemloft.net \
--cc=eric.dumazet@gmail.com \
--cc=hannemann@nets.rwth-aachen.de \
--cc=ilpo.jarvinen@helsinki.fi \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=shemminger@vyatta.com \
--cc=william.allen.simpson@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).