From: Ben Woodard <woodard@redhat.com>
To: "YOSHIFUJI Hideaki / 吉藤英明" <yoshfuji@linux-ipv6.org>
Cc: vladislav.yasevich@hp.com, davem@davemloft.net,
netdev@vger.kernel.org, mgrondona@llnl.gov, behlendorf1@llnl.gov
Subject: Re: [PATCH] Customizable TCP backoff patch
Date: Thu, 12 Oct 2006 08:49:25 -0700 [thread overview]
Message-ID: <452E6405.1010607@redhat.com> (raw)
In-Reply-To: <20061012.100615.18914547.yoshfuji@linux-ipv6.org>
[-- Attachment #1: Type: text/plain, Size: 933 bytes --]
YOSHIFUJI Hideaki / 吉藤英明 wrote:
>> + .data = &sysctl_tcp_rto_max,
>> + .maxlen = sizeof(unsigned),
>
> sizeof(unsigned long)
Good catch. That would have corrupted things badly on some 64b
platforms. With all the flux in the area I forgot to change the size of
that but would have been OK on the ia32 boxes.
>> diff -ru linux-2.6.18/net/ipv4/tcp.c linux-2.6.18.new/net/ipv4/tcp.c
>> --- linux-2.6.18/net/ipv4/tcp.c 2006-09-19 20:42:06.000000000 -0700
>> +++ linux-2.6.18.new/net/ipv4/tcp.c 2006-10-11 16:00:37.000000000 -0700
>> @@ -2110,6 +2126,12 @@
>> if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
>> return -EFAULT;
>> return 0;
>> + case TCP_BACKOFF_MAX:
>> + val = jiffies_to_msecs(tcp_rto_max(tp));
>> + break;
>
> tp->rto_max
>
>> + case TCP_BACKOFF_INIT:
>> + val = jiffies_to_msecs(tcp_rto_init(tp));
>> + break;
>> default:
>
> tp->rto_init
OK I get it now.
>
> --yoshfuji
[-- Attachment #2: tcp-backoff-18-6.diff --]
[-- Type: text/x-patch, Size: 6881 bytes --]
diff -ru linux-2.6.18/include/linux/sysctl.h linux-2.6.18.new/include/linux/sysctl.h
--- linux-2.6.18/include/linux/sysctl.h 2006-09-19 20:42:06.000000000 -0700
+++ linux-2.6.18.new/include/linux/sysctl.h 2006-10-11 10:27:52.000000000 -0700
@@ -411,6 +411,8 @@
NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115,
NET_TCP_DMA_COPYBREAK=116,
NET_TCP_SLOW_START_AFTER_IDLE=117,
+ NET_TCP_RTO_MAX=118,
+ NET_TCP_RTO_INIT=119,
};
enum {
diff -ru linux-2.6.18/include/linux/tcp.h linux-2.6.18.new/include/linux/tcp.h
--- linux-2.6.18/include/linux/tcp.h 2006-09-19 20:42:06.000000000 -0700
+++ linux-2.6.18.new/include/linux/tcp.h 2006-10-12 08:28:52.645411000 -0700
@@ -94,6 +94,8 @@
#define TCP_INFO 11 /* Information about this connection. */
#define TCP_QUICKACK 12 /* Block/reenable quick acks */
#define TCP_CONGESTION 13 /* Congestion control algorithm */
+#define TCP_BACKOFF_MAX 14 /* Maximum backoff value */
+#define TCP_BACKOFF_INIT 15 /* Initial backoff value */
#define TCPI_OPT_TIMESTAMPS 1
#define TCPI_OPT_SACK 2
@@ -257,6 +259,8 @@
__u8 frto_counter; /* Number of new acks after RTO */
__u8 nonagle; /* Disable Nagle algorithm? */
__u8 keepalive_probes; /* num of allowed keep alive probes */
+ __u16 rto_max; /* Maximum backoff value in ms */
+ __u16 rto_init; /* Initial backoff value in ms */
/* RTT measurement */
__u32 srtt; /* smoothed round trip time << 3 */
Only in linux-2.6.18.new/include/linux: tcp.h~
diff -ru linux-2.6.18/include/net/tcp.h linux-2.6.18.new/include/net/tcp.h
--- linux-2.6.18/include/net/tcp.h 2006-09-19 20:42:06.000000000 -0700
+++ linux-2.6.18.new/include/net/tcp.h 2006-10-11 17:43:23.091431000 -0700
@@ -227,11 +227,23 @@
extern int sysctl_tcp_base_mss;
extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
+extern unsigned long sysctl_tcp_rto_max;
+extern unsigned long sysctl_tcp_rto_init;
extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated;
extern int tcp_memory_pressure;
+static inline unsigned long tcp_rto_max(struct tcp_sock *tp)
+{
+ return tp->rto_max ? msecs_to_jiffies(tp->rto_max) : sysctl_tcp_rto_max;
+}
+
+static inline unsigned long tcp_rto_init(struct tcp_sock *tp)
+{
+ return tp->rto_init ? msecs_to_jiffies(tp->rto_init) : sysctl_tcp_rto_init;
+}
+
/*
* The next routines deal with comparing 32 bit unsigned ints
* and worry about wraparound (automatic with unsigned arithmetic).
Only in linux-2.6.18.new/include/net: tcp.h~
diff -ru linux-2.6.18/net/ipv4/sysctl_net_ipv4.c linux-2.6.18.new/net/ipv4/sysctl_net_ipv4.c
--- linux-2.6.18/net/ipv4/sysctl_net_ipv4.c 2006-09-19 20:42:06.000000000 -0700
+++ linux-2.6.18.new/net/ipv4/sysctl_net_ipv4.c 2006-10-12 07:14:41.871910000 -0700
@@ -128,6 +128,8 @@
return ret;
}
+static unsigned long tcp_rto_min=0;
+static unsigned long tcp_rto_max=65535;
ctl_table ipv4_table[] = {
{
@@ -697,6 +699,26 @@
.mode = 0644,
.proc_handler = &proc_dointvec
},
+ {
+ .ctl_name = NET_TCP_RTO_MAX,
+ .procname = "tcp_rto_max",
+ .data = &sysctl_tcp_rto_max,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = &tcp_rto_min_constant,
+ .extra2 = &tcp_rto_max_constant,
+ },
+ {
+ .ctl_name = NET_TCP_RTO_INIT,
+ .procname = "tcp_rto_init",
+ .data = &sysctl_tcp_rto_init,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = &tcp_rto_min_constant,
+ .extra2 = &tcp_rto_max_constant,
+ },
{ .ctl_name = 0 }
};
Only in linux-2.6.18.new/net/ipv4: sysctl_net_ipv4.c~
diff -ru linux-2.6.18/net/ipv4/tcp.c linux-2.6.18.new/net/ipv4/tcp.c
--- linux-2.6.18/net/ipv4/tcp.c 2006-09-19 20:42:06.000000000 -0700
+++ linux-2.6.18.new/net/ipv4/tcp.c 2006-10-12 07:18:01.193083000 -0700
@@ -1764,6 +1764,8 @@
return err;
}
+#define TCP_BACKOFF_MAXVAL 65535
+
/*
* Socket option code for TCP.
*/
@@ -1939,6 +1941,20 @@
}
break;
+ case TCP_BACKOFF_MAX:
+ if (val < 1 || val > TCP_BACKOFF_MAXVAL)
+ err = -EINVAL;
+ else
+ tp->rto_max = val;
+ break;
+
+ case TCP_BACKOFF_INIT:
+ if (val < 1 || val > TCP_BACKOFF_MAXVAL)
+ err = -EINVAL;
+ else
+ tp->rto_init = val;
+ break;
+
default:
err = -ENOPROTOOPT;
break;
@@ -2110,6 +2126,12 @@
if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
return -EFAULT;
return 0;
+ case TCP_BACKOFF_MAX:
+ val = tp->rto_max;
+ break;
+ case TCP_BACKOFF_INIT:
+ val = tp->rto_init;
+ break;
default:
return -ENOPROTOOPT;
};
Only in linux-2.6.18.new/net/ipv4: tcp.c~
diff -ru linux-2.6.18/net/ipv4/tcp_timer.c linux-2.6.18.new/net/ipv4/tcp_timer.c
--- linux-2.6.18/net/ipv4/tcp_timer.c 2006-09-19 20:42:06.000000000 -0700
+++ linux-2.6.18.new/net/ipv4/tcp_timer.c 2006-10-11 10:41:39.000000000 -0700
@@ -31,6 +31,8 @@
int sysctl_tcp_retries1 = TCP_RETR1;
int sysctl_tcp_retries2 = TCP_RETR2;
int sysctl_tcp_orphan_retries;
+unsigned long sysctl_tcp_rto_max = TCP_RTO_MAX;
+unsigned long sysctl_tcp_rto_init = TCP_TIMEOUT_INIT;
static void tcp_write_timer(unsigned long);
static void tcp_delack_timer(unsigned long);
@@ -71,7 +73,8 @@
/* If peer does not open window for long time, or did not transmit
* anything for long time, penalize it. */
- if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
+ if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*tcp_rto_max(tp) ||
+ !do_reset)
orphans <<= 1;
/* If some dubious ICMP arrived, penalize even more. */
@@ -256,8 +259,8 @@
max_probes = sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) {
- const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX);
-
+ const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) <
+ tcp_rto_max(tp));
max_probes = tcp_orphan_retries(sk, alive);
if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes))
@@ -301,7 +304,7 @@
inet->num, tp->snd_una, tp->snd_nxt);
}
#endif
- if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
+ if (tcp_time_stamp - tp->rcv_tstamp > tcp_rto_max(tp)) {
tcp_write_err(sk);
goto out;
}
@@ -373,7 +376,8 @@
out_reset_timer:
icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
+ tcp_rto_max(tp));
if (icsk->icsk_retransmits > sysctl_tcp_retries1)
__sk_dst_reset(sk);
@@ -427,8 +431,8 @@
static void tcp_synack_timer(struct sock *sk)
{
- inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL,
- TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+ inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, TCP_TIMEOUT_INIT,
+ tcp_rto_max(tcp_sk(sk)));
}
void tcp_set_keepalive(struct sock *sk, int val)
prev parent reply other threads:[~2006-10-12 15:49 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-09-27 18:52 [PATCH] Customizable TCP backoff patch Ben Woodard
2006-09-27 23:16 ` David Miller
2006-09-27 23:00 ` Stephen Hemminger
2006-09-28 2:06 ` David Miller
2006-10-03 18:14 ` Ben Woodard
2006-10-04 7:07 ` David Miller
2006-10-04 8:56 ` Ingo Oeser
2006-10-04 9:08 ` David Miller
2006-10-04 17:17 ` Stephen Hemminger
2006-10-11 1:46 ` Ben Woodard
2006-10-11 1:59 ` YOSHIFUJI Hideaki / 吉藤英明
2006-10-11 2:53 ` David Miller
2006-10-11 2:54 ` David Miller
2006-10-11 14:01 ` Vlad Yasevich
2006-10-12 0:51 ` Ben Woodard
2006-10-12 1:06 ` YOSHIFUJI Hideaki / 吉藤英明
2006-10-12 15:49 ` Ben Woodard [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=452E6405.1010607@redhat.com \
--to=woodard@redhat.com \
--cc=behlendorf1@llnl.gov \
--cc=davem@davemloft.net \
--cc=mgrondona@llnl.gov \
--cc=netdev@vger.kernel.org \
--cc=vladislav.yasevich@hp.com \
--cc=yoshfuji@linux-ipv6.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.