netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jesper Dangaard Brouer <brouer@redhat.com>
To: netdev@vger.kernel.org
Cc: Jesper Dangaard Brouer <brouer@redhat.com>,
	Nandita Dukkipati <nanditad@google.com>,
	Eric Dumazet <eric.dumazet@gmail.com>
Subject: [PATCH] tcp: sysctl for initial receive window
Date: Fri, 21 Sep 2012 10:55:02 +0200	[thread overview]
Message-ID: <20120921085502.4534.20232.stgit@dragon> (raw)

Make it possible to adjust the TCP default initial advertised receive
window, via sysctl /proc/sys/net/ipv4/tcp_init_recv_window.

The window size is this value multiplied by the MSS of the connection.
The default value is (still) 10, as described in commit 356f039822b
(TCP: increase default initial receive window.)

Allow minimum value of 1, but recommend against setting value below 2
in the documentation.

It's possible to control/override this value per route table entry via
the iproute2 option initrwnd.  Having the global default exported via
sysctl helps determine the default setting, and makes it easier to
adjust.

Cc: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---

 Documentation/networking/ip-sysctl.txt |   12 ++++++++++++
 include/net/tcp.h                      |    1 +
 net/ipv4/sysctl_net_ipv4.c             |    9 +++++++++
 net/ipv4/tcp_input.c                   |    6 +++---
 net/ipv4/tcp_output.c                  |    8 +++++---
 5 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index c7fc107..684131c 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -257,6 +257,18 @@ tcp_frto_response - INTEGER
 		  to the values prior timeout
 	Default: 0 (rate halving based)
 
+tcp_init_recv_window - INTEGER
+	Default initial advertised receive window.  Actual window size
+	is this value multiplied by the MSS of the connection.  It's
+	possible to control/override this value per route table entry
+	via the iproute2 option initrwnd.
+	Minimum value is 1, but 2 is the recommended minimum.
+	The effective max value is limited by the socket's receive
+	buffer size (default tcp_rmem[1], and possibly scaled by
+	tcp_adv_win_scale), and can further be limited by window
+	clamp.
+	Default: 10
+
 tcp_keepalive_time - INTEGER
 	How often TCP sends out keepalive messages when keepalive is enabled.
 	Default: 2hours.
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a8cb00c..3334852 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -292,6 +292,7 @@ extern int sysctl_tcp_thin_dupack;
 extern int sysctl_tcp_early_retrans;
 extern int sysctl_tcp_limit_output_bytes;
 extern int sysctl_tcp_challenge_ack_limit;
+extern u32 sysctl_tcp_init_recv_window;
 
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9205e49..9bb6608 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -27,6 +27,7 @@
 #include <net/tcp_memcontrol.h>
 
 static int zero;
+static int one = 1;
 static int two = 2;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
@@ -794,6 +795,14 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero
 	},
+	{
+		.procname	= "tcp_init_recv_window",
+		.data		= &sysctl_tcp_init_recv_window,
+		.maxlen		= sizeof(sysctl_tcp_init_recv_window),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one
+	},
 	{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e2bec81..bbf7a33 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -356,14 +356,14 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 static void tcp_fixup_rcvbuf(struct sock *sk)
 {
 	u32 mss = tcp_sk(sk)->advmss;
-	u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
+	u32 icwnd = sysctl_tcp_init_recv_window;
 	int rcvmem;
 
-	/* Limit to 10 segments if mss <= 1460,
+	/* Limit to default 10 segments if mss <= 1460,
 	 * or 14600/mss segments, with a minimum of two segments.
 	 */
 	if (mss > 1460)
-		icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
+		icwnd = max_t(u32, (1460 * icwnd) / mss, 2);
 
 	rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
 	while (tcp_win_from_space(rcvmem) < mss)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cfe6ffe..5f3b26d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,6 +59,8 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
  */
 int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
+u32 sysctl_tcp_init_recv_window __read_mostly = TCP_DEFAULT_INIT_RCVWND;
+
 int sysctl_tcp_mtu_probing __read_mostly = 0;
 int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
 
@@ -235,14 +237,14 @@ void tcp_select_initial_window(int __space, __u32 mss,
 	}
 
 	/* Set initial window to a value enough for senders starting with
-	 * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place
+	 * initial congestion window of sysctl_tcp_init_recv_window. Place
 	 * a limit on the initial window when mss is larger than 1460.
 	 */
 	if (mss > (1 << *rcv_wscale)) {
-		int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
+		int init_cwnd = sysctl_tcp_init_recv_window;
 		if (mss > 1460)
 			init_cwnd =
-			max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
+			max_t(u32, (1460 * init_cwnd) / mss, 2);
 		/* when initializing use the value from init_rcv_wnd
 		 * rather than the default from above
 		 */

             reply	other threads:[~2012-09-21  8:48 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-21  8:55 Jesper Dangaard Brouer [this message]
2012-09-21 15:25 ` [PATCH] tcp: sysctl for initial receive window Eric Dumazet
2012-09-21 17:34   ` Jesper Dangaard Brouer
2012-09-21 17:56   ` David Miller
2012-09-21 18:32     ` Jesper Dangaard Brouer
2012-09-21 18:48       ` David Miller
2012-09-26 11:53         ` Eric Dumazet
2012-10-01 22:36           ` Yuchung Cheng
2012-09-25  5:29 ` Jan Engelhardt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120921085502.4534.20232.stgit@dragon \
    --to=brouer@redhat.com \
    --cc=eric.dumazet@gmail.com \
    --cc=nanditad@google.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).