netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Add sysctl to set the advertised TCP initial receive window.
@ 2009-12-08 22:40 chavey
  2009-12-08 23:00 ` Joe Perches
  2009-12-09 13:26 ` Andi Kleen
  0 siblings, 2 replies; 16+ messages in thread
From: chavey @ 2009-12-08 22:40 UTC (permalink / raw)
  To: davem; +Cc: netdev, therbert, chavey

Add a sysctl, tcp_init_rcv_wnd, to set the TCP initial receive window
size advertised by passive and active TCP connections. 
The current Linux TCP implementation limits the advertised TCP initial
receive window to the one prescribed by slow start. For short-lived
TCP connections used for transaction-type traffic (e.g. HTTP
requests), bounding the advertised TCP initial receive window results
in increased latency to complete the transaction. There exist
environments where strict adherence to the TCP initial receive
window used by slow start is unnecessary.
The tcp_init_rcv_wnd sysctl allows increasing the TCP initial receive
window for all TCP connections or on a per-connection basis, allowing
some TCP connections to advertise a larger TCP receive window
than the one bounded by slow start.

Signed-off-by: Laurent Chavey <chavey@google.com>
---

 Documentation/networking/ip-sysctl.txt |    6 ++++++
 include/linux/tcp.h                    |    2 ++
 include/net/tcp.h                      |    7 ++++++-
 net/ipv4/inet_connection_sock.c        |    5 +++++
 net/ipv4/syncookies.c                  |    2 +-
 net/ipv4/sysctl_net_ipv4.c             |    8 ++++++++
 net/ipv4/tcp.c                         |   10 ++++++++++
 net/ipv4/tcp_ipv4.c                    |    3 ++-
 net/ipv4/tcp_output.c                  |   20 ++++++++++++++++----
 net/ipv6/syncookies.c                  |    3 +--
 10 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index fbe427a..7224d12 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -479,6 +479,12 @@ tcp_dma_copybreak - INTEGER
 	and CONFIG_NET_DMA is enabled.
 	Default: 4096
 
+tcp_init_rcv_wnd - INTEGER
+	Initial receive window, in MSS, advertised by an active or passive
+	tcp socket. Use a value from 0 to TCP_INIT_RCV_WND_MAX. When
+	set to 0, use an initial receive window following RFC2414.
+	Default: 0
+
 UDP variables:
 
 udp_mem - vector of 3 INTEGERs: min, pressure, max
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 61723a7..4a622a0 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -96,6 +96,7 @@ enum {
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
 #define TCP_CONGESTION		13	/* Congestion control algorithm */
 #define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
+#define TCP_INIT_RCV_WND	15	/* Passive connection receive window */
 
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
@@ -221,6 +222,7 @@ struct tcp_options_received {
 	u8	num_sacks;	/* Number of SACK blocks		*/
 	u16	user_mss;  	/* mss requested by user in ioctl */
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
+	u8	init_rcv_wnd;	/* TCP initial receive window in MSS */
 };
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 03a49c7..5c2e3db 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -65,6 +65,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* Minimal RCV_MSS. */
 #define TCP_MIN_RCVMSS		536U
 
+/* TCP initial receive window. Maximum number of mss allowed. */
+#define TCP_INIT_RCV_WND_MAX	16
+
 /* The least MTU to use for probing */
 #define TCP_BASE_MSS		512
 
@@ -237,6 +240,7 @@ extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
+extern int sysctl_tcp_init_rcv_wnd;
 
 extern atomic_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
@@ -972,7 +976,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 /* Determine a window scaling and initial window to offer. */
 extern void tcp_select_initial_window(int __space, __u32 mss,
 				      __u32 *rcv_wnd, __u32 *window_clamp,
-				      int wscale_ok, __u8 *rcv_wscale);
+				      int wscale_ok, __u8 *rcv_wscale,
+				      struct tcp_sock *tp);
 
 static inline int tcp_win_from_space(int space)
 {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 537731b..9766d43 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -20,6 +20,7 @@
 #include <net/inet_hashtables.h>
 #include <net/inet_timewait_sock.h>
 #include <net/ip.h>
+#include <net/tcp.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
@@ -628,11 +629,15 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
 	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
 
 	if (rc != 0)
 		return rc;
 
+	if (tp->rx_opt.init_rcv_wnd == 0)
+		tp->rx_opt.init_rcv_wnd = sysctl_tcp_init_rcv_wnd;
+
 	sk->sk_max_ack_backlog = 0;
 	sk->sk_ack_backlog = 0;
 	inet_csk_delack_init(sk);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a6e0e07..fa4ed8c 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -356,7 +356,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
-				  ireq->wscale_ok, &rcv_wscale);
+				  ireq->wscale_ok, &rcv_wscale, tp);
 
 	ireq->rcv_wscale  = rcv_wscale;
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2dcf04d..85232fc 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -656,6 +656,14 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "tcp_init_rcv_wnd",
+		.data 		= &sysctl_tcp_init_rcv_wnd,
+		.maxlen 	= sizeof(int),
+		.mode 		= 0644,
+		.proc_handler 	= &proc_dointvec,
+	},
 #ifdef CONFIG_NETLABEL
 	{
 		.ctl_name	= NET_CIPSOV4_CACHE_ENABLE,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f1813bc..7567edd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2248,6 +2248,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 #endif
 
+	case TCP_INIT_RCV_WND:
+		val = min_t(int, val, TCP_INIT_RCV_WND_MAX);
+		tp->rx_opt.init_rcv_wnd = val;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -2425,6 +2430,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
 			return -EFAULT;
 		return 0;
+
+	case TCP_INIT_RCV_WND:
+		val = tp->rx_opt.init_rcv_wnd;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7cda24b..1611e95 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1829,6 +1829,8 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
+	tp->rx_opt.init_rcv_wnd = sysctl_tcp_init_rcv_wnd;
+
 	local_bh_disable();
 	percpu_counter_inc(&tcp_sockets_allocated);
 	local_bh_enable();
@@ -2493,4 +2495,3 @@ EXPORT_SYMBOL(tcp_proc_register);
 EXPORT_SYMBOL(tcp_proc_unregister);
 #endif
 EXPORT_SYMBOL(sysctl_tcp_low_latency);
-
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fcd278a..ec8b153 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,6 +59,9 @@ int sysctl_tcp_base_mss __read_mostly = 512;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
+/* Initial advertised receive window. Enabled using a non '0' value.*/
+int sysctl_tcp_init_rcv_wnd __read_mostly = 0;
+
 /* Account for new data that has been sent to the network. */
 static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 {
@@ -179,7 +182,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
  */
 void tcp_select_initial_window(int __space, __u32 mss,
 			       __u32 *rcv_wnd, __u32 *window_clamp,
-			       int wscale_ok, __u8 *rcv_wscale)
+			       int wscale_ok, __u8 *rcv_wscale,
+			       struct tcp_sock *tp)
 {
 	unsigned int space = (__space < 0 ? 0 : __space);
 
@@ -228,7 +232,13 @@ void tcp_select_initial_window(int __space, __u32 mss,
 			init_cwnd = 2;
 		else if (mss > 1460)
 			init_cwnd = 3;
-		if (*rcv_wnd > init_cwnd * mss)
+		/* when initializing use the value from init_rcv_wnd
+		 * rather than the default from above
+		 */
+		if (tp && tp->rx_opt.init_rcv_wnd &&
+		    (*rcv_wnd > tp->rx_opt.init_rcv_wnd * mss))
+			*rcv_wnd = tp->rx_opt.init_rcv_wnd * mss;
+		else if (*rcv_wnd > init_cwnd * mss)
 			*rcv_wnd = init_cwnd * mss;
 	}
 
@@ -2254,7 +2264,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 			&req->rcv_wnd,
 			&req->window_clamp,
 			ireq->wscale_ok,
-			&rcv_wscale);
+			&rcv_wscale,
+			tp);
 		ireq->rcv_wscale = rcv_wscale;
 	}
 
@@ -2342,7 +2353,8 @@ static void tcp_connect_init(struct sock *sk)
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
 				  sysctl_tcp_window_scaling,
-				  &rcv_wscale);
+				  &rcv_wscale,
+				  tp);
 
 	tp->rx_opt.rcv_wscale = rcv_wscale;
 	tp->rcv_ssthresh = tp->rcv_wnd;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 6b6ae91..062c730 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -267,7 +267,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
-				  ireq->wscale_ok, &rcv_wscale);
+				  ireq->wscale_ok, &rcv_wscale, tp);
 
 	ireq->rcv_wscale = rcv_wscale;
 
@@ -278,4 +278,3 @@ out_free:
 	reqsk_free(req);
 	return NULL;
 }
-

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] Add sysctl to set the advertised TCP initial receive window.
  2009-12-08 22:40 chavey
@ 2009-12-08 23:00 ` Joe Perches
  2009-12-09 13:26 ` Andi Kleen
  1 sibling, 0 replies; 16+ messages in thread
From: Joe Perches @ 2009-12-08 23:00 UTC (permalink / raw)
  To: chavey; +Cc: davem, netdev, therbert

On Tue, 2009-12-08 at 14:40 -0800, chavey@google.com wrote:
> Add a sysctl, tcp_init_rcv_wnd, to set the TCP initial receive window
> size advertised by passive and active TCP connections. 
[]
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index 2dcf04d..85232fc 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -656,6 +656,14 @@ static struct ctl_table ipv4_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec
>  	},
> +	{
> +		.ctl_name	= CTL_UNNUMBERED,
> +		.procname	= "tcp_init_rcv_wnd",
> +		.data 		= &sysctl_tcp_init_rcv_wnd,
> +		.maxlen 	= sizeof(int),
> +		.mode 		= 0644,
> +		.proc_handler 	= &proc_dointvec,

		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &SOMESTATIC_FOR_TCP_INIT_RCV_WND_MAX,

?



^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] Add sysctl to set the advertised TCP initial receive window.
@ 2009-12-09  0:30 chavey
  2009-12-09  0:59 ` Joe Perches
  0 siblings, 1 reply; 16+ messages in thread
From: chavey @ 2009-12-09  0:30 UTC (permalink / raw)
  To: davem; +Cc: netdev, therbert, chavey, joe

Add a sysctl, tcp_init_rcv_wnd, to set the TCP initial receive window
size advertised by passive and active TCP connections.
The current Linux TCP implementation limits the advertised TCP initial
receive window to the one prescribed by slow start. For short-lived
TCP connections used for transaction-type traffic (e.g. HTTP
requests), bounding the advertised TCP initial receive window results
in increased latency to complete the transaction. There exist
environments where strict adherence to the TCP initial receive
window used by slow start is unnecessary.
The tcp_init_rcv_wnd sysctl allows increasing the TCP initial receive
window for all TCP connections or on a per-connection basis, allowing
some TCP connections to advertise a larger TCP receive window
than the one bounded by slow start.

Signed-off-by: Laurent Chavey <chavey@google.com>
---
 Documentation/networking/ip-sysctl.txt |    6 ++++++
 include/linux/tcp.h                    |    2 ++
 include/net/tcp.h                      |    7 ++++++-
 net/ipv4/inet_connection_sock.c        |    5 +++++
 net/ipv4/syncookies.c                  |    2 +-
 net/ipv4/sysctl_net_ipv4.c             |   10 ++++++++++
 net/ipv4/tcp.c                         |   10 ++++++++++
 net/ipv4/tcp_ipv4.c                    |    3 ++-
 net/ipv4/tcp_output.c                  |   20 ++++++++++++++++----
 net/ipv6/syncookies.c                  |    3 +--
 10 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index fbe427a..7224d12 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -479,6 +479,12 @@ tcp_dma_copybreak - INTEGER
 	and CONFIG_NET_DMA is enabled.
 	Default: 4096
 
+tcp_init_rcv_wnd - INTEGER
+	Initial receive window, in MSS, advertised by an active or passive
+	tcp socket. Use a value from 0 to TCP_INIT_RCV_WND_MAX. When
+	set to 0, use an initial receive window following RFC2414.
+	Default: 0
+
 UDP variables:
 
 udp_mem - vector of 3 INTEGERs: min, pressure, max
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 61723a7..4a622a0 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -96,6 +96,7 @@ enum {
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
 #define TCP_CONGESTION		13	/* Congestion control algorithm */
 #define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
+#define TCP_INIT_RCV_WND	15	/* Passive connection receive window */
 
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
@@ -221,6 +222,7 @@ struct tcp_options_received {
 	u8	num_sacks;	/* Number of SACK blocks		*/
 	u16	user_mss;  	/* mss requested by user in ioctl */
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
+	u8	init_rcv_wnd;	/* TCP initial receive window in MSS */
 };
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 03a49c7..5c2e3db 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -65,6 +65,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* Minimal RCV_MSS. */
 #define TCP_MIN_RCVMSS		536U
 
+/* TCP initial receive window. Maximum number of mss allowed. */
+#define TCP_INIT_RCV_WND_MAX	16
+
 /* The least MTU to use for probing */
 #define TCP_BASE_MSS		512
 
@@ -237,6 +240,7 @@ extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
+extern int sysctl_tcp_init_rcv_wnd;
 
 extern atomic_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
@@ -972,7 +976,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 /* Determine a window scaling and initial window to offer. */
 extern void tcp_select_initial_window(int __space, __u32 mss,
 				      __u32 *rcv_wnd, __u32 *window_clamp,
-				      int wscale_ok, __u8 *rcv_wscale);
+				      int wscale_ok, __u8 *rcv_wscale,
+				      struct tcp_sock *tp);
 
 static inline int tcp_win_from_space(int space)
 {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 537731b..9766d43 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -20,6 +20,7 @@
 #include <net/inet_hashtables.h>
 #include <net/inet_timewait_sock.h>
 #include <net/ip.h>
+#include <net/tcp.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
@@ -628,11 +629,15 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
 	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
 
 	if (rc != 0)
 		return rc;
 
+	if (tp->rx_opt.init_rcv_wnd == 0)
+		tp->rx_opt.init_rcv_wnd = sysctl_tcp_init_rcv_wnd;
+
 	sk->sk_max_ack_backlog = 0;
 	sk->sk_ack_backlog = 0;
 	inet_csk_delack_init(sk);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a6e0e07..fa4ed8c 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -356,7 +356,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
-				  ireq->wscale_ok, &rcv_wscale);
+				  ireq->wscale_ok, &rcv_wscale, tp);
 
 	ireq->rcv_wscale  = rcv_wscale;
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2dcf04d..1257f89 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -656,6 +656,16 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "tcp_init_rcv_wnd",
+		.data 		= &sysctl_tcp_init_rcv_wnd,
+		.maxlen 	= sizeof(int),
+		.mode 		= 0644,
+		.proc_handler 	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= TCP_INIT_RCV_WND_MAX
+	},
 #ifdef CONFIG_NETLABEL
 	{
 		.ctl_name	= NET_CIPSOV4_CACHE_ENABLE,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f1813bc..7567edd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2248,6 +2248,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 #endif
 
+	case TCP_INIT_RCV_WND:
+		val = min_t(int, val, TCP_INIT_RCV_WND_MAX);
+		tp->rx_opt.init_rcv_wnd = val;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -2425,6 +2430,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
 			return -EFAULT;
 		return 0;
+
+	case TCP_INIT_RCV_WND:
+		val = tp->rx_opt.init_rcv_wnd;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7cda24b..1611e95 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1829,6 +1829,8 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
+	tp->rx_opt.init_rcv_wnd = sysctl_tcp_init_rcv_wnd;
+
 	local_bh_disable();
 	percpu_counter_inc(&tcp_sockets_allocated);
 	local_bh_enable();
@@ -2493,4 +2495,3 @@ EXPORT_SYMBOL(tcp_proc_register);
 EXPORT_SYMBOL(tcp_proc_unregister);
 #endif
 EXPORT_SYMBOL(sysctl_tcp_low_latency);
-
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fcd278a..ec8b153 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,6 +59,9 @@ int sysctl_tcp_base_mss __read_mostly = 512;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
+/* Initial advertised receive window. Enabled using a non '0' value.*/
+int sysctl_tcp_init_rcv_wnd __read_mostly = 0;
+
 /* Account for new data that has been sent to the network. */
 static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 {
@@ -179,7 +182,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
  */
 void tcp_select_initial_window(int __space, __u32 mss,
 			       __u32 *rcv_wnd, __u32 *window_clamp,
-			       int wscale_ok, __u8 *rcv_wscale)
+			       int wscale_ok, __u8 *rcv_wscale,
+			       struct tcp_sock *tp)
 {
 	unsigned int space = (__space < 0 ? 0 : __space);
 
@@ -228,7 +232,13 @@ void tcp_select_initial_window(int __space, __u32 mss,
 			init_cwnd = 2;
 		else if (mss > 1460)
 			init_cwnd = 3;
-		if (*rcv_wnd > init_cwnd * mss)
+		/* when initializing use the value from init_rcv_wnd
+		 * rather than the default from above
+		 */
+		if (tp && tp->rx_opt.init_rcv_wnd &&
+		    (*rcv_wnd > tp->rx_opt.init_rcv_wnd * mss))
+			*rcv_wnd = tp->rx_opt.init_rcv_wnd * mss;
+		else if (*rcv_wnd > init_cwnd * mss)
 			*rcv_wnd = init_cwnd * mss;
 	}
 
@@ -2254,7 +2264,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 			&req->rcv_wnd,
 			&req->window_clamp,
 			ireq->wscale_ok,
-			&rcv_wscale);
+			&rcv_wscale,
+			tp);
 		ireq->rcv_wscale = rcv_wscale;
 	}
 
@@ -2342,7 +2353,8 @@ static void tcp_connect_init(struct sock *sk)
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
 				  sysctl_tcp_window_scaling,
-				  &rcv_wscale);
+				  &rcv_wscale,
+				  tp);
 
 	tp->rx_opt.rcv_wscale = rcv_wscale;
 	tp->rcv_ssthresh = tp->rcv_wnd;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 6b6ae91..062c730 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -267,7 +267,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
-				  ireq->wscale_ok, &rcv_wscale);
+				  ireq->wscale_ok, &rcv_wscale, tp);
 
 	ireq->rcv_wscale = rcv_wscale;
 
@@ -278,4 +278,3 @@ out_free:
 	reqsk_free(req);
 	return NULL;
 }
-

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] Add sysctl to set the advertised TCP initial receive window.
  2009-12-09  0:30 chavey
@ 2009-12-09  0:59 ` Joe Perches
  0 siblings, 0 replies; 16+ messages in thread
From: Joe Perches @ 2009-12-09  0:59 UTC (permalink / raw)
  To: chavey; +Cc: davem, netdev, therbert

On Tue, 2009-12-08 at 16:30 -0800, chavey@google.com wrote:
> Add a sysctl, tcp_init_rcv_wnd, to set the TCP initial receive window
> size advertised by passive and active TCP connections.

Hi Laurent.

> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index 2dcf04d..1257f89 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -656,6 +656,16 @@ static struct ctl_table ipv4_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec
>  	},
> +	{
> +		.ctl_name	= CTL_UNNUMBERED,
> +		.procname	= "tcp_init_rcv_wnd",
> +		.data 		= &sysctl_tcp_init_rcv_wnd,
> +		.maxlen 	= sizeof(int),
> +		.mode 		= 0644,
> +		.proc_handler 	= proc_dointvec_minmax,
> +		.extra1		= &zero,
> +		.extra2		= TCP_INIT_RCV_WND_MAX

This won't work if you use proc_dointvec_minmax.

.extra1 and .extra2 are used as pointers to int,
not the int values themselves.

You'd have to use something like:

static const int zero = 0;
static const int tcp_init_rcv_wnd_max = TCP_INIT_RCV_WND_MAX;

[...]
		.proc_handler 	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &tcp_init_rcv_wnd_max;

For instance, the tcp_retries1 entry:

{
	.procname = "tcp_retries1",
	.data = &sysctl_tcp_retries1,
	.maxlen = sizeof(int),
	.mode = 0644,
	.proc_handler = proc_dointvec_minmax,
	.extra2 = &tcp_retr1_max
}



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Add sysctl to set the advertised TCP initial receive window.
  2009-12-08 22:40 chavey
  2009-12-08 23:00 ` Joe Perches
@ 2009-12-09 13:26 ` Andi Kleen
  2009-12-09 18:05   ` Tom Herbert
       [not found]   ` <65634d660912091001s44016cccq4f2422e613ba9db9@mail.gmail.com>
  1 sibling, 2 replies; 16+ messages in thread
From: Andi Kleen @ 2009-12-09 13:26 UTC (permalink / raw)
  To: chavey; +Cc: davem, netdev, therbert

chavey@google.com writes:

> Add a sysctl, tcp_init_rcv_wnd, to set the TCP initial receive window
> size advertised by passive and active TCP connections. 
> The current Linux TCP implementation limits the advertised TCP initial
> receive window to the one proscribed by slow start. For short leave
> TCP connections used for transaction type of traffic (i.e. http
> requests), bounding the advertised TCP initial receive window results
> in increased latency to complete the transaction. There exists
> environments where strict adherence to using the TCP initial receive
> window used by slow start is un-necessary. 

That's not the Internet?

> The tcp_init_rcv_wnd sysctl allows increasing the TCP initial receive
> window for all TCP connections or on a per TCP connection, allowing
> for some of the TCP connection to advertise larger TCP receive window
> than the ones bounded by slow start.

Traditionally, exposing such settings to users was strongly discouraged.
There's the danger that lots of people will essentially turn off cwnd
without knowing what they're doing to "make downloads go faster", causing
problems in the public network.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Add sysctl to set the advertised TCP initial receive window.
  2009-12-09 13:26 ` Andi Kleen
@ 2009-12-09 18:05   ` Tom Herbert
       [not found]   ` <65634d660912091001s44016cccq4f2422e613ba9db9@mail.gmail.com>
  1 sibling, 0 replies; 16+ messages in thread
From: Tom Herbert @ 2009-12-09 18:05 UTC (permalink / raw)
  To: Linux Netdev List

On Wed, Dec 9, 2009 at 5:26 AM, Andi Kleen <andi@firstfloor.org> wrote:
> chavey@google.com writes:
>
>> Add a sysctl, tcp_init_rcv_wnd, to set the TCP initial receive window
>> size advertised by passive and active TCP connections.
>> The current Linux TCP implementation limits the advertised TCP initial
>> receive window to the one proscribed by slow start. For short leave
>> TCP connections used for transaction type of traffic (i.e. http
>> requests), bounding the advertised TCP initial receive window results
>> in increased latency to complete the transaction. There exists
>> environments where strict adherence to using the TCP initial receive
>> window used by slow start is un-necessary.
>
> That's not the Internet?
>
>> The tcp_init_rcv_wnd sysctl allows increasing the TCP initial receive
>> window for all TCP connections or on a per TCP connection, allowing
>> for some of the TCP connection to advertise larger TCP receive window
>> than the ones bounded by slow start.
>
> Traditionally it was very discouraged to expose such settings to users.
> There's the danger that lots of people will essentially turn off cwnd
> without knowing what they're doing to "make downloads go faster", causing
> problems in the public network.
>

The danger to the Internet lies not on the receiver side but on the
sender side and its correct adherence to slow start.  A
large majority of clients on the Internet are currently advertising
initial receive windows much higher than 3*MSS (in many cases 64K or
higher), so the onus is on the server side to do the right thing.

Setting the initial congestion window is already supported in
the stack, but, as has been pointed out on this list, it's pretty useless
without the ability to set a larger initial receive window.  If we can
use larger initial congestion windows on private networks that we know
to be essentially uncongested, this can result in reduced
transactional latency by one or more RTTs.  The performance benefits
should be fairly obvious, but we can provide numbers if necessary.

Thanks,
Tom

> -Andi
>
> --
> ak@linux.intel.com -- Speaking for myself only.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] Add sysctl to set the advertised TCP initial receive window.
@ 2009-12-09 20:13 chavey
  2009-12-09 20:33 ` Eric Dumazet
  0 siblings, 1 reply; 16+ messages in thread
From: chavey @ 2009-12-09 20:13 UTC (permalink / raw)
  To: davem; +Cc: netdev, therbert, chavey, joe

Add a sysctl, tcp_init_rcv_wnd, to set the TCP initial receive window
size advertised by passive and active TCP connections.
The current Linux TCP implementation limits the advertised TCP initial
receive window to the one prescribed by slow start. For short-lived
TCP connections used for transaction-type traffic (e.g. HTTP
requests), bounding the advertised TCP initial receive window results
in increased latency to complete the transaction. There exist
environments where strict adherence to the TCP initial receive
window used by slow start is unnecessary.
The tcp_init_rcv_wnd sysctl allows increasing the TCP initial receive
window for all TCP connections or on a per-connection basis, allowing
some TCP connections to advertise a larger TCP receive window
than the one bounded by slow start. Setting the initial
congestion window is already supported in the stack, but the feature
is useless without the ability to set a larger initial receive window.

Signed-off-by: Laurent Chavey <chavey@google.com>
---
 Documentation/networking/ip-sysctl.txt |    6 ++++++
 include/linux/tcp.h                    |    2 ++
 include/net/tcp.h                      |    7 ++++++-
 net/ipv4/inet_connection_sock.c        |    5 +++++
 net/ipv4/syncookies.c                  |    2 +-
 net/ipv4/sysctl_net_ipv4.c             |   12 ++++++++++++
 net/ipv4/tcp.c                         |   10 ++++++++++
 net/ipv4/tcp_ipv4.c                    |    3 ++-
 net/ipv4/tcp_output.c                  |   20 ++++++++++++++++----
 net/ipv6/syncookies.c                  |    3 +--
 10 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index fbe427a..7224d12 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -479,6 +479,12 @@ tcp_dma_copybreak - INTEGER
 	and CONFIG_NET_DMA is enabled.
 	Default: 4096
 
+tcp_init_rcv_wnd - INTEGER
+	Initial receive window, in MSS, advertised by an active or passive
+	tcp socket. Use a value from 0 to TCP_INIT_RCV_WND_MAX. When
+	set to 0, use an initial receive window following RFC2414.
+	Default: 0
+
 UDP variables:
 
 udp_mem - vector of 3 INTEGERs: min, pressure, max
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 61723a7..4a622a0 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -96,6 +96,7 @@ enum {
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
 #define TCP_CONGESTION		13	/* Congestion control algorithm */
 #define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
+#define TCP_INIT_RCV_WND	15	/* Passive connection receive window */
 
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
@@ -221,6 +222,7 @@ struct tcp_options_received {
 	u8	num_sacks;	/* Number of SACK blocks		*/
 	u16	user_mss;  	/* mss requested by user in ioctl */
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
+	u8	init_rcv_wnd;	/* TCP initial receive window in MSS */
 };
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 03a49c7..5c2e3db 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -65,6 +65,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* Minimal RCV_MSS. */
 #define TCP_MIN_RCVMSS		536U
 
+/* TCP initial receive window. Maximum number of mss allowed. */
+#define TCP_INIT_RCV_WND_MAX	16
+
 /* The least MTU to use for probing */
 #define TCP_BASE_MSS		512
 
@@ -237,6 +240,7 @@ extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
+extern int sysctl_tcp_init_rcv_wnd;
 
 extern atomic_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
@@ -972,7 +976,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 /* Determine a window scaling and initial window to offer. */
 extern void tcp_select_initial_window(int __space, __u32 mss,
 				      __u32 *rcv_wnd, __u32 *window_clamp,
-				      int wscale_ok, __u8 *rcv_wscale);
+				      int wscale_ok, __u8 *rcv_wscale,
+				      struct tcp_sock *tp);
 
 static inline int tcp_win_from_space(int space)
 {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 537731b..9766d43 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -20,6 +20,7 @@
 #include <net/inet_hashtables.h>
 #include <net/inet_timewait_sock.h>
 #include <net/ip.h>
+#include <net/tcp.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
@@ -628,11 +629,15 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
 	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
 
 	if (rc != 0)
 		return rc;
 
+	if (tp->rx_opt.init_rcv_wnd == 0)
+		tp->rx_opt.init_rcv_wnd = sysctl_tcp_init_rcv_wnd;
+
 	sk->sk_max_ack_backlog = 0;
 	sk->sk_ack_backlog = 0;
 	inet_csk_delack_init(sk);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a6e0e07..fa4ed8c 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -356,7 +356,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
-				  ireq->wscale_ok, &rcv_wscale);
+				  ireq->wscale_ok, &rcv_wscale, tp);
 
 	ireq->rcv_wscale  = rcv_wscale;
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2dcf04d..63995d3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -25,6 +25,8 @@ static int zero;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
+static int tcp_init_rcv_wnd_max = TCP_INIT_RCV_WND_MAX;
+
 
 /* Update system visible IP port range */
 static void set_local_port_range(int range[2])
@@ -656,6 +658,16 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "tcp_init_rcv_wnd",
+		.data 		= &sysctl_tcp_init_rcv_wnd,
+		.maxlen 	= sizeof(int),
+		.mode 		= 0644,
+		.proc_handler 	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &tcp_init_rcv_wnd_max
+	},
 #ifdef CONFIG_NETLABEL
 	{
 		.ctl_name	= NET_CIPSOV4_CACHE_ENABLE,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f1813bc..7567edd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2248,6 +2248,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 #endif
 
+	case TCP_INIT_RCV_WND:
+		val = min_t(int, val, TCP_INIT_RCV_WND_MAX);
+		tp->rx_opt.init_rcv_wnd = val;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -2425,6 +2430,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
 			return -EFAULT;
 		return 0;
+
+	case TCP_INIT_RCV_WND:
+		val = tp->rx_opt.init_rcv_wnd;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7cda24b..1611e95 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1829,6 +1829,8 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
+	tp->rx_opt.init_rcv_wnd = sysctl_tcp_init_rcv_wnd;
+
 	local_bh_disable();
 	percpu_counter_inc(&tcp_sockets_allocated);
 	local_bh_enable();
@@ -2493,4 +2495,3 @@ EXPORT_SYMBOL(tcp_proc_register);
 EXPORT_SYMBOL(tcp_proc_unregister);
 #endif
 EXPORT_SYMBOL(sysctl_tcp_low_latency);
-
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fcd278a..ec8b153 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,6 +59,9 @@ int sysctl_tcp_base_mss __read_mostly = 512;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
+/* Initial advertised receive window. Enabled using a non '0' value.*/
+int sysctl_tcp_init_rcv_wnd __read_mostly = 0;
+
 /* Account for new data that has been sent to the network. */
 static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 {
@@ -179,7 +182,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
  */
 void tcp_select_initial_window(int __space, __u32 mss,
 			       __u32 *rcv_wnd, __u32 *window_clamp,
-			       int wscale_ok, __u8 *rcv_wscale)
+			       int wscale_ok, __u8 *rcv_wscale,
+			       struct tcp_sock *tp)
 {
 	unsigned int space = (__space < 0 ? 0 : __space);
 
@@ -228,7 +232,13 @@ void tcp_select_initial_window(int __space, __u32 mss,
 			init_cwnd = 2;
 		else if (mss > 1460)
 			init_cwnd = 3;
-		if (*rcv_wnd > init_cwnd * mss)
+		/* when initializing use the value from init_rcv_wnd
+		 * rather than the default from above
+		 */
+		if (tp && tp->rx_opt.init_rcv_wnd &&
+		    (*rcv_wnd > tp->rx_opt.init_rcv_wnd * mss))
+			*rcv_wnd = tp->rx_opt.init_rcv_wnd * mss;
+		else if (*rcv_wnd > init_cwnd * mss)
 			*rcv_wnd = init_cwnd * mss;
 	}
 
@@ -2254,7 +2264,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 			&req->rcv_wnd,
 			&req->window_clamp,
 			ireq->wscale_ok,
-			&rcv_wscale);
+			&rcv_wscale,
+			tp);
 		ireq->rcv_wscale = rcv_wscale;
 	}
 
@@ -2342,7 +2353,8 @@ static void tcp_connect_init(struct sock *sk)
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
 				  sysctl_tcp_window_scaling,
-				  &rcv_wscale);
+				  &rcv_wscale,
+				  tp);
 
 	tp->rx_opt.rcv_wscale = rcv_wscale;
 	tp->rcv_ssthresh = tp->rcv_wnd;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 6b6ae91..062c730 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -267,7 +267,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
-				  ireq->wscale_ok, &rcv_wscale);
+				  ireq->wscale_ok, &rcv_wscale, tp);
 
 	ireq->rcv_wscale = rcv_wscale;
 
@@ -278,4 +278,3 @@ out_free:
 	reqsk_free(req);
 	return NULL;
 }
-

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] Add sysctl to set the advertised TCP initial receive window.
  2009-12-09 20:13 [PATCH] Add sysctl to set the advertised TCP initial receive window chavey
@ 2009-12-09 20:33 ` Eric Dumazet
  2009-12-09 20:55   ` Joe Perches
  0 siblings, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2009-12-09 20:33 UTC (permalink / raw)
  To: chavey; +Cc: davem, netdev, therbert, joe

Le 09/12/2009 21:13, chavey@google.com a écrit :
> Add a sysctl, tcp_init_rcv_wnd, to set the TCP initial receive window
> size advertised by passive and active TCP connections.
> The current Linux TCP implementation limits the advertised TCP initial
> receive window to the one prescribed by slow start. For short lived
> TCP connections used for transaction type of traffic (i.e. http
> requests), bounding the advertised TCP initial receive window results
> in increased latency to complete the transaction. There exists
> environments where strict adherence to using the TCP initial receive
> window used by slow start is un-necessary.
> The tcp_init_rcv_wnd sysctl allows increasing the TCP initial receive
> window for all TCP connections or on a per TCP connection, allowing
> for some of the TCP connection to advertise larger TCP receive window
> than the ones bounded by slow start. Support for setting initial
> congestion window is already supported in the stack but the feature 
> is useless without the ability to set a larger initial receive window.
> 
> Signed-off-by: Laurent Chavey <chavey@google.com>

> index f1813bc..7567edd 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -2248,6 +2248,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
>  		break;
>  #endif
>  
> +	case TCP_INIT_RCV_WND:
> +		val = min_t(int, val, TCP_INIT_RCV_WND_MAX);
> +		tp->rx_opt.init_rcv_wnd = val;
> +		break;
> +

If the user passes val = -1, you end up with init_rcv_wnd = 255

Is that what you want?

Probably not :)

Minor nit: your subject should be: [PATCH] tcp: Add sysctl to blablabla

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Add sysctl to set the advertised TCP initial receive window.
  2009-12-09 20:33 ` Eric Dumazet
@ 2009-12-09 20:55   ` Joe Perches
  2009-12-09 21:11     ` Eric Dumazet
  0 siblings, 1 reply; 16+ messages in thread
From: Joe Perches @ 2009-12-09 20:55 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: chavey, davem, netdev, therbert

On Wed, 2009-12-09 at 21:33 +0100, Eric Dumazet wrote:
> Le 09/12/2009 21:13, chavey@google.com a écrit :
> > Add a sysctl, tcp_init_rcv_wnd, to set the TCP initial receive window
> > size advertised by passive and active TCP connections.
> > index f1813bc..7567edd 100644
> > --- a/net/ipv4/tcp.c
> > +++ b/net/ipv4/tcp.c
> > @@ -2248,6 +2248,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
> >  		break;
> >  #endif
> >  
> > +	case TCP_INIT_RCV_WND:
> > +		val = min_t(int, val, TCP_INIT_RCV_WND_MAX);
> > +		tp->rx_opt.init_rcv_wnd = val;
> > +		break;
> > +
> 
> If user pass val = -1, you end with init_rcv_wnd = 255
> 
> Is it what you want ?
> Probably not :)

I believe min_t() is superfluous now because:
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &tcp_init_rcv_wnd_max



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Add sysctl to set the advertised TCP initial receive window.
  2009-12-09 20:55   ` Joe Perches
@ 2009-12-09 21:11     ` Eric Dumazet
  0 siblings, 0 replies; 16+ messages in thread
From: Eric Dumazet @ 2009-12-09 21:11 UTC (permalink / raw)
  To: Joe Perches; +Cc: chavey, davem, netdev, therbert

Le 09/12/2009 21:55, Joe Perches a écrit :
> On Wed, 2009-12-09 at 21:33 +0100, Eric Dumazet wrote:
>> Le 09/12/2009 21:13, chavey@google.com a écrit :
>>> Add a sysctl, tcp_init_rcv_wnd, to set the TCP initial receive window
>>> size advertised by passive and active TCP connections.
>>> index f1813bc..7567edd 100644
>>> --- a/net/ipv4/tcp.c
>>> +++ b/net/ipv4/tcp.c
>>> @@ -2248,6 +2248,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
>>>  		break;
>>>  #endif
>>>  
>>> +	case TCP_INIT_RCV_WND:
>>> +		val = min_t(int, val, TCP_INIT_RCV_WND_MAX);
>>> +		tp->rx_opt.init_rcv_wnd = val;
>>> +		break;
>>> +
>>
>> If user pass val = -1, you end with init_rcv_wnd = 255
>>
>> Is it what you want ?
>> Probably not :)
> 
> I believe min_t() is superfluous now because:
> +               .proc_handler   = proc_dointvec_minmax,
> +               .extra1         = &zero,
> +               .extra2         = &tcp_init_rcv_wnd_max
> 
> 

It is *not* superfluous, since sysctl_tcp_init_rcv_wnd is ignored if the
user called setsockopt(TCP_INIT_RCV_WND) with a non-zero value:

+	if (tp->rx_opt.init_rcv_wnd == 0)
+		tp->rx_opt.init_rcv_wnd = sysctl_tcp_init_rcv_wnd;

So it's probably better not to silently cap the user-provided val, but to report an error instead.

if (val < 0 || val > TCP_INIT_RCV_WND_MAX)
	err = -EINVAL;
else
	tp->rx_opt.init_rcv_wnd = val;

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] Add sysctl to set the advertised TCP initial receive window.
@ 2009-12-10  2:05 chavey
  2009-12-10 16:57 ` Stephen Hemminger
  2009-12-13  3:27 ` Eric W. Biederman
  0 siblings, 2 replies; 16+ messages in thread
From: chavey @ 2009-12-10  2:05 UTC (permalink / raw)
  To: davem; +Cc: netdev, therbert, chavey, joe, eric.dumazet

Add a sysctl, tcp_init_rcv_wnd, to set the TCP initial receive window
size advertised by passive and active TCP connections.
The current Linux TCP implementation limits the advertised TCP initial
receive window to the one prescribed by slow start. For short-lived
TCP connections used for transaction-type traffic (e.g. HTTP
requests), bounding the advertised TCP initial receive window results
in increased latency to complete the transaction. There exist
environments where strict adherence to the TCP initial receive
window used by slow start is unnecessary.
The tcp_init_rcv_wnd sysctl allows increasing the TCP initial receive
window for all TCP connections or on a per-connection basis, allowing
some TCP connections to advertise a larger TCP receive window
than the one bounded by slow start. Setting the initial
congestion window is already supported in the stack, but that feature
is useless without the ability to set a larger initial receive window.

Signed-off-by: Laurent Chavey <chavey@google.com>
---
 Documentation/networking/ip-sysctl.txt |    6 ++++++
 include/linux/tcp.h                    |    2 ++
 include/net/tcp.h                      |    7 ++++++-
 net/ipv4/inet_connection_sock.c        |    5 +++++
 net/ipv4/syncookies.c                  |    2 +-
 net/ipv4/sysctl_net_ipv4.c             |   12 ++++++++++++
 net/ipv4/tcp.c                         |   13 +++++++++++++
 net/ipv4/tcp_ipv4.c                    |    3 ++-
 net/ipv4/tcp_output.c                  |   20 ++++++++++++++++----
 net/ipv6/syncookies.c                  |    3 +--
 10 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index fbe427a..7224d12 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -479,6 +479,12 @@ tcp_dma_copybreak - INTEGER
 	and CONFIG_NET_DMA is enabled.
 	Default: 4096
 
+tcp_init_rcv_wnd - INTEGER
+	Initial receive window, in MSS, advertised by an active or passive
+	tcp socket. Use a value from 0 to TCP_INIT_RCV_WND_MAX. When
+	set to 0, use an initial receive window following RFC2414.
+	Default: 0
+
 UDP variables:
 
 udp_mem - vector of 3 INTEGERs: min, pressure, max
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 61723a7..4a622a0 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -96,6 +96,7 @@ enum {
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
 #define TCP_CONGESTION		13	/* Congestion control algorithm */
 #define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
+#define TCP_INIT_RCV_WND	15	/* Passive connection receive window */
 
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
@@ -221,6 +222,7 @@ struct tcp_options_received {
 	u8	num_sacks;	/* Number of SACK blocks		*/
 	u16	user_mss;  	/* mss requested by user in ioctl */
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
+	u8	init_rcv_wnd;	/* TCP initial receive window in MSS */
 };
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 03a49c7..5c2e3db 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -65,6 +65,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* Minimal RCV_MSS. */
 #define TCP_MIN_RCVMSS		536U
 
+/* TCP initial receive window. Maximum number of mss allowed. */
+#define TCP_INIT_RCV_WND_MAX	16
+
 /* The least MTU to use for probing */
 #define TCP_BASE_MSS		512
 
@@ -237,6 +240,7 @@ extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
+extern int sysctl_tcp_init_rcv_wnd;
 
 extern atomic_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
@@ -972,7 +976,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 /* Determine a window scaling and initial window to offer. */
 extern void tcp_select_initial_window(int __space, __u32 mss,
 				      __u32 *rcv_wnd, __u32 *window_clamp,
-				      int wscale_ok, __u8 *rcv_wscale);
+				      int wscale_ok, __u8 *rcv_wscale,
+				      struct tcp_sock *tp);
 
 static inline int tcp_win_from_space(int space)
 {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 537731b..9766d43 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -20,6 +20,7 @@
 #include <net/inet_hashtables.h>
 #include <net/inet_timewait_sock.h>
 #include <net/ip.h>
+#include <net/tcp.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
@@ -628,11 +629,15 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
 	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
 
 	if (rc != 0)
 		return rc;
 
+	if (tp->rx_opt.init_rcv_wnd == 0)
+		tp->rx_opt.init_rcv_wnd = sysctl_tcp_init_rcv_wnd;
+
 	sk->sk_max_ack_backlog = 0;
 	sk->sk_ack_backlog = 0;
 	inet_csk_delack_init(sk);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a6e0e07..fa4ed8c 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -356,7 +356,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
-				  ireq->wscale_ok, &rcv_wscale);
+				  ireq->wscale_ok, &rcv_wscale, tp);
 
 	ireq->rcv_wscale  = rcv_wscale;
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2dcf04d..63995d3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -25,6 +25,8 @@ static int zero;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
+static int tcp_init_rcv_wnd_max = TCP_INIT_RCV_WND_MAX;
+
 
 /* Update system visible IP port range */
 static void set_local_port_range(int range[2])
@@ -656,6 +658,16 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "tcp_init_rcv_wnd",
+		.data 		= &sysctl_tcp_init_rcv_wnd,
+		.maxlen 	= sizeof(int),
+		.mode 		= 0644,
+		.proc_handler 	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &tcp_init_rcv_wnd_max
+	},
 #ifdef CONFIG_NETLABEL
 	{
 		.ctl_name	= NET_CIPSOV4_CACHE_ENABLE,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f1813bc..25ba3fd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2248,6 +2248,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 #endif
 
+	case TCP_INIT_RCV_WND:
+		if (val < 0 || val > TCP_INIT_RCV_WND_MAX) {
+			err = -EINVAL;
+			break;
+		}
+		tp->rx_opt.init_rcv_wnd = val;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -2425,6 +2433,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
 			return -EFAULT;
 		return 0;
+
+	case TCP_INIT_RCV_WND:
+		val = tp->rx_opt.init_rcv_wnd;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7cda24b..1611e95 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1829,6 +1829,8 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
+	tp->rx_opt.init_rcv_wnd = sysctl_tcp_init_rcv_wnd;
+
 	local_bh_disable();
 	percpu_counter_inc(&tcp_sockets_allocated);
 	local_bh_enable();
@@ -2493,4 +2495,3 @@ EXPORT_SYMBOL(tcp_proc_register);
 EXPORT_SYMBOL(tcp_proc_unregister);
 #endif
 EXPORT_SYMBOL(sysctl_tcp_low_latency);
-
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fcd278a..ec8b153 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,6 +59,9 @@ int sysctl_tcp_base_mss __read_mostly = 512;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
+/* Initial advertised receive window. Enabled using a non '0' value.*/
+int sysctl_tcp_init_rcv_wnd __read_mostly = 0;
+
 /* Account for new data that has been sent to the network. */
 static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 {
@@ -179,7 +182,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
  */
 void tcp_select_initial_window(int __space, __u32 mss,
 			       __u32 *rcv_wnd, __u32 *window_clamp,
-			       int wscale_ok, __u8 *rcv_wscale)
+			       int wscale_ok, __u8 *rcv_wscale,
+			       struct tcp_sock *tp)
 {
 	unsigned int space = (__space < 0 ? 0 : __space);
 
@@ -228,7 +232,13 @@ void tcp_select_initial_window(int __space, __u32 mss,
 			init_cwnd = 2;
 		else if (mss > 1460)
 			init_cwnd = 3;
-		if (*rcv_wnd > init_cwnd * mss)
+		/* when initializing use the value from init_rcv_wnd
+		 * rather than the default from above
+		 */
+		if (tp && tp->rx_opt.init_rcv_wnd &&
+		    (*rcv_wnd > tp->rx_opt.init_rcv_wnd * mss))
+			*rcv_wnd = tp->rx_opt.init_rcv_wnd * mss;
+		else if (*rcv_wnd > init_cwnd * mss)
 			*rcv_wnd = init_cwnd * mss;
 	}
 
@@ -2254,7 +2264,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 			&req->rcv_wnd,
 			&req->window_clamp,
 			ireq->wscale_ok,
-			&rcv_wscale);
+			&rcv_wscale,
+			tp);
 		ireq->rcv_wscale = rcv_wscale;
 	}
 
@@ -2342,7 +2353,8 @@ static void tcp_connect_init(struct sock *sk)
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
 				  sysctl_tcp_window_scaling,
-				  &rcv_wscale);
+				  &rcv_wscale,
+				  tp);
 
 	tp->rx_opt.rcv_wscale = rcv_wscale;
 	tp->rcv_ssthresh = tp->rcv_wnd;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 6b6ae91..062c730 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -267,7 +267,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
-				  ireq->wscale_ok, &rcv_wscale);
+				  ireq->wscale_ok, &rcv_wscale, tp);
 
 	ireq->rcv_wscale = rcv_wscale;
 
@@ -278,4 +278,3 @@ out_free:
 	reqsk_free(req);
 	return NULL;
 }
-

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] Add sysctl to set the advertised TCP initial receive window.
       [not found]   ` <65634d660912091001s44016cccq4f2422e613ba9db9@mail.gmail.com>
@ 2009-12-10  2:21     ` David Miller
  2009-12-10 21:55       ` Laurent Chavey
  0 siblings, 1 reply; 16+ messages in thread
From: David Miller @ 2009-12-10  2:21 UTC (permalink / raw)
  To: therbert; +Cc: andi, chavey, netdev

From: Tom Herbert <therbert@google.com>
Date: Wed, 9 Dec 2009 10:01:24 -0800

> If we can use larger initial congestion windows on private networks
> that we know to be

With the keyword here being "network", which is why this must be a
route attribute, not a sysctl with global unilateral effect.

Otherwise I can tell you what people with both forward and inward
facing interfaces are going to do, they'll turn the thing on globally
with your sysctl even though they process real Internet traffic.

No way, we're not going to make it so easy to screw things up.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Add sysctl to set the advertised TCP initial receive window.
  2009-12-10  2:05 chavey
@ 2009-12-10 16:57 ` Stephen Hemminger
  2009-12-10 18:55   ` Joe Perches
  2009-12-13  3:27 ` Eric W. Biederman
  1 sibling, 1 reply; 16+ messages in thread
From: Stephen Hemminger @ 2009-12-10 16:57 UTC (permalink / raw)
  To: chavey; +Cc: davem, netdev, therbert, chavey, joe, eric.dumazet

On Wed, 09 Dec 2009 18:05:42 -0800
chavey@google.com wrote:

> @@ -221,6 +222,7 @@ struct tcp_options_received {
>  	u8	num_sacks;	/* Number of SACK blocks		*/
>  	u16	user_mss;  	/* mss requested by user in ioctl */
>  	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
> +	u8	init_rcv_wnd;	/* TCP initial receive window in MSS */
>  };

You could use the hole in the structure:

                snd_wscale : 4, /* Window scaling received from sender  */
                rcv_wscale : 4; /* Window scaling to send to receiver   */
+	u8	init_rcv_wnd;	/* TCP initial receive window in MSS */
/*      SACKs data      */
        u8      num_sacks;      /* Number of SACK blocks        

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Add sysctl to set the advertised TCP initial receive window.
  2009-12-10 16:57 ` Stephen Hemminger
@ 2009-12-10 18:55   ` Joe Perches
  0 siblings, 0 replies; 16+ messages in thread
From: Joe Perches @ 2009-12-10 18:55 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: chavey, davem, netdev, therbert, eric.dumazet

On Thu, 2009-12-10 at 08:57 -0800, Stephen Hemminger wrote:
> You could use the hole in the structure:
>                 snd_wscale : 4, /* Window scaling received from sender  */
>                 rcv_wscale : 4; /* Window scaling to send to receiver   */
> +	u8	init_rcv_wnd;	/* TCP initial receive window in MSS */
> /*      SACKs data      */
>         u8      num_sacks;      /* Number of SACK blocks        

That hole got filled by tcp cookies.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Add sysctl to set the advertised TCP initial receive window.
  2009-12-10  2:21     ` David Miller
@ 2009-12-10 21:55       ` Laurent Chavey
  0 siblings, 0 replies; 16+ messages in thread
From: Laurent Chavey @ 2009-12-10 21:55 UTC (permalink / raw)
  To: David Miller; +Cc: therbert, andi, netdev

I like the suggestion; it keeps this in line with how init_cwnd works now.

On Wed, Dec 9, 2009 at 6:21 PM, David Miller <davem@davemloft.net> wrote:
> From: Tom Herbert <therbert@google.com>
> Date: Wed, 9 Dec 2009 10:01:24 -0800
>
>> If we can use larger initial congestion windows on private networks
>> that we know to be
>
> With the keyword here being "network", which is why this must be a
> route attribute not a sysctl with global unilatteral effect.
>
> Otherwise I can tell you what people with both forward and inward
> facing interfaces are going to do, they'll turn the thing on globally
> with your sysctl even though they process real Internet traffic.
>
> No way, we're not going to make it so easy to screw things up.
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Add sysctl to set the advertised TCP initial receive window.
  2009-12-10  2:05 chavey
  2009-12-10 16:57 ` Stephen Hemminger
@ 2009-12-13  3:27 ` Eric W. Biederman
  1 sibling, 0 replies; 16+ messages in thread
From: Eric W. Biederman @ 2009-12-13  3:27 UTC (permalink / raw)
  To: chavey; +Cc: davem, netdev, therbert, joe, eric.dumazet

chavey@google.com writes:
>  
>  /* Update system visible IP port range */
>  static void set_local_port_range(int range[2])
> @@ -656,6 +658,16 @@ static struct ctl_table ipv4_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec
>  	},
> +	{
> +		.ctl_name	= CTL_UNNUMBERED,
> +		.procname	= "tcp_init_rcv_wnd",
> +		.data 		= &sysctl_tcp_init_rcv_wnd,
> +		.maxlen 	= sizeof(int),
> +		.mode 		= 0644,
> +		.proc_handler 	= proc_dointvec_minmax,
> +		.extra1		= &zero,
> +		.extra2		= &tcp_init_rcv_wnd_max
> +	},
>  #ifdef CONFIG_NETLABEL
>  	{
>  		.ctl_name	= NET_CIPSOV4_CACHE_ENABLE,

It looks like this won't be solved with a sysctl now, but I want to
mention that ctl_name is now gone.  So this patch won't compile in
mainline any more.

Eric

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2009-12-13  3:27 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-12-09 20:13 [PATCH] Add sysctl to set the advertised TCP initial receive window chavey
2009-12-09 20:33 ` Eric Dumazet
2009-12-09 20:55   ` Joe Perches
2009-12-09 21:11     ` Eric Dumazet
  -- strict thread matches above, loose matches on Subject: below --
2009-12-10  2:05 chavey
2009-12-10 16:57 ` Stephen Hemminger
2009-12-10 18:55   ` Joe Perches
2009-12-13  3:27 ` Eric W. Biederman
2009-12-09  0:30 chavey
2009-12-09  0:59 ` Joe Perches
2009-12-08 22:40 chavey
2009-12-08 23:00 ` Joe Perches
2009-12-09 13:26 ` Andi Kleen
2009-12-09 18:05   ` Tom Herbert
     [not found]   ` <65634d660912091001s44016cccq4f2422e613ba9db9@mail.gmail.com>
2009-12-10  2:21     ` David Miller
2009-12-10 21:55       ` Laurent Chavey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).