netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Lawrence Brakmo <brakmo@fb.com>
To: netdev <netdev@vger.kernel.org>
Cc: Kernel Team <kernel-team@fb.com>, Blake Matheny <bmatheny@fb.com>,
	Alexei Starovoitov <ast@fb.com>,
	Daniel Borkmann <daniel@iogearbox.net>,
	David Ahern <dsa@cumulusnetworks.com>
Subject: [PATCH net-next v4 05/16] bpf: Support for setting initial receive window
Date: Wed, 28 Jun 2017 10:31:13 -0700	[thread overview]
Message-ID: <20170628173124.3299500-6-brakmo@fb.com> (raw)
In-Reply-To: <20170628173124.3299500-1-brakmo@fb.com>

This patch adds support for setting the initial advertized window from
within a BPF_SOCK_OPS program. This can be used to support larger
initial cwnd values in environments where it is known to be safe.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
---
 include/net/tcp.h        | 10 ++++++++++
 include/uapi/linux/bpf.h |  4 ++++
 net/ipv4/tcp_minisocks.c |  9 ++++++++-
 net/ipv4/tcp_output.c    |  7 ++++++-
 4 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index cd9ef63..af404aa 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2069,4 +2069,14 @@ static inline u32 tcp_timeout_init(struct sock *sk, bool is_req_sock)
 	return timeout;
 }
 
+static inline u32 tcp_rwnd_init_bpf(struct sock *sk, bool is_req_sock)
+{
+	int rwnd;
+
+	rwnd = tcp_call_bpf(sk, is_req_sock, BPF_SOCK_OPS_RWND_INIT);
+
+	if (rwnd < 0)
+		rwnd = 0;
+	return rwnd;
+}
 #endif	/* _TCP_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4174668..cdec348 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -749,6 +749,10 @@ enum {
 	BPF_SOCK_OPS_TIMEOUT_INIT,	/* Should return SYN-RTO value to use or
 					 * -1 if default value should be used
 					 */
+	BPF_SOCK_OPS_RWND_INIT,		/* Should return initial advertized
+					 * window (in packets) or -1 if default
+					 * value should be used
+					 */
 };
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d30ee31..bbaf3c6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -351,6 +351,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
 	int full_space = tcp_full_space(sk_listener);
 	u32 window_clamp;
 	__u8 rcv_wscale;
+	u32 rcv_wnd;
 	int mss;
 
 	mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
@@ -363,6 +364,12 @@ void tcp_openreq_init_rwin(struct request_sock *req,
 	    (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
 		req->rsk_window_clamp = full_space;
 
+	rcv_wnd = tcp_rwnd_init_bpf((struct sock *)req, true);
+	if (rcv_wnd == 0)
+		rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+	else if (full_space < rcv_wnd * mss)
+		full_space = rcv_wnd * mss;
+
 	/* tcp_full_space because it is guaranteed to be the first packet */
 	tcp_select_initial_window(full_space,
 		mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -370,7 +377,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
 		&req->rsk_window_clamp,
 		ireq->wscale_ok,
 		&rcv_wscale,
-		dst_metric(dst, RTAX_INITRWND));
+		rcv_wnd);
 	ireq->rcv_wscale = rcv_wscale;
 }
 EXPORT_SYMBOL(tcp_openreq_init_rwin);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5e478a1..e5f623f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3267,6 +3267,7 @@ static void tcp_connect_init(struct sock *sk)
 	const struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u8 rcv_wscale;
+	u32 rcv_wnd;
 
 	/* We'll fix this up when we get a response from the other end.
 	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
@@ -3300,13 +3301,17 @@ static void tcp_connect_init(struct sock *sk)
 	    (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
 		tp->window_clamp = tcp_full_space(sk);
 
+	rcv_wnd = tcp_rwnd_init_bpf(sk, false);
+	if (rcv_wnd == 0)
+		rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+
 	tcp_select_initial_window(tcp_full_space(sk),
 				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
 				  sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
 				  &rcv_wscale,
-				  dst_metric(dst, RTAX_INITRWND));
+				  rcv_wnd);
 
 	tp->rx_opt.rcv_wscale = rcv_wscale;
 	tp->rcv_ssthresh = tp->rcv_wnd;
-- 
2.9.3

  parent reply	other threads:[~2017-06-28 17:31 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-28 17:31 [PATCH net-next v4 00/16] bpf: BPF cgroup support for sock_ops Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 01/16] bpf: BPF " Lawrence Brakmo
2017-06-28 19:53   ` Alexei Starovoitov
2017-06-29  9:46   ` Daniel Borkmann
2017-06-30  7:27     ` Lawrence Brakmo
2017-06-29 15:57   ` kbuild test robot
2017-06-29 16:21   ` kbuild test robot
2017-06-28 17:31 ` [PATCH net-next v4 02/16] bpf: program to load and attach sock_ops BPF progs Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 03/16] bpf: Support for per connection SYN/SYN-ACK RTOs Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 04/16] bpf: Sample bpf program to set " Lawrence Brakmo
2017-06-29 19:39   ` Jesper Dangaard Brouer
2017-06-29 22:25     ` Lawrence Brakmo
2017-06-28 17:31 ` Lawrence Brakmo [this message]
2017-06-28 17:31 ` [PATCH net-next v4 06/16] bpf: Sample bpf program to set initial window Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 07/16] bpf: Add setsockopt helper function to bpf Lawrence Brakmo
2017-06-29 10:08   ` Daniel Borkmann
2017-06-28 17:31 ` [PATCH net-next v4 08/16] bpf: Add TCP connection BPF callbacks Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 09/16] bpf: Sample BPF program to set buffer sizes Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 10/16] bpf: Add support for changing congestion control Lawrence Brakmo
2017-06-30 12:50   ` kbuild test robot
2017-06-28 17:31 ` [PATCH net-next v4 11/16] bpf: Sample BPF program to set " Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 12/16] bpf: Adds support for setting initial cwnd Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 13/16] bpf: Sample BPF program to set " Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 14/16] bpf: Adds support for setting sndcwnd clamp Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 15/16] bpf: Sample bpf program to set " Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 16/16] bpf: update tools/include/uapi/linux/bpf.h Lawrence Brakmo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170628173124.3299500-6-brakmo@fb.com \
    --to=brakmo@fb.com \
    --cc=ast@fb.com \
    --cc=bmatheny@fb.com \
    --cc=daniel@iogearbox.net \
    --cc=dsa@cumulusnetworks.com \
    --cc=kernel-team@fb.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).