All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lawrence Brakmo <brakmo@fb.com>
To: netdev <netdev@vger.kernel.org>
Cc: Kernel Team <kernel-team@fb.com>, Blake Matheny <bmatheny@fb.com>,
	Alexei Starovoitov <ast@fb.com>,
	Daniel Borkmann <daniel@iogearbox.net>,
	David Ahern <dsa@cumulusnetworks.com>
Subject: [PATCH net-next v3 05/15] bpf: Support for setting initial receive window
Date: Mon, 19 Jun 2017 20:00:38 -0700	[thread overview]
Message-ID: <20170620030048.3275347-6-brakmo@fb.com> (raw)
In-Reply-To: <20170620030048.3275347-1-brakmo@fb.com>

This patch adds support for setting the initial advertized window from
within a BPF_SOCK_OPS program. This can be used to support larger
initial cwnd values in environments where it is known to be safe.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
---
 include/net/tcp.h        | 10 ++++++++++
 include/uapi/linux/bpf.h |  4 ++++
 net/ipv4/tcp_minisocks.c |  9 ++++++++-
 net/ipv4/tcp_output.c    |  7 ++++++-
 4 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index bdf6bfd..ff806d7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2062,4 +2062,14 @@ static inline u32 tcp_timeout_init(struct sock *sk, bool is_req_sock)
 	return timeout;
 }
 
+static inline u32 tcp_rwnd_init_bpf(struct sock *sk, bool is_req_sock)
+{
+	int rwnd;
+
+	rwnd = tcp_call_bpf(sk, is_req_sock, BPF_SOCK_OPS_RWND_INIT);
+
+	if (rwnd < 0)
+		rwnd = 0;
+	return rwnd;
+}
 #endif	/* _TCP_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4532c31..314fdf3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -749,6 +749,10 @@ enum {
 	BPF_SOCK_OPS_TIMEOUT_INIT,	/* Should return SYN-RTO value to use or
 					 * -1 if default value should be used
 					 */
+	BPF_SOCK_OPS_RWND_INIT,		/* Should return initial advertized
+					 * window (in packets) or -1 if default
+					 * value should be used
+					 */
 };
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d30ee31..bbaf3c6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -351,6 +351,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
 	int full_space = tcp_full_space(sk_listener);
 	u32 window_clamp;
 	__u8 rcv_wscale;
+	u32 rcv_wnd;
 	int mss;
 
 	mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
@@ -363,6 +364,12 @@ void tcp_openreq_init_rwin(struct request_sock *req,
 	    (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
 		req->rsk_window_clamp = full_space;
 
+	rcv_wnd = tcp_rwnd_init_bpf((struct sock *)req, true);
+	if (rcv_wnd == 0)
+		rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+	else if (full_space < rcv_wnd * mss)
+		full_space = rcv_wnd * mss;
+
 	/* tcp_full_space because it is guaranteed to be the first packet */
 	tcp_select_initial_window(full_space,
 		mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -370,7 +377,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
 		&req->rsk_window_clamp,
 		ireq->wscale_ok,
 		&rcv_wscale,
-		dst_metric(dst, RTAX_INITRWND));
+		rcv_wnd);
 	ireq->rcv_wscale = rcv_wscale;
 }
 EXPORT_SYMBOL(tcp_openreq_init_rwin);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5e478a1..e5f623f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3267,6 +3267,7 @@ static void tcp_connect_init(struct sock *sk)
 	const struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u8 rcv_wscale;
+	u32 rcv_wnd;
 
 	/* We'll fix this up when we get a response from the other end.
 	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
@@ -3300,13 +3301,17 @@ static void tcp_connect_init(struct sock *sk)
 	    (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
 		tp->window_clamp = tcp_full_space(sk);
 
+	rcv_wnd = tcp_rwnd_init_bpf(sk, false);
+	if (rcv_wnd == 0)
+		rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+
 	tcp_select_initial_window(tcp_full_space(sk),
 				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
 				  sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
 				  &rcv_wscale,
-				  dst_metric(dst, RTAX_INITRWND));
+				  rcv_wnd);
 
 	tp->rx_opt.rcv_wscale = rcv_wscale;
 	tp->rcv_ssthresh = tp->rcv_wnd;
-- 
2.9.3

  parent reply	other threads:[~2017-06-20  3:00 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-20  3:00 PATCH net-next v3 00/15 Lawrence Brakmo
2017-06-20  3:00 ` [PATCH net-next v3 01/15] bpf: BPF support for sock_ops Lawrence Brakmo
2017-06-22 22:41   ` Daniel Borkmann
2017-06-22 22:58     ` Lawrence Brakmo
2017-06-22 23:19       ` Daniel Borkmann
2017-06-22 23:57         ` Lawrence Brakmo
2017-06-23 21:15           ` Daniel Borkmann
2017-06-28 17:45             ` Lawrence Brakmo
2017-06-29  9:47               ` Daniel Borkmann
2017-06-20  3:00 ` [PATCH net-next v3 02/15] bpf: program to load sock_ops BPF programs Lawrence Brakmo
2017-06-20  3:00 ` [PATCH net-next v3 03/15] bpf: Support for per connection SYN/SYN-ACK RTOs Lawrence Brakmo
2017-06-20  3:00 ` [PATCH net-next v3 04/15] bpf: Sample bpf program to set " Lawrence Brakmo
2017-06-20  3:00 ` Lawrence Brakmo [this message]
2017-06-20  3:00 ` [PATCH net-next v3 06/15] bpf: Sample bpf program to set initial window Lawrence Brakmo
2017-06-20  3:00 ` [PATCH net-next v3 07/15] bpf: Add setsockopt helper function to bpf Lawrence Brakmo
2017-06-20 21:25   ` Craig Gallek
2017-06-21 16:51     ` Lawrence Brakmo
2017-06-21 17:13       ` Craig Gallek
2017-06-21 23:55         ` Lawrence Brakmo
2017-06-20  3:00 ` [PATCH net-next v3 08/15] bpf: Add TCP connection BPF callbacks Lawrence Brakmo
2017-06-20  3:00 ` [PATCH net-next v3 09/15] bpf: Sample BPF program to set buffer sizes Lawrence Brakmo
2017-06-20  3:00 ` [PATCH net-next v3 10/15] bpf: Add support for changing congestion control Lawrence Brakmo
2017-06-20  8:40   ` kbuild test robot
2017-06-20  3:00 ` [PATCH net-next v3 11/15] bpf: Sample BPF program to set " Lawrence Brakmo
2017-06-20  3:00 ` [PATCH net-next v3 12/15] bpf: Adds support for setting initial cwnd Lawrence Brakmo
2017-06-20  3:00 ` [PATCH net-next v3 13/15] bpf: Sample BPF program to set " Lawrence Brakmo
2017-06-20  3:00 ` [PATCH net-next v3 14/15] bpf: Adds support for setting sndcwnd clamp Lawrence Brakmo
2017-06-20  3:00 ` [PATCH net-next v3 15/15] bpf: Sample bpf program to set " Lawrence Brakmo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170620030048.3275347-6-brakmo@fb.com \
    --to=brakmo@fb.com \
    --cc=ast@fb.com \
    --cc=bmatheny@fb.com \
    --cc=daniel@iogearbox.net \
    --cc=dsa@cumulusnetworks.com \
    --cc=kernel-team@fb.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.