Netdev List
 help / color / mirror / Atom feed
From: Martin KaFai Lau <kafai@fb.com>
To: <bpf@vger.kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Eric Dumazet <edumazet@google.com>, <kernel-team@fb.com>,
	Lawrence Brakmo <brakmo@fb.com>,
	Neal Cardwell <ncardwell@google.com>, <netdev@vger.kernel.org>,
	Yuchung Cheng <ycheng@google.com>
Subject: [PATCH v3 bpf-next 4/9] tcp: Add unknown_opt arg to tcp_parse_options
Date: Thu, 30 Jul 2020 13:57:23 -0700	[thread overview]
Message-ID: <20200730205723.3353838-1-kafai@fb.com> (raw)
In-Reply-To: <20200730205657.3351905-1-kafai@fb.com>

In the latter patch, the bpf prog only wants to be called to handle
a header option if that particular header option cannot be handled by
the kernel.  This unknown option could be written by the peer's bpf-prog.
It could also be a new standard option that the running kernel does not
support it while a bpf-prog can handle it.

In a latter patch, the bpf prog will be called from tcp_validate_incoming()
if there is unknown option and a flag is set in tp->bpf_sock_ops_cb_flags.

Instead of using skb->cb[] in an earlier attempt, this patch
adds an optional arg "bool *unknown_opt" to tcp_parse_options().
The bool will be set to true if it has encountered an option
that the kernel does not recognize.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 drivers/infiniband/hw/cxgb4/cm.c |  2 +-
 include/net/tcp.h                |  3 ++-
 net/ipv4/syncookies.c            |  2 +-
 net/ipv4/tcp_input.c             | 40 +++++++++++++++++++++-----------
 net/ipv4/tcp_minisocks.c         |  4 ++--
 net/ipv6/syncookies.c            |  2 +-
 6 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 30e08bcc9afb..dedca6576bb9 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3949,7 +3949,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
 	 */
 	memset(&tmp_opt, 0, sizeof(tmp_opt));
 	tcp_clear_options(&tmp_opt);
-	tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
+	tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL, NULL);
 
 	req = __skb_push(skb, sizeof(*req));
 	memset(req, 0, sizeof(*req));
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 895e7aabf136..d49d8f1c961a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -413,7 +413,8 @@ int tcp_mmap(struct file *file, struct socket *sock,
 #endif
 void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
 		       struct tcp_options_received *opt_rx,
-		       int estab, struct tcp_fastopen_cookie *foc);
+		       int estab, struct tcp_fastopen_cookie *foc,
+		       bool *unknown_opt);
 const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
 
 /*
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 9a4f6b16c9bc..fd39aed4fcd3 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -313,7 +313,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 
 	/* check for timestamp cookie support */
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+	tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, NULL);
 
 	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
 		tsoff = secure_tcp_ts_off(sock_net(sk),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6c38ca9de17e..d9c878001be2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3799,7 +3799,7 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
 	foc->exp = exp_opt;
 }
 
-static void smc_parse_options(const struct tcphdr *th,
+static bool smc_parse_options(const struct tcphdr *th,
 			      struct tcp_options_received *opt_rx,
 			      const unsigned char *ptr,
 			      int opsize)
@@ -3808,10 +3808,13 @@ static void smc_parse_options(const struct tcphdr *th,
 	if (static_branch_unlikely(&tcp_have_smc)) {
 		if (th->syn && !(opsize & 1) &&
 		    opsize >= TCPOLEN_EXP_SMC_BASE &&
-		    get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+		    get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
 			opt_rx->smc_ok = 1;
+			return true;
+		}
 	}
 #endif
+	return false;
 }
 
 /* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped
@@ -3864,7 +3867,8 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
 void tcp_parse_options(const struct net *net,
 		       const struct sk_buff *skb,
 		       struct tcp_options_received *opt_rx, int estab,
-		       struct tcp_fastopen_cookie *foc)
+		       struct tcp_fastopen_cookie *foc,
+		       bool *unknown_opt)
 {
 	const unsigned char *ptr;
 	const struct tcphdr *th = tcp_hdr(skb);
@@ -3962,15 +3966,23 @@ void tcp_parse_options(const struct net *net,
 				 */
 				if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
 				    get_unaligned_be16(ptr) ==
-				    TCPOPT_FASTOPEN_MAGIC)
+				    TCPOPT_FASTOPEN_MAGIC) {
 					tcp_parse_fastopen_option(opsize -
 						TCPOLEN_EXP_FASTOPEN_BASE,
 						ptr + 2, th->syn, foc, true);
-				else
-					smc_parse_options(th, opt_rx, ptr,
-							  opsize);
+					break;
+				}
+
+				if (smc_parse_options(th, opt_rx, ptr, opsize))
+					break;
+
+				if (unknown_opt)
+					*unknown_opt = true;
 				break;
 
+			default:
+				if (unknown_opt)
+					*unknown_opt = true;
 			}
 			ptr += opsize-2;
 			length -= opsize;
@@ -4003,7 +4015,8 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
  */
 static bool tcp_fast_parse_options(const struct net *net,
 				   const struct sk_buff *skb,
-				   const struct tcphdr *th, struct tcp_sock *tp)
+				   const struct tcphdr *th, struct tcp_sock *tp,
+				   bool *unknown_opt)
 {
 	/* In the spirit of fast parsing, compare doff directly to constant
 	 * values.  Because equality is used, short doff can be ignored here.
@@ -4017,7 +4030,7 @@ static bool tcp_fast_parse_options(const struct net *net,
 			return true;
 	}
 
-	tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
+	tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL, unknown_opt);
 	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
 		tp->rx_opt.rcv_tsecr -= tp->tsoffset;
 
@@ -5492,9 +5505,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	bool rst_seq_match = false;
+	bool unknown_opt = false;
 
 	/* RFC1323: H1. Apply PAWS check first. */
-	if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
+	if (tcp_fast_parse_options(sock_net(sk), skb, th, tp, &unknown_opt) &&
 	    tp->rx_opt.saw_tstamp &&
 	    tcp_paws_discard(sk, skb)) {
 		if (!th->rst) {
@@ -5866,7 +5880,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 		/* Get original SYNACK MSS value if user MSS sets mss_clamp */
 		tcp_clear_options(&opt);
 		opt.user_mss = opt.mss_clamp = 0;
-		tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
+		tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL, NULL);
 		mss = opt.mss_clamp;
 	}
 
@@ -5951,7 +5965,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 	int saved_clamp = tp->rx_opt.mss_clamp;
 	bool fastopen_fail;
 
-	tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
+	tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc, NULL);
 	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
 		tp->rx_opt.rcv_tsecr -= tp->tsoffset;
 
@@ -6685,7 +6699,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tmp_opt.mss_clamp = af_ops->mss_clamp;
 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
 	tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
-			  want_cookie ? NULL : &foc);
+			  want_cookie ? NULL : &foc, NULL);
 
 	if (want_cookie && !tmp_opt.saw_tstamp)
 		tcp_clear_options(&tmp_opt);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 495dda2449fe..61f9194802c4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -98,7 +98,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 
 	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
-		tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
+		tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL, NULL);
 
 		if (tmp_opt.saw_tstamp) {
 			if (tmp_opt.rcv_tsecr)
@@ -580,7 +580,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
 	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(struct tcphdr)>>2)) {
-		tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
+		tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL, NULL);
 
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.ts_recent = req->ts_recent;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 13235a012388..f22961a73c2b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -157,7 +157,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	/* check for timestamp cookie support */
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+	tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, NULL);
 
 	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
 		tsoff = secure_tcpv6_ts_off(sock_net(sk),
-- 
2.24.1


  parent reply	other threads:[~2020-07-30 20:57 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-30 20:56 [PATCH v3 bpf-next 0/9] BPF TCP header options Martin KaFai Lau
2020-07-30 20:57 ` [PATCH v3 bpf-next 1/9] tcp: Use a struct to represent a saved_syn Martin KaFai Lau
2020-07-31 15:57   ` Eric Dumazet
2020-07-31 17:31     ` Eric Dumazet
2020-07-30 20:57 ` [PATCH v3 bpf-next 2/9] tcp: bpf: Add TCP_BPF_DELACK_MAX setsockopt Martin KaFai Lau
2020-07-30 20:57 ` [PATCH v3 bpf-next 3/9] tcp: bpf: Add TCP_BPF_RTO_MIN for bpf_setsockopt Martin KaFai Lau
2020-07-30 20:57 ` Martin KaFai Lau [this message]
2020-07-31 16:12   ` [PATCH v3 bpf-next 4/9] tcp: Add unknown_opt arg to tcp_parse_options Eric Dumazet
2020-07-31 17:37     ` Martin KaFai Lau
2020-07-30 20:57 ` [PATCH v3 bpf-next 5/9] bpf: sock_ops: Change some members of sock_ops_kern from u32 to u8 Martin KaFai Lau
2020-07-30 20:57 ` [PATCH v3 bpf-next 6/9] bpf: tcp: Allow bpf prog to write and parse TCP header option Martin KaFai Lau
2020-07-31 16:06   ` Eric Dumazet
2020-07-31 17:59     ` Martin KaFai Lau
2020-07-30 20:57 ` [PATCH v3 bpf-next 7/9] bpf: selftests: Add fastopen_connect to network_helpers Martin KaFai Lau
2020-07-30 20:57 ` [PATCH v3 bpf-next 8/9] bpf: selftests: tcp header options Martin KaFai Lau
2020-07-30 20:57 ` [PATCH v3 bpf-next 9/9] tcp: bpf: Optionally store mac header in TCP_SAVE_SYN Martin KaFai Lau
2020-07-31 15:51   ` Eric Dumazet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200730205723.3353838-1-kafai@fb.com \
    --to=kafai@fb.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=brakmo@fb.com \
    --cc=daniel@iogearbox.net \
    --cc=edumazet@google.com \
    --cc=kernel-team@fb.com \
    --cc=ncardwell@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=ycheng@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox