[PATCH bpf-next 1/2] net: Parse IPv6 ext headers from TCP sock_ops

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Mathieu Jadin <mathjadin@gmail.com>
To: bpf@vger.kernel.org
Cc: Mathieu Jadin <mathjadin@gmail.com>,
	KP Singh <kpsingh@kernel.org>,
	netdev@vger.kernel.org, Martin KaFai Lau <kafai@fb.com>,
	Song Liu <songliubraving@fb.com>, Yonghong Song <yhs@fb.com>,
	John Fastabend <john.fastabend@gmail.com>,
	Jakub Kicinski <kuba@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Eric Dumazet <edumazet@google.com>,
	"David S. Miller" <davem@davemloft.net>,
	Joe Stringer <joe@cilium.io>, David Ahern <dsahern@kernel.org>,
	Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Subject: [PATCH bpf-next 1/2] net: Parse IPv6 ext headers from TCP sock_ops
Date: Tue,  7 Dec 2021 17:22:48 +0100	[thread overview]
Message-ID: <20211207162249.301625-1-mathjadin@gmail.com> (raw)

Add a flag that, if set, triggers the call of eBPF program for each
packet holding an IPv6 extension header. Also add a sock_ops operator
that identifies such call.

This change uses skb_data and skb_data_end introduced for TCP options'
parsing but these pointer cover the IPv6 header and its extension
headers.

For instance, this change allows to read an eBPF sock_ops program to
read complex Segment Routing Headers carrying complex messages in TLV or
observing its intermediate segments as soon as they are received.

Signed-off-by: Mathieu Jadin <mathjadin@gmail.com>
---
 include/uapi/linux/bpf.h       | 26 +++++++++++++++++++++++++-
 net/ipv6/tcp_ipv6.c            | 26 ++++++++++++++++++++++++--
 tools/include/uapi/linux/bpf.h | 26 +++++++++++++++++++++++++-
 3 files changed, 74 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6297eafdc40f..79968e57b0b0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5822,6 +5822,10 @@ struct bpf_sock_ops {
 	 *					the 3WHS.
 	 *
 	 * bpf_load_hdr_opt() can also be used to read a particular option.
+	 *
+	 * Under sock_ops->op ==  BPF_SOCK_OPS_PARSE_IP6_HDR_CB,
+	 * [skb_data, skb_data_end] covers the whole IPv6 header
+	 * with its extension headers.
 	 */
 	__bpf_md_ptr(void *, skb_data);
 	__bpf_md_ptr(void *, skb_data_end);
@@ -5890,8 +5894,15 @@ enum {
 	 * options first before the BPF program does.
 	 */
 	BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
+	/* Call bpf for all received IPv6 extension headers.  The bpf prog will
+	 * be called under sock_ops->op == BPF_SOCK_OPS_PARSE_IPV6_HDR_CB and
+	 * will be able to parse the IPv6 header and its extension headers.
+	 *
+	 * The bpf prog will usually turn this off in the common cases.
+	 */
+	BPF_SOCK_OPS_PARSE_IPV6_HDR_CB_FLAG = (1<<7),
 /* Mask of all currently supported cb flags */
-	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x7F,
+	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0xFF,
 };
 
 /* List of known BPF sock_ops operators.
@@ -6004,6 +6015,19 @@ enum {
 					 * by the kernel or the
 					 * earlier bpf-progs.
 					 */
+	BPF_SOCK_OPS_PARSE_IPV6_HDR_CB,	/* Parse the IPv6 extension
+					 * header option.
+					 * It will be called to handle
+					 * the packets received at
+					 * an already established
+					 * connection with an extension
+					 * header.
+					 *
+					 * sock_ops->skb_data:
+					 * Referring to the received skb.
+					 * It covers the IPv6 header and
+					 * its extension headers only.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3b7d6ede1364..20c83c089ebf 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1471,7 +1471,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 	struct sk_buff *opt_skb = NULL;
-	struct tcp_sock *tp;
+	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Imagine: socket is IPv6. IPv4 packet arrives,
 	   goes to IPv4 receive handler and backlogged.
@@ -1519,6 +1519,29 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 			}
 		}
 
+		/* Call ebpf on packets with extension headers */
+		if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_PARSE_IPV6_HDR_CB_FLAG) &&
+		    ipv6_hdr(skb)->nexthdr != IPPROTO_TCP) {
+			struct bpf_sock_ops_kern sock_ops;
+			void *old_data_ptr;
+
+			memset(&sock_ops, 0,
+			       offsetof(struct bpf_sock_ops_kern, temp));
+			if (sk_fullsock(sk)) {
+				sock_ops.is_fullsock = 1;
+				sock_owned_by_me(sk);
+			}
+			sock_ops.op = BPF_SOCK_OPS_PARSE_IPV6_HDR_CB;
+			sock_ops.sk = sk;
+			sock_ops.skb = skb;
+			/* Temporary use the network header as skb data */
+			sock_ops.skb_data_end = skb_transport_header(skb);
+			old_data_ptr = skb->data;
+			skb->data = skb_network_header(skb);
+			BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
+			skb->data = old_data_ptr;
+		}
+
 		tcp_rcv_established(sk, skb);
 		if (opt_skb)
 			goto ipv6_pktoptions;
@@ -1572,7 +1595,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	   3. socket is not in passive state.
 	   4. Finally, it really contains options, which user wants to receive.
 	 */
-	tp = tcp_sk(sk);
 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6297eafdc40f..79968e57b0b0 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5822,6 +5822,10 @@ struct bpf_sock_ops {
 	 *					the 3WHS.
 	 *
 	 * bpf_load_hdr_opt() can also be used to read a particular option.
+	 *
+	 * Under sock_ops->op ==  BPF_SOCK_OPS_PARSE_IP6_HDR_CB,
+	 * [skb_data, skb_data_end] covers the whole IPv6 header
+	 * with its extension headers.
 	 */
 	__bpf_md_ptr(void *, skb_data);
 	__bpf_md_ptr(void *, skb_data_end);
@@ -5890,8 +5894,15 @@ enum {
 	 * options first before the BPF program does.
 	 */
 	BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
+	/* Call bpf for all received IPv6 extension headers.  The bpf prog will
+	 * be called under sock_ops->op == BPF_SOCK_OPS_PARSE_IPV6_HDR_CB and
+	 * will be able to parse the IPv6 header and its extension headers.
+	 *
+	 * The bpf prog will usually turn this off in the common cases.
+	 */
+	BPF_SOCK_OPS_PARSE_IPV6_HDR_CB_FLAG = (1<<7),
 /* Mask of all currently supported cb flags */
-	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x7F,
+	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0xFF,
 };
 
 /* List of known BPF sock_ops operators.
@@ -6004,6 +6015,19 @@ enum {
 					 * by the kernel or the
 					 * earlier bpf-progs.
 					 */
+	BPF_SOCK_OPS_PARSE_IPV6_HDR_CB,	/* Parse the IPv6 extension
+					 * header option.
+					 * It will be called to handle
+					 * the packets received at
+					 * an already established
+					 * connection with an extension
+					 * header.
+					 *
+					 * sock_ops->skb_data:
+					 * Referring to the received skb.
+					 * It covers the IPv6 header and
+					 * its extension headers only.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
-- 
2.32.0

next             reply	other threads:[~2021-12-07 16:23 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-07 16:22 Mathieu Jadin [this message]
2021-12-07 16:22 ` [PATCH bpf-next 2/2] selftests/bpf: Test for IPv6 ext header parsing Mathieu Jadin

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:6297eafdc40 dfblob:79968e57b0b dfblob:3b7d6ede136
dfblob:20c83c089eb dfblob:6297eafdc40 dfblob:79968e57b0b )
 OR (
bs:"[PATCH bpf-next 1/2] net: Parse IPv6 ext headers from TCP sock_ops" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211207162249.301625-1-mathjadin@gmail.com \
    --to=mathjadin@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=joe@cilium.io \
    --cc=john.fastabend@gmail.com \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=songliubraving@fb.com \
    --cc=yhs@fb.com \
    --cc=yoshfuji@linux-ipv6.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).