netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next 1/2] net: Parse IPv6 ext headers from TCP sock_ops
@ 2021-12-07 16:22 Mathieu Jadin
  2021-12-07 16:22 ` [PATCH bpf-next 2/2] selftests/bpf: Test for IPv6 ext header parsing Mathieu Jadin
  0 siblings, 1 reply; 2+ messages in thread
From: Mathieu Jadin @ 2021-12-07 16:22 UTC (permalink / raw)
  To: bpf
  Cc: Mathieu Jadin, KP Singh, netdev, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, Jakub Kicinski, Andrii Nakryiko,
	Alexei Starovoitov, Daniel Borkmann, Eric Dumazet,
	David S. Miller, Joe Stringer, David Ahern, Hideaki YOSHIFUJI

Add a flag that, if set, triggers a call to the eBPF program for each
packet holding an IPv6 extension header. Also add a sock_ops operator
that identifies such a call.

This change uses skb_data and skb_data_end introduced for TCP options'
parsing but these pointers cover the IPv6 header and its extension
headers.

For instance, this change allows an eBPF sock_ops program to read
complex Segment Routing Headers carrying complex messages in TLVs, or
to observe their intermediate segments as soon as they are received.

Signed-off-by: Mathieu Jadin <mathjadin@gmail.com>
---
 include/uapi/linux/bpf.h       | 26 +++++++++++++++++++++++++-
 net/ipv6/tcp_ipv6.c            | 26 ++++++++++++++++++++++++--
 tools/include/uapi/linux/bpf.h | 26 +++++++++++++++++++++++++-
 3 files changed, 74 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6297eafdc40f..79968e57b0b0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5822,6 +5822,10 @@ struct bpf_sock_ops {
 	 *					the 3WHS.
 	 *
 	 * bpf_load_hdr_opt() can also be used to read a particular option.
+	 *
+	 * Under sock_ops->op == BPF_SOCK_OPS_PARSE_IPV6_HDR_CB,
+	 * [skb_data, skb_data_end] covers the whole IPv6 header
+	 * with its extension headers.
 	 */
 	__bpf_md_ptr(void *, skb_data);
 	__bpf_md_ptr(void *, skb_data_end);
@@ -5890,8 +5894,15 @@ enum {
 	 * options first before the BPF program does.
 	 */
 	BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
+	/* Call bpf for all received IPv6 extension headers.  The bpf prog will
+	 * be called under sock_ops->op == BPF_SOCK_OPS_PARSE_IPV6_HDR_CB and
+	 * will be able to parse the IPv6 header and its extension headers.
+	 *
+	 * The bpf prog will usually turn this off in the common cases.
+	 */
+	BPF_SOCK_OPS_PARSE_IPV6_HDR_CB_FLAG = (1<<7),
 /* Mask of all currently supported cb flags */
-	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x7F,
+	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0xFF,
 };
 
 /* List of known BPF sock_ops operators.
@@ -6004,6 +6015,19 @@ enum {
 					 * by the kernel or the
 					 * earlier bpf-progs.
 					 */
+	BPF_SOCK_OPS_PARSE_IPV6_HDR_CB,	/* Parse the IPv6 extension
+					 * header option.
+					 * It will be called to handle
+					 * the packets received at
+					 * an already established
+					 * connection with an extension
+					 * header.
+					 *
+					 * sock_ops->skb_data:
+					 * Referring to the received skb.
+					 * It covers the IPv6 header and
+					 * its extension headers only.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3b7d6ede1364..20c83c089ebf 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1471,7 +1471,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 	struct sk_buff *opt_skb = NULL;
-	struct tcp_sock *tp;
+	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Imagine: socket is IPv6. IPv4 packet arrives,
 	   goes to IPv4 receive handler and backlogged.
@@ -1519,6 +1519,29 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 			}
 		}
 
+		/* Call ebpf on packets with extension headers */
+		if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_PARSE_IPV6_HDR_CB_FLAG) &&
+		    ipv6_hdr(skb)->nexthdr != IPPROTO_TCP) {
+			struct bpf_sock_ops_kern sock_ops;
+			void *old_data_ptr;
+
+			memset(&sock_ops, 0,
+			       offsetof(struct bpf_sock_ops_kern, temp));
+			if (sk_fullsock(sk)) {
+				sock_ops.is_fullsock = 1;
+				sock_owned_by_me(sk);
+			}
+			sock_ops.op = BPF_SOCK_OPS_PARSE_IPV6_HDR_CB;
+			sock_ops.sk = sk;
+			sock_ops.skb = skb;
+			/* Temporarily use the network header as skb data */
+			sock_ops.skb_data_end = skb_transport_header(skb);
+			old_data_ptr = skb->data;
+			skb->data = skb_network_header(skb);
+			BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
+			skb->data = old_data_ptr;
+		}
+
 		tcp_rcv_established(sk, skb);
 		if (opt_skb)
 			goto ipv6_pktoptions;
@@ -1572,7 +1595,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	   3. socket is not in passive state.
 	   4. Finally, it really contains options, which user wants to receive.
 	 */
-	tp = tcp_sk(sk);
 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6297eafdc40f..79968e57b0b0 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5822,6 +5822,10 @@ struct bpf_sock_ops {
 	 *					the 3WHS.
 	 *
 	 * bpf_load_hdr_opt() can also be used to read a particular option.
+	 *
+	 * Under sock_ops->op == BPF_SOCK_OPS_PARSE_IPV6_HDR_CB,
+	 * [skb_data, skb_data_end] covers the whole IPv6 header
+	 * with its extension headers.
 	 */
 	__bpf_md_ptr(void *, skb_data);
 	__bpf_md_ptr(void *, skb_data_end);
@@ -5890,8 +5894,15 @@ enum {
 	 * options first before the BPF program does.
 	 */
 	BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
+	/* Call bpf for all received IPv6 extension headers.  The bpf prog will
+	 * be called under sock_ops->op == BPF_SOCK_OPS_PARSE_IPV6_HDR_CB and
+	 * will be able to parse the IPv6 header and its extension headers.
+	 *
+	 * The bpf prog will usually turn this off in the common cases.
+	 */
+	BPF_SOCK_OPS_PARSE_IPV6_HDR_CB_FLAG = (1<<7),
 /* Mask of all currently supported cb flags */
-	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x7F,
+	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0xFF,
 };
 
 /* List of known BPF sock_ops operators.
@@ -6004,6 +6015,19 @@ enum {
 					 * by the kernel or the
 					 * earlier bpf-progs.
 					 */
+	BPF_SOCK_OPS_PARSE_IPV6_HDR_CB,	/* Parse the IPv6 extension
+					 * header option.
+					 * It will be called to handle
+					 * the packets received at
+					 * an already established
+					 * connection with an extension
+					 * header.
+					 *
+					 * sock_ops->skb_data:
+					 * Referring to the received skb.
+					 * It covers the IPv6 header and
+					 * its extension headers only.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH bpf-next 2/2] selftests/bpf: Test for IPv6 ext header parsing
  2021-12-07 16:22 [PATCH bpf-next 1/2] net: Parse IPv6 ext headers from TCP sock_ops Mathieu Jadin
@ 2021-12-07 16:22 ` Mathieu Jadin
  0 siblings, 0 replies; 2+ messages in thread
From: Mathieu Jadin @ 2021-12-07 16:22 UTC (permalink / raw)
  To: bpf
  Cc: Mathieu Jadin, John Fastabend, Yonghong Song, linux-kselftest,
	Martin KaFai Lau, netdev, Shuah Khan, KP Singh, Daniel Borkmann,
	Alexei Starovoitov, Song Liu, Andrii Nakryiko

This test creates a client and a server exchanging a single byte
with a Segment Routing Header and the eBPF program saves
the inner segment in a sk_storage. The test program checks that
the segment is correct.

Signed-off-by: Mathieu Jadin <mathjadin@gmail.com>
---
 .../bpf/prog_tests/tcp_ipv6_exthdr_srh.c      | 171 ++++++++++++++++++
 .../selftests/bpf/progs/tcp_ipv6_exthdr_srh.c |  78 ++++++++
 2 files changed, 249 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/tcp_ipv6_exthdr_srh.c
 create mode 100644 tools/testing/selftests/bpf/progs/tcp_ipv6_exthdr_srh.c

diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_ipv6_exthdr_srh.c b/tools/testing/selftests/bpf/prog_tests/tcp_ipv6_exthdr_srh.c
new file mode 100644
index 000000000000..70f7ee230975
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_ipv6_exthdr_srh.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <linux/seg6.h>
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+struct tcp_srh_storage {
+	struct in6_addr inner_segment;
+};
+
+// Send the single byte 0x55 on fd. Anything other than a one-byte
+// send is flagged through CHECK_FAIL() and reported with perror().
+static void send_byte(int fd)
+{
+	if (CHECK_FAIL(send(fd, &(char){ 0x55 }, sizeof(char), 0) != 1))
+		perror("Failed to send single byte");
+}
+
+// Compare the map entry keyed by server_fd with client_srh->segments[1].
+// Returns 0 if they match, 1 if they differ, -1 if the lookup fails.
+static int verify_srh(int map_fd, int server_fd, struct ipv6_sr_hdr *client_srh)
+{
+	struct in6_addr *expected = &client_srh->segments[1];
+	struct tcp_srh_storage stored;
+
+	if (CHECK_FAIL(bpf_map_lookup_elem(map_fd, &server_fd, &stored) < 0)) {
+		perror("Failed to read socket storage");
+		return -1;
+	}
+
+	if (!memcmp(&stored.inner_segment, expected, sizeof(*expected)))
+		return 0;
+
+	log_err("The inner segment of the received SRH differs from the sent one");
+	return 1;
+}
+
+/*
+ * Attach the sock_ops program, send one byte over a loopback connection
+ * carrying a two-segment SRH and check the segment saved on the server.
+ * Returns 0 on success, non-zero on any failure.
+ */
+static int run_test(int cgroup_fd, int listen_fd)
+{
+	struct bpf_prog_load_attr attr = {
+		.prog_type = BPF_PROG_TYPE_SOCK_OPS,
+		.file = "./tcp_ipv6_exthdr_srh.o",
+		.expected_attach_type = BPF_CGROUP_SOCK_OPS,
+	};
+	size_t srh_size = sizeof(struct ipv6_sr_hdr) +
+		2 * sizeof(struct in6_addr);
+	int client_fd, server_fd, prog_fd, map_fd;
+	struct timeval tv = { .tv_sec = 1 };
+	struct ipv6_sr_hdr *client_srh;
+	struct bpf_object *obj;
+	struct bpf_map *map;
+	char byte;
+	int err;
+
+	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+	if (err) {
+		log_err("Failed to load BPF object");
+		return -1;
+	}
+
+	map = bpf_object__next_map(obj, NULL);
+	map_fd = bpf_map__fd(map);
+
+	err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (err) {
+		log_err("Failed to attach BPF program");
+		goto close_bpf_object;
+	}
+
+	client_fd = connect_to_fd(listen_fd, 0);
+	if (client_fd < 0) {
+		err = -1;
+		goto close_bpf_object;
+	}
+
+	server_fd = accept(listen_fd, NULL, NULL);
+	if (server_fd < 0) {
+		err = -1;
+		goto close_client_fd;
+	}
+
+	/* Set an SRH with ::1 as an intermediate segment on the client */
+	client_srh = calloc(1, srh_size);
+	if (!client_srh) {
+		log_err("Failed to create the SRH to send");
+		err = -1;
+		goto close_server_fd;
+	}
+	client_srh->type = IPV6_SRCRT_TYPE_4;
+	// We do not count the first 8 bytes (RFC 8200 Section 4.4)
+	client_srh->hdrlen = (2 * sizeof(struct in6_addr)) >> 3;
+	client_srh->segments_left = 1;
+	client_srh->first_segment = 1;
+	// client_srh->segments[0] is set by the kernel
+	memcpy(&client_srh->segments[1], &in6addr_loopback,
+	       sizeof(struct in6_addr));
+
+	if (setsockopt(client_fd, SOL_IPV6, IPV6_RTHDR, client_srh,
+		       srh_size)) {
+		log_err("Failed to set the SRH on the client");
+		err = -1;
+		goto free_srh;
+	}
+
+	/* Send traffic with this SRH and check its parsing on the server
+	 * side; the 1s receive timeout keeps a lost byte from hanging the test
+	 */
+	if (setsockopt(server_fd, SOL_SOCKET, SO_RCVTIMEO, (const char *)&tv,
+		       sizeof(tv))) {
+		log_err("Failed to set the receive timeout on the server");
+		err = -1;
+		goto free_srh;
+	}
+
+	send_byte(client_fd);
+	if (recv(server_fd, &byte, 1, 0) != 1) {
+		log_err("Failed to get the byte under one second on the server 2");
+		err = -1;
+		goto free_srh;
+	}
+
+	err += verify_srh(map_fd, server_fd, client_srh);
+
+free_srh:
+	free(client_srh);
+close_server_fd:
+	close(server_fd);
+close_client_fd:
+	close(client_fd);
+close_bpf_object:
+	bpf_object__close(obj);
+	return err;
+}
+
+// Test entry point: join a dedicated cgroup, listen on ::1, enable seg6 on
+// "all" and "lo" for the duration of run_test(), then set both back to 0.
+void test_tcp_ipv6_exthdr_srh(void)
+{
+	int cgroup_fd = test__join_cgroup("/tcp_ipv6_exthdr_srh");
+	int listen_fd;
+
+	if (CHECK_FAIL(cgroup_fd < 0))
+		return;
+
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+	if (CHECK_FAIL(listen_fd < 0))
+		goto out_cgroup;
+
+	if (CHECK_FAIL(system("sysctl net.ipv6.conf.all.seg6_enabled=1")))
+		goto out_server;
+
+	if (!CHECK_FAIL(system("sysctl net.ipv6.conf.lo.seg6_enabled=1"))) {
+		CHECK_FAIL(run_test(cgroup_fd, listen_fd));
+
+		if (CHECK_FAIL(system("sysctl net.ipv6.conf.lo.seg6_enabled=0")))
+			log_err("Cannot reset sysctl net.ipv6.conf.lo.seg6_enabled to 0");
+	}
+
+	if (CHECK_FAIL(system("sysctl net.ipv6.conf.all.seg6_enabled=0")))
+		log_err("Cannot reset sysctl net.ipv6.conf.all.seg6_enabled to 0");
+
+out_server:
+	close(listen_fd);
+out_cgroup:
+	close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/tcp_ipv6_exthdr_srh.c b/tools/testing/selftests/bpf/progs/tcp_ipv6_exthdr_srh.c
new file mode 100644
index 000000000000..276bda8bbecb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ipv6_exthdr_srh.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/seg6.h>
+#include <linux/bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define NEXTHDR_ROUTING	43
+
+struct tcp_srh_storage {
+	struct in6_addr inner_segment;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct tcp_srh_storage);
+} socket_storage_map SEC(".maps");
+
+/* Save segments[1] of an SRH that directly follows the IPv6 header.
+ * Returns 0 when the segment was stored, 1 otherwise.
+ */
+static int read_incoming_srh(struct bpf_sock_ops *skops,
+			     struct tcp_srh_storage *storage)
+{
+	void *data_end = skops->skb_data_end;
+	struct ipv6_sr_hdr *srh;
+	struct ipv6hdr *ip6;
+
+	ip6 = (struct ipv6hdr *)skops->skb_data;
+	if ((void *)(ip6 + 1) > data_end || ip6->nexthdr != NEXTHDR_ROUTING)
+		return 1;
+
+	srh = (struct ipv6_sr_hdr *)(ip6 + 1);
+	if ((void *)(srh + 1) > data_end)
+		return 1;
+
+	/* first_segment is the last entry's index (RFC 8754), so need >= 1 */
+	if (srh->type != IPV6_SRCRT_TYPE_4 || srh->first_segment < 1)
+		return 1;
+
+	if ((void *)&srh->segments[2] > data_end)
+		return 1;
+
+	storage->inner_segment = srh->segments[1];
+	return 0;
+}
+
+/* Request the IPv6 header callback once established, then save the SRH */
+SEC("sockops")
+int srh_read(struct bpf_sock_ops *skops)
+{
+	struct tcp_srh_storage *storage;
+
+	if (!skops->sk)
+		return 1;
+
+	storage = bpf_sk_storage_get(&socket_storage_map, skops->sk, 0,
+				     BPF_SK_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return 1;
+
+	switch (skops->op) {
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		bpf_sock_ops_cb_flags_set(skops, skops->bpf_sock_ops_cb_flags |
+				  BPF_SOCK_OPS_PARSE_IPV6_HDR_CB_FLAG);
+		break;
+	case BPF_SOCK_OPS_PARSE_IPV6_HDR_CB:
+		return read_incoming_srh(skops, storage);
+	}
+
+	return 0;
+}
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-12-07 16:24 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-12-07 16:22 [PATCH bpf-next 1/2] net: Parse IPv6 ext headers from TCP sock_ops Mathieu Jadin
2021-12-07 16:22 ` [PATCH bpf-next 2/2] selftests/bpf: Test for IPv6 ext header parsing Mathieu Jadin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).