From: Mathieu Jadin <mathjadin@gmail.com>
To: bpf@vger.kernel.org
Cc: Mathieu Jadin <mathjadin@gmail.com>,
KP Singh <kpsingh@kernel.org>,
netdev@vger.kernel.org, Martin KaFai Lau <kafai@fb.com>,
Song Liu <songliubraving@fb.com>, Yonghong Song <yhs@fb.com>,
John Fastabend <john.fastabend@gmail.com>,
Jakub Kicinski <kuba@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Eric Dumazet <edumazet@google.com>,
"David S. Miller" <davem@davemloft.net>,
Joe Stringer <joe@cilium.io>, David Ahern <dsahern@kernel.org>,
Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Subject: [PATCH bpf-next 1/2] net: Parse IPv6 ext headers from TCP sock_ops
Date: Tue, 7 Dec 2021 17:22:48 +0100 [thread overview]
Message-ID: <20211207162249.301625-1-mathjadin@gmail.com> (raw)
Add a flag that, if set, triggers a call to the eBPF program for each
packet holding an IPv6 extension header. Also add a sock_ops operator
that identifies such a call.
This change reuses skb_data and skb_data_end, introduced for TCP option
parsing, but here these pointers cover the IPv6 header and its extension
headers.
For instance, this change allows an eBPF sock_ops program to read
complex Segment Routing Headers carrying complex messages in TLVs, or to
observe their intermediate segments as soon as they are received.
Signed-off-by: Mathieu Jadin <mathjadin@gmail.com>
---
include/uapi/linux/bpf.h | 26 +++++++++++++++++++++++++-
net/ipv6/tcp_ipv6.c | 26 ++++++++++++++++++++++++--
tools/include/uapi/linux/bpf.h | 26 +++++++++++++++++++++++++-
3 files changed, 74 insertions(+), 4 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6297eafdc40f..79968e57b0b0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5822,6 +5822,10 @@ struct bpf_sock_ops {
* the 3WHS.
*
* bpf_load_hdr_opt() can also be used to read a particular option.
+ *
+ * Under sock_ops->op == BPF_SOCK_OPS_PARSE_IPV6_HDR_CB,
+ * [skb_data, skb_data_end] covers the whole IPv6 header
+ * with its extension headers.
*/
__bpf_md_ptr(void *, skb_data);
__bpf_md_ptr(void *, skb_data_end);
@@ -5890,8 +5894,15 @@ enum {
* options first before the BPF program does.
*/
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
+ /* Call bpf for all received IPv6 extension headers. The bpf prog will
+ * be called under sock_ops->op == BPF_SOCK_OPS_PARSE_IPV6_HDR_CB and
+ * will be able to parse the IPv6 header and its extension headers.
+ *
+ * The bpf prog will usually turn this off in the common cases.
+ */
+ BPF_SOCK_OPS_PARSE_IPV6_HDR_CB_FLAG = (1<<7),
/* Mask of all currently supported cb flags */
- BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F,
+ BPF_SOCK_OPS_ALL_CB_FLAGS = 0xFF,
};
/* List of known BPF sock_ops operators.
@@ -6004,6 +6015,19 @@ enum {
* by the kernel or the
* earlier bpf-progs.
*/
+ BPF_SOCK_OPS_PARSE_IPV6_HDR_CB, /* Parse the IPv6 extension
+ * header option.
+ * It will be called to handle
+ * the packets received at
+ * an already established
+ * connection with an extension
+ * header.
+ *
+ * sock_ops->skb_data:
+ * Referring to the received skb.
+ * It covers the IPv6 header and
+ * its extension headers only.
+ */
};
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3b7d6ede1364..20c83c089ebf 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1471,7 +1471,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct sk_buff *opt_skb = NULL;
- struct tcp_sock *tp;
+ struct tcp_sock *tp = tcp_sk(sk);
/* Imagine: socket is IPv6. IPv4 packet arrives,
goes to IPv4 receive handler and backlogged.
@@ -1519,6 +1519,29 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
}
}
+ /* Call ebpf on packets with extension headers */
+ if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_PARSE_IPV6_HDR_CB_FLAG) &&
+ ipv6_hdr(skb)->nexthdr != IPPROTO_TCP) {
+ struct bpf_sock_ops_kern sock_ops;
+ void *old_data_ptr;
+
+ memset(&sock_ops, 0,
+ offsetof(struct bpf_sock_ops_kern, temp));
+ if (sk_fullsock(sk)) {
+ sock_ops.is_fullsock = 1;
+ sock_owned_by_me(sk);
+ }
+ sock_ops.op = BPF_SOCK_OPS_PARSE_IPV6_HDR_CB;
+ sock_ops.sk = sk;
+ sock_ops.skb = skb;
+ /* Temporarily use the network header as skb data */
+ sock_ops.skb_data_end = skb_transport_header(skb);
+ old_data_ptr = skb->data;
+ skb->data = skb_network_header(skb);
+ BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
+ skb->data = old_data_ptr;
+ }
+
tcp_rcv_established(sk, skb);
if (opt_skb)
goto ipv6_pktoptions;
@@ -1572,7 +1595,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
3. socket is not in passive state.
4. Finally, it really contains options, which user wants to receive.
*/
- tp = tcp_sk(sk);
if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6297eafdc40f..79968e57b0b0 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5822,6 +5822,10 @@ struct bpf_sock_ops {
* the 3WHS.
*
* bpf_load_hdr_opt() can also be used to read a particular option.
+ *
+ * Under sock_ops->op == BPF_SOCK_OPS_PARSE_IPV6_HDR_CB,
+ * [skb_data, skb_data_end] covers the whole IPv6 header
+ * with its extension headers.
*/
__bpf_md_ptr(void *, skb_data);
__bpf_md_ptr(void *, skb_data_end);
@@ -5890,8 +5894,15 @@ enum {
* options first before the BPF program does.
*/
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
+ /* Call bpf for all received IPv6 extension headers. The bpf prog will
+ * be called under sock_ops->op == BPF_SOCK_OPS_PARSE_IPV6_HDR_CB and
+ * will be able to parse the IPv6 header and its extension headers.
+ *
+ * The bpf prog will usually turn this off in the common cases.
+ */
+ BPF_SOCK_OPS_PARSE_IPV6_HDR_CB_FLAG = (1<<7),
/* Mask of all currently supported cb flags */
- BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F,
+ BPF_SOCK_OPS_ALL_CB_FLAGS = 0xFF,
};
/* List of known BPF sock_ops operators.
@@ -6004,6 +6015,19 @@ enum {
* by the kernel or the
* earlier bpf-progs.
*/
+ BPF_SOCK_OPS_PARSE_IPV6_HDR_CB, /* Parse the IPv6 extension
+ * header option.
+ * It will be called to handle
+ * the packets received at
+ * an already established
+ * connection with an extension
+ * header.
+ *
+ * sock_ops->skb_data:
+ * Referring to the received skb.
+ * It covers the IPv6 header and
+ * its extension headers only.
+ */
};
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
--
2.32.0
next reply other threads:[~2021-12-07 16:23 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-12-07 16:22 Mathieu Jadin [this message]
2021-12-07 16:22 ` [PATCH bpf-next 2/2] selftests/bpf: Test for IPv6 ext header parsing Mathieu Jadin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211207162249.301625-1-mathjadin@gmail.com \
--to=mathjadin@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=dsahern@kernel.org \
--cc=edumazet@google.com \
--cc=joe@cilium.io \
--cc=john.fastabend@gmail.com \
--cc=kafai@fb.com \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=songliubraving@fb.com \
--cc=yhs@fb.com \
--cc=yoshfuji@linux-ipv6.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).