[PATCH bpf-next v5 2/5] bpf: implement BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Peter Oskolkov <posk@google.com>
To: Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	netdev@vger.kernel.org
Cc: Peter Oskolkov <posk.devel@gmail.com>,
	David Ahern <dsahern@gmail.com>, Peter Oskolkov <posk@google.com>
Subject: [PATCH bpf-next v5 2/5] bpf: implement BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap
Date: Wed, 30 Jan 2019 15:51:33 -0800	[thread overview]
Message-ID: <20190130235136.136527-3-posk@google.com> (raw)
In-Reply-To: <20190130235136.136527-1-posk@google.com>

This patch implements BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap
BPF helper. It enables BPF programs (specifically, BPF_PROG_TYPE_LWT_IN
and BPF_PROG_TYPE_LWT_XMIT prog types) to add IP encapsulation headers
to packets (e.g. IP/GRE, GUE, IPIP).

This is useful when thousands of different short-lived flows should be
encapped, each with different and dynamically determined destination.
Although lwtunnels can be used in some of these scenarios, the ability
to dynamically generate encap headers adds more flexibility, e.g.
when routing depends on the state of the host (reflected in global bpf
maps).

Signed-off-by: Peter Oskolkov <posk@google.com>
---
 include/net/lwtunnel.h |  3 +++
 net/core/filter.c      |  3 ++-
 net/core/lwt_bpf.c     | 59 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 33fd9ba7e0e5..f0973eca8036 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -126,6 +126,8 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int lwtunnel_input(struct sk_buff *skb);
 int lwtunnel_xmit(struct sk_buff *skb);
+int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
+			  bool ingress);
 
 static inline void lwtunnel_set_redirect(struct dst_entry *dst)
 {
@@ -138,6 +140,7 @@ static inline void lwtunnel_set_redirect(struct dst_entry *dst)
 		dst->input = lwtunnel_input;
 	}
 }
+
 #else
 
 static inline void lwtstate_free(struct lwtunnel_state *lws)
diff --git a/net/core/filter.c b/net/core/filter.c
index 27d3fbe4b77b..de6bd4b4e0a3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -73,6 +73,7 @@
 #include <linux/seg6_local.h>
 #include <net/seg6.h>
 #include <net/seg6_local.h>
+#include <net/lwtunnel.h>
 
 /**
  *	sk_filter_trim_cap - run a packet through a socket filter
@@ -4804,7 +4805,7 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
 static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
 			     bool ingress)
 {
-	return -EINVAL;  /* Implemented in the next patch. */
+	return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
 }
 
 BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index a648568c5e8f..6a6e9acab73d 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -390,6 +390,65 @@ static const struct lwtunnel_encap_ops bpf_encap_ops = {
 	.owner		= THIS_MODULE,
 };
 
+int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
+{
+	struct iphdr *iph;
+	bool ipv4;
+	int err;
+
+	if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM))
+		return -EINVAL;
+
+	/* validate protocol and length */
+	iph = (struct iphdr *)hdr;
+	if (iph->version == 4) {
+		ipv4 = true;
+		if (unlikely(len < iph->ihl * 4))
+			return -EINVAL;
+	} else if (iph->version == 6) {
+		ipv4 = false;
+		if (unlikely(len < sizeof(struct ipv6hdr)))
+			return -EINVAL;
+	} else {
+		return -EINVAL;
+	}
+
+	if (ingress)
+		err = skb_cow_head(skb, len + skb->mac_len);
+	else
+		err = skb_cow_head(skb,
+				   len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
+	if (unlikely(err))
+		return err;
+
+	/* push the encap headers and fix pointers */
+	skb_reset_inner_headers(skb);
+	skb->encapsulation = 1;
+	skb_push(skb, len);
+	if (ingress)
+		skb_postpush_rcsum(skb, iph, len);
+	skb_reset_network_header(skb);
+	memcpy(skb_network_header(skb), hdr, len);
+	bpf_compute_data_pointers(skb);
+
+	if (ipv4) {
+		skb->protocol = htons(ETH_P_IP);
+		iph = ip_hdr(skb);
+		if (iph->ihl * 4 < len)
+			skb_set_transport_header(skb, iph->ihl * 4);
+
+		if (!iph->check)
+			iph->check = ip_fast_csum((unsigned char *)iph,
+						  iph->ihl);
+	} else {
+		skb->protocol = htons(ETH_P_IPV6);
+		if (sizeof(struct ipv6hdr) < len)
+			skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+	}
+
+	return 0;
+}
+
 static int __init bpf_lwt_init(void)
 {
 	return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
-- 
2.20.1.495.gaa96b0ce6b-goog

next prev parent reply	other threads:[~2019-01-30 23:51 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-30 23:51 [PATCH bpf-next v5 0/5] bpf: add BPF_LWT_ENCAP_IP option to bpf_lwt_push_encap Peter Oskolkov
2019-01-30 23:51 ` [PATCH bpf-next v5 1/5] bpf: add plumbing for BPF_LWT_ENCAP_IP in bpf_lwt_push_encap Peter Oskolkov
2019-01-30 23:51 ` Peter Oskolkov [this message]
2019-01-31 23:43   ` [PATCH bpf-next v5 2/5] bpf: implement BPF_LWT_ENCAP_IP mode " Daniel Borkmann
2019-02-01  1:47     ` Willem de Bruijn
2019-02-01 16:48       ` Peter Oskolkov
2019-01-30 23:51 ` [PATCH bpf-next v5 3/5] bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c Peter Oskolkov
2019-01-30 23:51 ` [PATCH bpf-next v5 4/5] bpf: sync <kdir>/<uapi>/bpf.h with tools/<uapi>/bpf.h Peter Oskolkov
2019-01-30 23:51 ` [PATCH bpf-next v5 5/5] selftests: bpf: add test_lwt_ip_encap selftest Peter Oskolkov
2019-01-31 16:38 ` [PATCH bpf-next v5 0/5] bpf: add BPF_LWT_ENCAP_IP option to bpf_lwt_push_encap David Ahern

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:33fd9ba7e0e dfblob:f0973eca803 dfblob:27d3fbe4b77
dfblob:de6bd4b4e0a dfblob:a648568c5e8 dfblob:6a6e9acab73 )
 OR (
bs:"[PATCH bpf-next v5 2/5] bpf: implement BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190130235136.136527-3-posk@google.com \
    --to=posk@google.com \
    --cc=ast@kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=dsahern@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=posk.devel@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).