netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Shrijeet Mukherjee <shrijeetoss@gmail.com>
To: daniel@iogearbox.net
Cc: netdev@vger.kernel.org, roopa@cumulusnetworks.com,
	dsa@cumulusnetworks.com, ast@fb.com
Subject: [PATCH bpf-next RFC 2/2] Add MPLS label push/pop functions for EBPF
Date: Wed, 28 Mar 2018 09:27:15 -0700	[thread overview]
Message-ID: <20180328162715.113-2-shrijeetoss@gmail.com> (raw)
In-Reply-To: <20180328162715.113-1-shrijeetoss@gmail.com>

From: Shrijeet Mukherjee <shrijeet@gmail.com>

Push: takes an array of labels and its size, and encapsulates the packet
Pop : takes a number "n" and pops the top "n" labels

Signed-off-by: Shrijeet Mukherjee <shm@cumulusnetworks.com>

PS: Will be adding tests as well, refactoring my test into something
smaller.

---
 include/linux/bpf.h      |   2 +
 include/net/mpls.h       |  38 +++++++++++++-
 include/uapi/linux/bpf.h |  11 +++-
 net/core/filter.c        | 127 ++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 175 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 819229c80eca..b38bfabc1fb5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -672,6 +672,8 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
 extern const struct bpf_func_proto bpf_get_current_comm_proto;
 extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
 extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
+extern const struct bpf_func_proto bpf_skb_mpls_push_proto;
+extern const struct bpf_func_proto bpf_skb_mpls_pop_proto;
 extern const struct bpf_func_proto bpf_get_stackid_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
 
diff --git a/include/net/mpls.h b/include/net/mpls.h
index 2583dbc689b8..3a5b8c00823d 100644
--- a/include/net/mpls.h
+++ b/include/net/mpls.h
@@ -24,14 +24,50 @@ struct mpls_shim_hdr {
 	__be32 label_stack_entry;
 };
 
+struct mpls_entry_decoded {	/* host-order fields filled by mpls_entry_decode() */
+	u32 label;	/* MPLS_LS_LABEL_MASK bits of the entry */
+	u8 ttl;		/* MPLS_LS_TTL_MASK bits of the entry */
+	u8 tc;		/* MPLS_LS_TC_MASK bits of the entry */
+	u8 bos;		/* MPLS_LS_S_MASK bit of the entry */
+};
+
 static inline bool eth_p_mpls(__be16 eth_type)
 {
 	return eth_type == htons(ETH_P_MPLS_UC) ||
 		eth_type == htons(ETH_P_MPLS_MC);
 }
 
-static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
+static inline struct mpls_shim_hdr *skb_mpls_hdr(const struct sk_buff *skb)
 {
 	return (struct mpls_shim_hdr *)skb_network_header(skb);
 }
+
+/* Pack label, tc, the bos flag and ttl into one network-order label stack
+ * entry; the inputs are shifted into place without being masked.
+ */
+static inline struct mpls_shim_hdr
+mpls_entry_encode(u32 label, unsigned int ttl, unsigned int tc, bool bos)
+{
+	u32 lse = label << MPLS_LS_LABEL_SHIFT;
+
+	lse |= tc << MPLS_LS_TC_SHIFT;
+	lse |= bos ? (1 << MPLS_LS_S_SHIFT) : 0;
+	lse |= ttl << MPLS_LS_TTL_SHIFT;
+	return (struct mpls_shim_hdr){ .label_stack_entry = cpu_to_be32(lse) };
+}
+
+/* Unpack one network-order label stack entry into host-order fields. */
+static inline
+struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr)
+{
+	const u32 lse = be32_to_cpu(hdr->label_stack_entry);
+
+	return (struct mpls_entry_decoded){
+		.label	= (lse & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT,
+		.ttl	= (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT,
+		.tc	= (lse & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT,
+		.bos	= (lse & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT,
+	};
+}
+
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 18b7c510c511..2278548e1f8f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -439,6 +439,12 @@ union bpf_attr {
  * int bpf_skb_vlan_pop(skb)
  *     Return: 0 on success or negative error
  *
+ * int bpf_skb_mpls_push(skb, lbls, num_lbls)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_mpls_pop(skb, num_lbls)
+ *     Return: number of labels popped; 0 is a no-op, deliver packet to current dst
+ *
  * int bpf_skb_get_tunnel_key(skb, key, size, flags)
  * int bpf_skb_set_tunnel_key(skb, key, size, flags)
  *     retrieve or populate tunnel metadata
@@ -794,7 +800,10 @@ union bpf_attr {
 	FN(msg_redirect_map),		\
 	FN(msg_apply_bytes),		\
 	FN(msg_cork_bytes),		\
-	FN(msg_pull_data),
+	FN(msg_pull_data),		\
+	FN(skb_mpls_push),		\
+	FN(skb_mpls_pop),
+
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 00f62fafc788..c96ae8ef423d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2522,7 +2522,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
 }
 
 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
-	   u32, mode, u64, flags)
+				u32, mode, u64, flags)
 {
 	if (unlikely(flags))
 		return -EINVAL;
@@ -2542,6 +2542,125 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
+/* Open len_diff bytes at offset skb_mac_header_len() for a label stack and
+ * retag the frame as ETH_P_MPLS_UC. Returns 0 or a negative error.
+ */
+static int bpf_skb_mpls_net_grow(struct sk_buff *skb, int len_diff)
+{
+	u32 insert_at = skb_mac_header_len(skb); /* LL_RESERVED_SPACE ?? */
+	struct skb_shared_info *shinfo;
+	int err;
+
+	err = skb_cow(skb, len_diff);
+	if (unlikely(err < 0))
+		return err;
+
+	skb_set_inner_protocol(skb, skb->protocol);
+	skb_reset_inner_network_header(skb);
+	err = bpf_skb_generic_push(skb, insert_at, len_diff);
+	if (unlikely(err < 0))
+		return err;
+
+	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, ETH_HLEN);
+	skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC);
+	if (skb_is_gso(skb)) {
+		shinfo = skb_shinfo(skb);
+		shinfo->gso_size -= len_diff;	/* header grew, so MSS shrinks */
+		shinfo->gso_type |= SKB_GSO_DODGY; /* header must be checked */
+		shinfo->gso_segs = 0;		/* ... and gso_segs recomputed */
+	}
+	bpf_compute_data_pointers(skb);
+	return 0;
+}
+
+BPF_CALL_2(bpf_skb_mpls_pop, struct sk_buff*, skb, u32, num_lbls)
+{
+	u32 i = 0;			/* label stack entries consumed so far */
+	struct mpls_shim_hdr *hdr;
+	unsigned char *cursor;		/* walks entries from the network header */
+	struct mpls_entry_decoded dec;
+	/* Pops up to num_lbls entries, stopping early at bos; returns the count. */
+	if (num_lbls == 0)
+		return 0;
+	/* NOTE(review): no check that the skb is MPLS or holds this many entries. */
+	cursor = skb_network_header(skb);
+	do {
+		hdr = (struct mpls_shim_hdr *)cursor;
+		dec = mpls_entry_decode(hdr);
+		i++; cursor = cursor + sizeof(struct mpls_shim_hdr);
+	} while (dec.bos != 1 && i < num_lbls);
+	/* NOTE(review): skb_pull() starts at skb->data; confirm that is the stack. */
+	bpf_push_mac_rcsum(skb);
+	skb_pull(skb, i * sizeof(struct mpls_shim_hdr));
+	skb_reset_network_header(skb);
+	/* NOTE(review): stays ETH_P_MPLS_UC even if the bos entry was popped? */
+	skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC);
+	bpf_pull_mac_rcsum(skb);
+	bpf_compute_data_pointers(skb);
+
+	return i;
+}
+
+const struct bpf_func_proto bpf_skb_mpls_pop_proto = {
+	.func		= bpf_skb_mpls_pop,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,	/* skb */
+	.arg2_type	= ARG_ANYTHING,		/* num_lbls */
+};
+EXPORT_SYMBOL_GPL(bpf_skb_mpls_pop_proto);
+
+/* Encapsulate skb in num_lbls MPLS labels taken from lbls, with lbls[0]
+ * outermost. Returns 0, or the negative error from growing the header.
+ */
+BPF_CALL_3(bpf_skb_mpls_push, struct sk_buff*, skb,
+	   __be32*, lbls, u32, num_lbls)
+{
+	unsigned int new_header_size = num_lbls * sizeof(__be32);
+	struct dst_entry *dst = skb_dst(skb);
+	struct mpls_shim_hdr *hdr;
+	unsigned int ttl = 255;
+	bool bos = true;
+	int ret, i;
+
+	/* Read the TTL first: the grow below moves the network header onto
+	 * the label stack, away from the IP header. No dst: keep the default.
+	 * XXX this may need finesse to integrate with global TTL values for MPLS
+	 */
+	if (dst && dst->ops->family == AF_INET)
+		ttl = ip_hdr(skb)->ttl;
+	else if (dst && dst->ops->family == AF_INET6)
+		ttl = ipv6_hdr(skb)->hop_limit;
+
+	/* Ensure there is enough space for the headers in the skb */
+	ret = bpf_skb_mpls_net_grow(skb, new_header_size);
+	if (ret < 0) {
+		bpf_compute_data_pointers(skb);
+		return ret;
+	}
+	if (dst)
+		skb->dev = dst->dev;
+
+	/* Push the new labels; only the last entry carries the bos flag */
+	hdr = skb_mpls_hdr(skb);
+	for (i = num_lbls - 1; i >= 0; i--, bos = false)
+		hdr[i] = mpls_entry_encode(lbls[i], ttl, 0, bos);
+
+	bpf_compute_data_pointers(skb);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_skb_mpls_push_proto = {
+	.func		= bpf_skb_mpls_push,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,	/* skb */
+	.arg2_type	= ARG_PTR_TO_MEM,	/* lbls */
+	.arg3_type	= ARG_CONST_SIZE,	/* num_lbls; NOTE(review): helper reads num_lbls * 4 bytes of arg2 - confirm units */
+};
+EXPORT_SYMBOL_GPL(bpf_skb_mpls_push_proto);
+
 static u32 __bpf_skb_min_len(const struct sk_buff *skb)
 {
 	u32 min_len = skb_network_offset(skb);
@@ -3019,6 +3138,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 {
 	if (func == bpf_skb_vlan_push ||
 	    func == bpf_skb_vlan_pop ||
+	    func == bpf_skb_mpls_push ||
+	    func == bpf_skb_mpls_pop ||
 	    func == bpf_skb_store_bytes ||
 	    func == bpf_skb_change_proto ||
 	    func == bpf_skb_change_head ||
@@ -3682,6 +3803,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_skb_vlan_push_proto;
 	case BPF_FUNC_skb_vlan_pop:
 		return &bpf_skb_vlan_pop_proto;
+	case BPF_FUNC_skb_mpls_push:
+		return &bpf_skb_mpls_push_proto;
+	case BPF_FUNC_skb_mpls_pop:
+		return &bpf_skb_mpls_pop_proto;
 	case BPF_FUNC_skb_change_proto:
 		return &bpf_skb_change_proto_proto;
 	case BPF_FUNC_skb_change_type:
-- 
2.16.2.windows.1

  reply	other threads:[~2018-03-28 16:27 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-28 16:27 [PATCH bpf-next RFC 1/2] Organize MPLS headers Shrijeet Mukherjee
2018-03-28 16:27 ` Shrijeet Mukherjee [this message]
2018-03-28 18:23   ` [PATCH bpf-next RFC 2/2] Add MPLS label push/pop functions for EBPF David Ahern
2018-03-29 17:35     ` Shrijeet mukherjee
2018-03-28 18:20 ` [PATCH bpf-next RFC 1/2] Organize MPLS headers David Ahern

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180328162715.113-2-shrijeetoss@gmail.com \
    --to=shrijeetoss@gmail.com \
    --cc=ast@fb.com \
    --cc=daniel@iogearbox.net \
    --cc=dsa@cumulusnetworks.com \
    --cc=netdev@vger.kernel.org \
    --cc=roopa@cumulusnetworks.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).