* [PATCH net-next 0/2] bpf: performance improvements
@ 2015-09-16  1:51 Alexei Starovoitov
  2015-09-16  1:51 ` [PATCH net-next 1/2] cls_bpf: introduce integrated actions Alexei Starovoitov
  2015-09-16  1:51 ` [PATCH net-next 2/2] bpf: add bpf_redirect() helper Alexei Starovoitov
From: Alexei Starovoitov @ 2015-09-16  1:51 UTC
  To: David S. Miller; +Cc: Jamal Hadi Salim, Daniel Borkmann, netdev

At Plumbers we discussed several options for getting rid of the skb_clone()
in bpf_clone_redirect(); patch 2 implements the best one.
Patch 1 adds 'integrated exts' to cls_bpf, improving performance by
folding simple actions into the bpf classifier itself.

Alexei Starovoitov (1):
  bpf: add bpf_redirect() helper

Daniel Borkmann (1):
  cls_bpf: introduce integrated actions

 include/net/sch_generic.h    |    3 +-
 include/uapi/linux/bpf.h     |    9 ++++++
 include/uapi/linux/pkt_cls.h |    4 +++
 net/core/dev.c               |    8 ++++++
 net/core/filter.c            |   63 ++++++++++++++++++++++++++++++++++++++++++
 net/sched/act_bpf.c          |    1 +
 net/sched/cls_bpf.c          |   61 ++++++++++++++++++++++++++++++++--------
 samples/bpf/bpf_helpers.h    |    4 +++
 samples/bpf/tcbpf1_kern.c    |   24 +++++++++++++++-
 9 files changed, 164 insertions(+), 13 deletions(-)

-- 
1.7.9.5


* [PATCH net-next 1/2] cls_bpf: introduce integrated actions
  2015-09-16  1:51 [PATCH net-next 0/2] bpf: performance improvements Alexei Starovoitov
@ 2015-09-16  1:51 ` Alexei Starovoitov
  2015-09-16  1:51 ` [PATCH net-next 2/2] bpf: add bpf_redirect() helper Alexei Starovoitov
From: Alexei Starovoitov @ 2015-09-16  1:51 UTC
  To: David S. Miller; +Cc: Jamal Hadi Salim, Daniel Borkmann, netdev

From: Daniel Borkmann <daniel@iogearbox.net>

Often the cls_bpf classifier is used with a single 'drop' action attached.
Optimize this use case by letting cls_bpf return both a classid and an
action. For backwards-compatibility reasons, this behavior is opt-in via
the new TCA_BPF_FLAG_ACT_DIRECT flag.

It also makes more interesting programs like the following easier to write:
int cls_bpf_prog(struct __sk_buff *skb)
{
  /* classify arp, ip, ipv6 into different traffic classes
   * and drop all other packets
   */
  switch (skb->protocol) {
  case htons(ETH_P_ARP):
    skb->tc_classid = 1;
    break;
  case htons(ETH_P_IP):
    skb->tc_classid = 2;
    break;
  case htons(ETH_P_IPV6):
    skb->tc_classid = 3;
    break;
  default:
    return TC_ACT_SHOT;
  }

  return TC_ACT_OK;
}
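
For reference, a program like the one above could be attached via tc
roughly as follows (a sketch only; it assumes an iproute2 counterpart
that exposes TCA_BPF_FLAG_ACT_DIRECT, spelled 'integ_act' here as in the
benchmarks of the next patch; the object file and section names are
purely illustrative):

$ tc qdisc add dev $dev ingress
$ tc filter add dev $dev root pref 10 \
    bpf run object-file cls_bpf_prog.o section classifier integ_act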

Joint work with Daniel Borkmann.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
---
 include/net/sch_generic.h    |    2 +-
 include/uapi/linux/bpf.h     |    1 +
 include/uapi/linux/pkt_cls.h |    3 +++
 net/core/filter.c            |   14 ++++++++++
 net/sched/cls_bpf.c          |   60 ++++++++++++++++++++++++++++++++++--------
 5 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 444faa89a55f..da61febb9091 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -251,7 +251,7 @@ struct tcf_proto {
 struct qdisc_skb_cb {
 	unsigned int		pkt_len;
 	u16			slave_dev_queue_mapping;
-	u16			_pad;
+	u16			tc_classid;
 #define QDISC_CB_PRIV_LEN 20
 	unsigned char		data[QDISC_CB_PRIV_LEN];
 };
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 92a48e2d5461..2fbd1c71fa3b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -293,6 +293,7 @@ struct __sk_buff {
 	__u32 tc_index;
 	__u32 cb[5];
 	__u32 hash;
+	__u32 tc_classid;
 };
 
 struct bpf_tunnel_key {
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 4f0d1bc3647d..0a262a83f9d4 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -373,6 +373,8 @@ enum {
 
 /* BPF classifier */
 
+#define TCA_BPF_FLAG_ACT_DIRECT		(1 << 0)
+
 enum {
 	TCA_BPF_UNSPEC,
 	TCA_BPF_ACT,
@@ -382,6 +384,7 @@ enum {
 	TCA_BPF_OPS,
 	TCA_BPF_FD,
 	TCA_BPF_NAME,
+	TCA_BPF_FLAGS,
 	__TCA_BPF_MAX,
 };
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 13079f03902e..971d6ba89758 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1632,6 +1632,9 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type)
 static bool sk_filter_is_valid_access(int off, int size,
 				      enum bpf_access_type type)
 {
+	if (off == offsetof(struct __sk_buff, tc_classid))
+		return false;
+
 	if (type == BPF_WRITE) {
 		switch (off) {
 		case offsetof(struct __sk_buff, cb[0]) ...
@@ -1648,6 +1651,9 @@ static bool sk_filter_is_valid_access(int off, int size,
 static bool tc_cls_act_is_valid_access(int off, int size,
 				       enum bpf_access_type type)
 {
+	if (off == offsetof(struct __sk_buff, tc_classid))
+		return type == BPF_WRITE ? true : false;
+
 	if (type == BPF_WRITE) {
 		switch (off) {
 		case offsetof(struct __sk_buff, mark):
@@ -1760,6 +1766,14 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 			*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
 		break;
 
+	case offsetof(struct __sk_buff, tc_classid):
+		ctx_off -= offsetof(struct __sk_buff, tc_classid);
+		ctx_off += offsetof(struct sk_buff, cb);
+		ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
+		WARN_ON(type != BPF_WRITE);
+		*insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+		break;
+
 	case offsetof(struct __sk_buff, tc_index):
 #ifdef CONFIG_NET_SCHED
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index e5168f8b9640..77b0ef148256 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -38,6 +38,7 @@ struct cls_bpf_prog {
 	struct bpf_prog *filter;
 	struct list_head link;
 	struct tcf_result res;
+	bool exts_integrated;
 	struct tcf_exts exts;
 	u32 handle;
 	union {
@@ -52,6 +53,7 @@ struct cls_bpf_prog {
 
 static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
 	[TCA_BPF_CLASSID]	= { .type = NLA_U32 },
+	[TCA_BPF_FLAGS]		= { .type = NLA_U32 },
 	[TCA_BPF_FD]		= { .type = NLA_U32 },
 	[TCA_BPF_NAME]		= { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
 	[TCA_BPF_OPS_LEN]	= { .type = NLA_U16 },
@@ -59,6 +61,22 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
 				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
 };
 
+static int cls_bpf_exec_opcode(int code)
+{
+	switch (code) {
+	case TC_ACT_OK:
+	case TC_ACT_RECLASSIFY:
+	case TC_ACT_SHOT:
+	case TC_ACT_PIPE:
+	case TC_ACT_STOLEN:
+	case TC_ACT_QUEUED:
+	case TC_ACT_UNSPEC:
+		return code;
+	default:
+		return TC_ACT_UNSPEC;
+	}
+}
+
 static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 			    struct tcf_result *res)
 {
@@ -79,6 +97,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	list_for_each_entry_rcu(prog, &head->plist, link) {
 		int filter_res;
 
+		qdisc_skb_cb(skb)->tc_classid = prog->res.classid;
+
 		if (at_ingress) {
 			/* It is safe to push/pull even if skb_shared() */
 			__skb_push(skb, skb->mac_len);
@@ -88,6 +108,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 			filter_res = BPF_PROG_RUN(prog->filter, skb);
 		}
 
+		if (prog->exts_integrated) {
+			res->class = prog->res.class;
+			res->classid = qdisc_skb_cb(skb)->tc_classid;
+
+			ret = cls_bpf_exec_opcode(filter_res);
+			if (ret == TC_ACT_UNSPEC)
+				continue;
+			break;
+		}
+
 		if (filter_res == 0)
 			continue;
 
@@ -195,8 +225,7 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
 	return ret;
 }
 
-static int cls_bpf_prog_from_ops(struct nlattr **tb,
-				 struct cls_bpf_prog *prog, u32 classid)
+static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
 {
 	struct sock_filter *bpf_ops;
 	struct sock_fprog_kern fprog_tmp;
@@ -230,15 +259,13 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb,
 	prog->bpf_ops = bpf_ops;
 	prog->bpf_num_ops = bpf_num_ops;
 	prog->bpf_name = NULL;
-
 	prog->filter = fp;
-	prog->res.classid = classid;
 
 	return 0;
 }
 
-static int cls_bpf_prog_from_efd(struct nlattr **tb,
-				 struct cls_bpf_prog *prog, u32 classid)
+static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
+				 const struct tcf_proto *tp)
 {
 	struct bpf_prog *fp;
 	char *name = NULL;
@@ -268,9 +295,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb,
 	prog->bpf_ops = NULL;
 	prog->bpf_fd = bpf_fd;
 	prog->bpf_name = name;
-
 	prog->filter = fp;
-	prog->res.classid = classid;
 
 	return 0;
 }
@@ -280,8 +305,8 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 				   unsigned long base, struct nlattr **tb,
 				   struct nlattr *est, bool ovr)
 {
+	bool is_bpf, is_ebpf, have_exts = false;
 	struct tcf_exts exts;
-	bool is_bpf, is_ebpf;
 	u32 classid;
 	int ret;
 
@@ -298,9 +323,22 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 		return ret;
 
 	classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+	if (tb[TCA_BPF_FLAGS]) {
+		u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
+
+		if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
+			tcf_exts_destroy(&exts);
+			return -EINVAL;
+		}
+
+		have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
+	}
+
+	prog->res.classid = classid;
+	prog->exts_integrated = have_exts;
 
-	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) :
-		       cls_bpf_prog_from_efd(tb, prog, classid);
+	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
+		       cls_bpf_prog_from_efd(tb, prog, tp);
 	if (ret < 0) {
 		tcf_exts_destroy(&exts);
 		return ret;
-- 
1.7.9.5


* [PATCH net-next 2/2] bpf: add bpf_redirect() helper
  2015-09-16  1:51 [PATCH net-next 0/2] bpf: performance improvements Alexei Starovoitov
  2015-09-16  1:51 ` [PATCH net-next 1/2] cls_bpf: introduce integrated actions Alexei Starovoitov
@ 2015-09-16  1:51 ` Alexei Starovoitov
  2015-09-16  3:10   ` John Fastabend
From: Alexei Starovoitov @ 2015-09-16  1:51 UTC
  To: David S. Miller; +Cc: Jamal Hadi Salim, Daniel Borkmann, netdev

The existing bpf_clone_redirect() helper clones the skb before redirecting
it to the RX or TX path of the destination netdev.
Introduce a bpf_redirect() helper that does the same without cloning.
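
Unlike bpf_clone_redirect(), which acts on a clone immediately from inside
the helper, bpf_redirect() only records the target ifindex and flags in a
per-cpu scratch area and returns TC_ACT_REDIRECT; the actual redirect is
performed by skb_do_redirect() once the program returns. A minimal sketch
of a program using it (mirroring the 'redirect_xmit' sample added below):

SEC("redirect_xmit")
int _redirect_xmit(struct __sk_buff *skb)
{
	/* no clone and no immediate action here: just record the
	 * target and let the TC_ACT_REDIRECT return code trigger
	 * skb_do_redirect()
	 */
	return bpf_redirect(skb->ifindex + 1, 0);
}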

Benchmarked with two hosts using 10G ixgbe NICs.
One host generates line-rate traffic with pktgen.
The other host is configured as:
$ tc qdisc add dev $dev ingress
$ tc filter add dev $dev root pref 10 u32 match u32 0 0 flowid 1:2 \
   action bpf run object-file tcbpf1_kern.o section clone_redirect_xmit drop
so it receives packets on $dev and immediately transmits them on $dev + 1.
The 'clone_redirect_xmit' section in tcbpf1_kern.o contains the program
that calls bpf_clone_redirect(); performance is 2.0 Mpps.

$ tc filter add dev $dev root pref 10 u32 match u32 0 0 flowid 1:2 \
   action bpf run object-file tcbpf1_kern.o section redirect_xmit drop
which uses bpf_redirect() instead - 2.4 Mpps

and using cls_bpf with integrated actions as:
$ tc filter add dev $dev root pref 10 \
  bpf run object-file tcbpf1_kern.o section redirect_xmit integ_act classid 1
performance is 2.5 Mpps

To summarize:
u32+act_bpf using clone_redirect - 2.0 Mpps
u32+act_bpf using redirect - 2.4 Mpps
cls_bpf using redirect - 2.5 Mpps

For comparison, a Linux bridge in this setup does 2.1 Mpps,
and ixgbe rx + drop in ip_rcv does 7.8 Mpps.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
This approach uses a per-cpu scratch area to store the ifindex and flags.
The other alternatives discussed at Plumbers are slower and more intrusive.

 include/net/sch_generic.h    |    1 +
 include/uapi/linux/bpf.h     |    8 +++++++
 include/uapi/linux/pkt_cls.h |    1 +
 net/core/dev.c               |    8 +++++++
 net/core/filter.c            |   49 ++++++++++++++++++++++++++++++++++++++++++
 net/sched/act_bpf.c          |    1 +
 net/sched/cls_bpf.c          |    1 +
 samples/bpf/bpf_helpers.h    |    4 ++++
 samples/bpf/tcbpf1_kern.c    |   24 ++++++++++++++++++++-
 9 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index da61febb9091..4c79ce8c1f92 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -402,6 +402,7 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb,
 			       const struct qdisc_size_table *stab);
 bool tcf_destroy(struct tcf_proto *tp, bool force);
 void tcf_destroy_chain(struct tcf_proto __rcu **fl);
+int skb_do_redirect(struct sk_buff *);
 
 /* Reset all TX qdiscs greater then index of a device.  */
 static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2fbd1c71fa3b..4ec0b5488294 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -272,6 +272,14 @@ enum bpf_func_id {
 	BPF_FUNC_skb_get_tunnel_key,
 	BPF_FUNC_skb_set_tunnel_key,
 	BPF_FUNC_perf_event_read,	/* u64 bpf_perf_event_read(&map, index) */
+	/**
+	 * bpf_redirect(ifindex, flags) - redirect to another netdev
+	 * @ifindex: ifindex of the net device
+	 * @flags: bit 0 - if set, redirect to ingress instead of egress
+	 *         other bits - reserved
+	 * Return: TC_ACT_REDIRECT
+	 */
+	BPF_FUNC_redirect,
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 0a262a83f9d4..439873775d49 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -87,6 +87,7 @@ enum {
 #define TC_ACT_STOLEN		4
 #define TC_ACT_QUEUED		5
 #define TC_ACT_REPEAT		6
+#define TC_ACT_REDIRECT		7
 #define TC_ACT_JUMP		0x10000000
 
 /* Action type identifiers*/
diff --git a/net/core/dev.c b/net/core/dev.c
index 877c84834d81..d6a492e57874 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3668,6 +3668,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 	case TC_ACT_QUEUED:
 		kfree_skb(skb);
 		return NULL;
+	case TC_ACT_REDIRECT:
+		/* skb_mac_header check was done by cls/act_bpf, so
+		 * we can safely push the L2 header back before
+		 * redirecting to another netdev
+		 */
+		__skb_push(skb, skb->mac_len);
+		skb_do_redirect(skb);
+		return NULL;
 	default:
 		break;
 	}
diff --git a/net/core/filter.c b/net/core/filter.c
index 971d6ba89758..5bf273bab781 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1427,6 +1427,53 @@ const struct bpf_func_proto bpf_clone_redirect_proto = {
 	.arg3_type      = ARG_ANYTHING,
 };
 
+struct redirect_info {
+	u32 ifindex;
+	u32 flags;
+};
+
+static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+	ri->ifindex = ifindex;
+	ri->flags = flags;
+	return TC_ACT_REDIRECT;
+}
+
+int skb_do_redirect(struct sk_buff *skb)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct net_device *dev;
+
+	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
+	ri->ifindex = 0;
+	if (unlikely(!dev)) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	if (unlikely(!(dev->flags & IFF_UP))) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	if (BPF_IS_REDIRECT_INGRESS(ri->flags))
+		return dev_forward_skb(dev, skb);
+
+	skb->dev = dev;
+	return dev_queue_xmit(skb);
+}
+
+const struct bpf_func_proto bpf_redirect_proto = {
+	.func           = bpf_redirect,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_ANYTHING,
+	.arg2_type      = ARG_ANYTHING,
+};
+
 static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 {
 	return task_get_classid((struct sk_buff *) (unsigned long) r1);
@@ -1607,6 +1654,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_skb_get_tunnel_key_proto;
 	case BPF_FUNC_skb_set_tunnel_key:
 		return bpf_get_skb_set_tunnel_key_proto();
+	case BPF_FUNC_redirect:
+		return &bpf_redirect_proto;
 	default:
 		return sk_filter_func_proto(func_id);
 	}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 559bfa011bda..0bc6f912f870 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -72,6 +72,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 	case TC_ACT_PIPE:
 	case TC_ACT_RECLASSIFY:
 	case TC_ACT_OK:
+	case TC_ACT_REDIRECT:
 		action = filter_res;
 		break;
 	case TC_ACT_SHOT:
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 77b0ef148256..0590816ab7b0 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -70,6 +70,7 @@ static int cls_bpf_exec_opcode(int code)
 	case TC_ACT_PIPE:
 	case TC_ACT_STOLEN:
 	case TC_ACT_QUEUED:
+	case TC_ACT_REDIRECT:
 	case TC_ACT_UNSPEC:
 		return code;
 	default:
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 3a44d3a272af..21aa1b44c30c 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -33,6 +33,10 @@ static int (*bpf_get_current_comm)(void *buf, int buf_size) =
 	(void *) BPF_FUNC_get_current_comm;
 static int (*bpf_perf_event_read)(void *map, int index) =
 	(void *) BPF_FUNC_perf_event_read;
+static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
+	(void *) BPF_FUNC_clone_redirect;
+static int (*bpf_redirect)(int ifindex, int flags) =
+	(void *) BPF_FUNC_redirect;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
index 9bfb2eb34563..fa051b3d53ee 100644
--- a/samples/bpf/tcbpf1_kern.c
+++ b/samples/bpf/tcbpf1_kern.c
@@ -5,7 +5,7 @@
 #include <uapi/linux/in.h>
 #include <uapi/linux/tcp.h>
 #include <uapi/linux/filter.h>
-
+#include <uapi/linux/pkt_cls.h>
 #include "bpf_helpers.h"
 
 /* compiler workaround */
@@ -64,4 +64,26 @@ int bpf_prog1(struct __sk_buff *skb)
 
 	return 0;
 }
+SEC("redirect_xmit")
+int _redirect_xmit(struct __sk_buff *skb)
+{
+	return bpf_redirect(skb->ifindex + 1, 0);
+}
+SEC("redirect_recv")
+int _redirect_recv(struct __sk_buff *skb)
+{
+	return bpf_redirect(skb->ifindex + 1, 1);
+}
+SEC("clone_redirect_xmit")
+int _clone_redirect_xmit(struct __sk_buff *skb)
+{
+	bpf_clone_redirect(skb, skb->ifindex + 1, 0);
+	return TC_ACT_SHOT;
+}
+SEC("clone_redirect_recv")
+int _clone_redirect_recv(struct __sk_buff *skb)
+{
+	bpf_clone_redirect(skb, skb->ifindex + 1, 1);
+	return TC_ACT_SHOT;
+}
 char _license[] SEC("license") = "GPL";
-- 
1.7.9.5


* Re: [PATCH net-next 2/2] bpf: add bpf_redirect() helper
  2015-09-16  1:51 ` [PATCH net-next 2/2] bpf: add bpf_redirect() helper Alexei Starovoitov
@ 2015-09-16  3:10   ` John Fastabend
  2015-09-16  4:11     ` Alexei Starovoitov
From: John Fastabend @ 2015-09-16  3:10 UTC
  To: Alexei Starovoitov, David S. Miller
  Cc: Jamal Hadi Salim, Daniel Borkmann, netdev

On 15-09-15 06:51 PM, Alexei Starovoitov wrote:
> The existing bpf_clone_redirect() helper clones the skb before redirecting
> it to the RX or TX path of the destination netdev.
> Introduce a bpf_redirect() helper that does the same without cloning.

[...]

Nice, I like this. But just to be sure I read this correctly: this will
only work on the ingress qdisc for now, right? Getting the tx side working
will require a bit more care.

Thanks,
.John


* Re: [PATCH net-next 2/2] bpf: add bpf_redirect() helper
  2015-09-16  3:10   ` John Fastabend
@ 2015-09-16  4:11     ` Alexei Starovoitov
  2015-09-16  4:50       ` John Fastabend
From: Alexei Starovoitov @ 2015-09-16  4:11 UTC
  To: John Fastabend, David S. Miller; +Cc: Jamal Hadi Salim, Daniel Borkmann, netdev

On 9/15/15 8:10 PM, John Fastabend wrote:
> Nice, I like this. But just to be sure I read this correctly: this will
> only work on the ingress qdisc for now, right? Getting the tx side working
> will require a bit more care.

correct.
For egress I'm waiting for Daniel to resubmit his preclassifier patch,
and I'll hook skb_do_redirect() in there as well.
Other options are possible too, but the preclassifier looks best for
this purpose, since it's lockless.


* Re: [PATCH net-next 2/2] bpf: add bpf_redirect() helper
  2015-09-16  4:11     ` Alexei Starovoitov
@ 2015-09-16  4:50       ` John Fastabend
  2015-09-16  5:08         ` Alexei Starovoitov
From: John Fastabend @ 2015-09-16  4:50 UTC
  To: Alexei Starovoitov, David S. Miller
  Cc: Jamal Hadi Salim, Daniel Borkmann, netdev

On 15-09-15 09:11 PM, Alexei Starovoitov wrote:
> On 9/15/15 8:10 PM, John Fastabend wrote:
>> Nice, I like this. But just to be sure I read this correctly: this will
>> only work on the ingress qdisc for now, right? Getting the tx side working
>> will require a bit more care.
> 
> correct.
> For egress I'm waiting for Daniel to resubmit his preclassifier patch,
> and I'll hook skb_do_redirect() in there as well.
> Other options are possible too, but the preclassifier looks best for
> this purpose, since it's lockless.
> 

Great, works for me. One other question/observation:

+int skb_do_redirect(struct sk_buff *skb)
+{

[...]

+
+	if (unlikely(!(dev->flags & IFF_UP))) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}


The IFF_UP check is not needed as best I can tell: dev_queue_xmit()
checks whether the qdisc is active, and the dev_forward_skb() path does
a !netif_running check in enqueue_to_backlog().

Looks like you can remove the check. I would prefer to let the stack
handle this case using the normal mechanisms.

I had to do a bit of digging, but the netif_running check equates
roughly to your IFF_UP case via:

> 	__dev_change_flags()
> 		[...]
> 	        if ((old_flags ^ flags) & IFF_UP)
>                 	ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
> 		
> 
> 	__dev_close()
> 		[...]
> 		__dev_close_many()
> 
> 	__dev_close_many()
> 		[...]
>               clear_bit(__LINK_STATE_START, &dev->state);

Seem reasonable? Or did you put it there to work around some specific
case I'm missing?

.John


* Re: [PATCH net-next 2/2] bpf: add bpf_redirect() helper
  2015-09-16  4:50       ` John Fastabend
@ 2015-09-16  5:08         ` Alexei Starovoitov
From: Alexei Starovoitov @ 2015-09-16  5:08 UTC
  To: John Fastabend, David S. Miller; +Cc: Jamal Hadi Salim, Daniel Borkmann, netdev

On 9/15/15 9:50 PM, John Fastabend wrote:
> Looks like you can remove the check. I would prefer to let the stack
> handle this case using the normal mechanisms.
>
> I had to do a bit of digging, but the netif_running check equates
> roughly to your IFF_UP case via:
...
> Seem reasonable? Or did you put it there to work around some specific
> case I'm missing?

well, in the forwarding path is_skb_forwardable() does the IFF_UP check
before netif_running() has to, so yeah, this check can be dropped.

Will fix in v2.
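
A minimal sketch of what the v2 helper might look like with the check
dropped (untested; it assumes no other changes to the function):

int skb_do_redirect(struct sk_buff *skb)
{
	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
	struct net_device *dev;

	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
	ri->ifindex = 0;
	if (unlikely(!dev)) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/* IFF_UP check dropped: dev_queue_xmit() only enqueues to an
	 * active qdisc, and dev_forward_skb() checks netif_running()
	 * in enqueue_to_backlog()
	 */
	if (BPF_IS_REDIRECT_INGRESS(ri->flags))
		return dev_forward_skb(dev, skb);

	skb->dev = dev;
	return dev_queue_xmit(skb);
}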
Thanks for the review!

