Netdev List
 help / color / mirror / Atom feed
From: Edward Cree <ecree@solarflare.com>
To: <linux-net-drivers@solarflare.com>, <netdev@vger.kernel.org>
Cc: <davem@davemloft.net>
Subject: [RFC PATCH v2 net-next 10/12] net: listify Generic XDP processing, part 1
Date: Tue, 26 Jun 2018 19:21:41 +0100	[thread overview]
Message-ID: <6ca465cf-c070-d4fe-73a8-b3bd8726526d@solarflare.com> (raw)
In-Reply-To: <fa3d7e58-e7b6-ad0c-619f-824c25ed0d97@solarflare.com>

This patch batches the pre- and post-amble around the BPF program; the
 program itself is still called one packet at a time.
To cope with XDP_REDIRECT handling, the per-cpu redirect_info is saved after
 each program run and restored before the redirect is carried out.

Signed-off-by: Edward Cree <ecree@solarflare.com>
---
 include/linux/filter.h |  10 +++
 net/core/dev.c         | 165 +++++++++++++++++++++++++++++++++++++++++++------
 net/core/filter.c      |  10 +--
 3 files changed, 156 insertions(+), 29 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 20f2659dd829..75db6cbf78a3 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -820,6 +820,16 @@ static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
 	return 0;
 }
 
+struct redirect_info {
+	u32 ifindex;
+	u32 flags;
+	struct bpf_map *map;
+	struct bpf_map *map_to_flush;
+	unsigned long   map_owner;
+};
+/* Defined in net/core/filter.c; net/core/dev.c snapshots and restores it */
+DECLARE_PER_CPU(struct redirect_info, redirect_info);
+
 /* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
  * same cpu context. Further for best results no more than a single map
  * for the do_redirect/do_flush pair should be used. This limitation is
diff --git a/net/core/dev.c b/net/core/dev.c
index 11f80d4502b9..22cbd5314d56 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4015,15 +4015,14 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
 	return rxqueue;
 }
 
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
-				     struct xdp_buff *xdp,
-				     struct bpf_prog *xdp_prog)
+static u32 netif_receive_generic_xdp_prepare(struct sk_buff *skb,
+					     struct xdp_buff *xdp,
+					     void **orig_data,
+					     void **orig_data_end,
+					     u32 *mac_len)
 {
 	struct netdev_rx_queue *rxqueue;
-	void *orig_data, *orig_data_end;
-	u32 metalen, act = XDP_DROP;
-	int hlen, off;
-	u32 mac_len;
+	int hlen;
 
 	/* Reinjected packets coming from act_mirred or similar should
 	 * not get XDP generic processing.
@@ -4054,19 +4053,35 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	/* The XDP program wants to see the packet starting at the MAC
 	 * header.
 	 */
-	mac_len = skb->data - skb_mac_header(skb);
-	hlen = skb_headlen(skb) + mac_len;
-	xdp->data = skb->data - mac_len;
+	*mac_len = skb->data - skb_mac_header(skb);
+	hlen = skb_headlen(skb) + *mac_len;
+	xdp->data = skb->data - *mac_len;
 	xdp->data_meta = xdp->data;
 	xdp->data_end = xdp->data + hlen;
 	xdp->data_hard_start = skb->data - skb_headroom(skb);
-	orig_data_end = xdp->data_end;
-	orig_data = xdp->data;
+	*orig_data_end = xdp->data_end;
+	*orig_data = xdp->data;
 
 	rxqueue = netif_get_rxqueue(skb);
 	xdp->rxq = &rxqueue->xdp_rxq;
+	/* 0 is actually XDP_ABORTED, but here we use it to mean "go ahead
+	 * and run the xdp program"
+	 */
+	return 0;
+do_drop:
+	kfree_skb(skb);
+	return XDP_DROP;
+}
 
-	act = bpf_prog_run_xdp(xdp_prog, xdp);
+static u32 netif_receive_generic_xdp_finish(struct sk_buff *skb,
+					    struct xdp_buff *xdp,
+					    struct bpf_prog *xdp_prog,
+					    void *orig_data,
+					    void *orig_data_end,
+					    u32 act, u32 mac_len)
+{
+	u32 metalen;
+	int off;
 
 	off = xdp->data - orig_data;
 	if (off > 0)
@@ -4082,7 +4097,6 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	if (off != 0) {
 		skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
 		skb->len -= off;
-
 	}
 
 	switch (act) {
@@ -4102,7 +4116,6 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 		trace_xdp_exception(skb->dev, xdp_prog, act);
 		/* fall through */
 	case XDP_DROP:
-	do_drop:
 		kfree_skb(skb);
 		break;
 	}
@@ -4110,6 +4123,29 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	return act;
 }
 
+/* Generic XDP for a single skb: pre-amble, the BPF program, post-amble.
+ * Returns the XDP action for the packet.
+ */
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+				     struct xdp_buff *xdp,
+				     struct bpf_prog *xdp_prog)
+{
+	void *data_start, *data_stop;
+	u32 verdict, l2_len;
+
+	verdict = netif_receive_generic_xdp_prepare(skb, xdp, &data_start,
+						    &data_stop, &l2_len);
+	/* Zero means "run the program"; anything else is already final */
+	if (!verdict) {
+		verdict = bpf_prog_run_xdp(xdp_prog, xdp);
+		verdict = netif_receive_generic_xdp_finish(skb, xdp, xdp_prog,
+							   data_start,
+							   data_stop, verdict,
+							   l2_len);
+	}
+	return verdict;
+}
+
 /* When doing generic XDP we have to bypass the qdisc layer and the
  * network taps in order to match in-driver-XDP behavior.
  */
@@ -4168,6 +4198,116 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(do_xdp_generic);
 
+/* Per-packet bookkeeping for one BPF program run within a batch */
+struct bpf_work {
+	struct list_head list;		/* link on the to-run list */
+	void *ctx;			/* program context; NULL: not run */
+	struct redirect_info ri;	/* redirect_info saved after the run */
+	unsigned long ret;		/* action code for the packet */
+};
+
+/* Generic XDP work item: state shared by the pre- and post-amble */
+struct xdp_work {
+	struct bpf_work w;
+	struct xdp_buff xdp;
+	struct sk_buff *skb;
+	void *orig_data;
+	void *orig_data_end;
+	u32 mac_len;
+};
+
+/* Storage area for per-packet Generic XDP metadata */
+static DEFINE_PER_CPU(struct xdp_work[NAPI_POLL_WEIGHT], xdp_work);
+
+/* Run Generic XDP over a batch of skbs that all use @xdp_prog.
+ *
+ * Every skb is removed from @list.  Those that end up as XDP_PASS are
+ * appended to @pass_list in their original order; the rest are redirected,
+ * transmitted or freed here.  If @xdp_prog is NULL the whole list passes.
+ *
+ * A packet settled by the pre-amble (non-zero return) is not handed to the
+ * post-amble: on XDP_DROP the pre-amble has already freed the skb, so the
+ * post-amble would free it a second time.
+ */
+static void do_xdp_list_generic(struct bpf_prog *xdp_prog,
+				struct sk_buff_head *list,
+				struct sk_buff_head *pass_list)
+{
+	struct xdp_work (*xwa)[NAPI_POLL_WEIGHT], *xw;
+	struct bpf_work *bw;
+	struct sk_buff *skb;
+	LIST_HEAD(xdp_list);
+	int n = 0, i, err;
+	u32 act;
+
+	if (!xdp_prog) {
+		/* PASS everything */
+		skb_queue_splice_init(list, pass_list);
+		return;
+	}
+
+	xwa = this_cpu_ptr(&xdp_work);
+
+	/* Dequeue instead of walking @list: the pre-amble may free the skb */
+	while ((skb = __skb_dequeue(list)) != NULL) {
+		if (WARN_ON(n >= NAPI_POLL_WEIGHT)) {
+			/* checked in caller, can't happen; no slot left */
+			kfree_skb(skb);
+			continue;
+		}
+		xw = (*xwa) + n++;
+		memset(xw, 0, sizeof(*xw));
+		xw->skb = skb;
+		act = netif_receive_generic_xdp_prepare(skb, &xw->xdp,
+							&xw->orig_data,
+							&xw->orig_data_end,
+							&xw->mac_len);
+		if (act) {
+			/* settled already; ctx stays NULL */
+			xw->w.ret = act;
+			continue;
+		}
+		xw->w.ctx = &xw->xdp;
+		list_add_tail(&xw->w.list, &xdp_list);
+	}
+
+	list_for_each_entry(bw, &xdp_list, list) {
+		bw->ret = bpf_prog_run_xdp(xdp_prog, bw->ctx);
+		/* snapshot the per-cpu state; restored for XDP_REDIRECT */
+		bw->ri = *this_cpu_ptr(&redirect_info);
+	}
+
+	for (i = 0; i < n; i++) {
+		xw = (*xwa) + i;
+		act = xw->w.ret;
+		if (xw->w.ctx) /* the program ran: do the post-amble */
+			act = netif_receive_generic_xdp_finish(xw->skb,
+						&xw->xdp, xdp_prog,
+						xw->orig_data,
+						xw->orig_data_end,
+						act, xw->mac_len);
+		switch (act) {
+		case XDP_PASS:
+			__skb_queue_tail(pass_list, xw->skb);
+			break;
+		case XDP_REDIRECT:
+			*this_cpu_ptr(&redirect_info) = xw->w.ri;
+			err = xdp_do_generic_redirect(xw->skb->dev,
+						      xw->skb, &xw->xdp,
+						      xdp_prog);
+			if (err) /* free and drop */
+				kfree_skb(xw->skb);
+			break;
+		case XDP_TX:
+			generic_xdp_tx(xw->skb, xdp_prog);
+			break;
+		default:
+			/* nothing to do: the skb has already been freed */
+			break;
+		}
+	}
+}
+
 static int netif_rx_internal(struct sk_buff *skb)
 {
 	int ret;
@@ -4878,7 +4995,7 @@ static void netif_receive_skb_list_internal(struct sk_buff_head *list)
 {
 	/* Two sublists so we can go back and forth between them */
 	struct sk_buff_head sublist, sublist2;
-	struct bpf_prog *xdp_prog = NULL;
+	struct bpf_prog *xdp_prog = NULL, *curr_prog = NULL;
 	struct sk_buff *skb;
 
 	__skb_queue_head_init(&sublist);
@@ -4893,15 +5010,23 @@ static void netif_receive_skb_list_internal(struct sk_buff_head *list)
 
 	__skb_queue_head_init(&sublist2);
 	if (static_branch_unlikely(&generic_xdp_needed_key)) {
+		struct sk_buff_head sublist3;
+		int n = 0;
+
+		__skb_queue_head_init(&sublist3);
 		preempt_disable();
 		rcu_read_lock();
 		while ((skb = __skb_dequeue(&sublist)) != NULL) {
 			xdp_prog = rcu_dereference(skb->dev->xdp_prog);
-			if (do_xdp_generic(xdp_prog, skb) != XDP_PASS)
-				/* Dropped, don't add to sublist */
-				continue;
-			__skb_queue_tail(&sublist2, skb);
+			if (++n >= NAPI_POLL_WEIGHT || xdp_prog != curr_prog) {
+				do_xdp_list_generic(curr_prog, &sublist3, &sublist2);
+				__skb_queue_head_init(&sublist3);
+				n = 0;
+				curr_prog = xdp_prog;
+			}
+			__skb_queue_tail(&sublist3, skb);
 		}
+		do_xdp_list_generic(curr_prog, &sublist3, &sublist2);
 		rcu_read_unlock();
 		preempt_enable();
 		/* Move all packets onto first sublist */
diff --git a/net/core/filter.c b/net/core/filter.c
index e7f12e9f598c..c96aff14d76a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2039,15 +2039,7 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
 	.arg3_type      = ARG_ANYTHING,
 };
 
-struct redirect_info {
-	u32 ifindex;
-	u32 flags;
-	struct bpf_map *map;
-	struct bpf_map *map_to_flush;
-	unsigned long   map_owner;
-};
-
-static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+DEFINE_PER_CPU(struct redirect_info, redirect_info);
 
 BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
 {

  parent reply	other threads:[~2018-06-26 18:21 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-26 18:15 [RFC PATCH v2 net-next 00/12] Handle multiple received packets at each stage Edward Cree
2018-06-26 18:17 ` [RFC PATCH v2 net-next 01/12] net: core: trivial netif_receive_skb_list() entry point Edward Cree
2018-06-27  0:06   ` Eric Dumazet
2018-06-27 14:03     ` Edward Cree
2018-06-26 18:17 ` [RFC PATCH v2 net-next 02/12] sfc: batch up RX delivery Edward Cree
2018-06-26 18:18 ` [RFC PATCH v2 net-next 03/12] net: core: unwrap skb list receive slightly further Edward Cree
2018-06-26 18:18 ` [RFC PATCH v2 net-next 04/12] net: core: Another step of skb receive list processing Edward Cree
2018-06-26 18:19 ` [RFC PATCH v2 net-next 05/12] net: core: another layer of lists, around PF_MEMALLOC skb handling Edward Cree
2018-06-26 18:19 ` [RFC PATCH v2 net-next 06/12] net: core: propagate SKB lists through packet_type lookup Edward Cree
2018-06-27 14:36   ` Willem de Bruijn
2018-06-27 14:49     ` Edward Cree
2018-06-27 16:00       ` Willem de Bruijn
2018-06-27 16:34         ` Edward Cree
2018-06-26 18:20 ` [RFC PATCH v2 net-next 07/12] net: ipv4: listified version of ip_rcv Edward Cree
2018-06-27 12:32   ` Florian Westphal
2018-06-26 18:20 ` [RFC PATCH v2 net-next 08/12] net: ipv4: listify ip_rcv_finish Edward Cree
2018-06-26 18:21 ` [RFC PATCH v2 net-next 09/12] net: don't bother calling list RX functions on empty lists Edward Cree
2018-06-26 18:21 ` Edward Cree [this message]
2018-06-26 18:22 ` [RFC PATCH v2 net-next 11/12] net: listify Generic XDP processing, part 2 Edward Cree
2018-06-26 18:22 ` [RFC PATCH v2 net-next 12/12] net: listify jited Generic XDP processing on x86_64 Edward Cree
2018-06-26 20:48 ` [RFC PATCH v2 net-next 00/12] Handle multiple received packets at each stage Tom Herbert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6ca465cf-c070-d4fe-73a8-b3bd8726526d@solarflare.com \
    --to=ecree@solarflare.com \
    --cc=davem@davemloft.net \
    --cc=linux-net-drivers@solarflare.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox