From: Edward Cree <ecree@solarflare.com>
To: <linux-net-drivers@solarflare.com>, <netdev@vger.kernel.org>
Cc: <davem@davemloft.net>
Subject: [RFC PATCH v2 net-next 10/12] net: listify Generic XDP processing, part 1
Date: Tue, 26 Jun 2018 19:21:41 +0100 [thread overview]
Message-ID: <6ca465cf-c070-d4fe-73a8-b3bd8726526d@solarflare.com> (raw)
In-Reply-To: <fa3d7e58-e7b6-ad0c-619f-824c25ed0d97@solarflare.com>
Deals with all the pre- and post-amble to the BPF program itself, which is
still called one packet at a time.
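Involves some fiddly percpu variables to cope with XDP_REDIRECT handling: the
percpu redirect_info is snapshotted after each program run and restored just
before the matching xdp_do_generic_redirect() call.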
Signed-off-by: Edward Cree <ecree@solarflare.com>
---
include/linux/filter.h | 10 +++
net/core/dev.c | 165 +++++++++++++++++++++++++++++++++++++++++++------
net/core/filter.c | 10 +--
3 files changed, 156 insertions(+), 29 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 20f2659dd829..75db6cbf78a3 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -820,6 +820,16 @@ static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
return 0;
}
+struct redirect_info { /* percpu XDP redirect state; visible here so net/core/dev.c can save and restore it per packet */
+ u32 ifindex; /* NOTE(review): presumably the ifindex argument of bpf_redirect() - confirm */
+ u32 flags; /* NOTE(review): presumably the flags argument of bpf_redirect() - confirm */
+ struct bpf_map *map;
+ struct bpf_map *map_to_flush;
+ unsigned long map_owner;
+};
+
+DECLARE_PER_CPU(struct redirect_info, redirect_info); /* the definition lives in net/core/filter.c */
+
/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
* same cpu context. Further for best results no more than a single map
* for the do_redirect/do_flush pair should be used. This limitation is
diff --git a/net/core/dev.c b/net/core/dev.c
index 11f80d4502b9..22cbd5314d56 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4015,15 +4015,14 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
return rxqueue;
}
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
- struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+static u32 netif_receive_generic_xdp_prepare(struct sk_buff *skb,
+ struct xdp_buff *xdp,
+ void **orig_data,
+ void **orig_data_end,
+ u32 *mac_len)
{
struct netdev_rx_queue *rxqueue;
- void *orig_data, *orig_data_end;
- u32 metalen, act = XDP_DROP;
- int hlen, off;
- u32 mac_len;
+ int hlen;
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
@@ -4054,19 +4053,35 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
/* The XDP program wants to see the packet starting at the MAC
* header.
*/
- mac_len = skb->data - skb_mac_header(skb);
- hlen = skb_headlen(skb) + mac_len;
- xdp->data = skb->data - mac_len;
+ *mac_len = skb->data - skb_mac_header(skb);
+ hlen = skb_headlen(skb) + *mac_len;
+ xdp->data = skb->data - *mac_len;
xdp->data_meta = xdp->data;
xdp->data_end = xdp->data + hlen;
xdp->data_hard_start = skb->data - skb_headroom(skb);
- orig_data_end = xdp->data_end;
- orig_data = xdp->data;
+ *orig_data_end = xdp->data_end;
+ *orig_data = xdp->data;
rxqueue = netif_get_rxqueue(skb);
xdp->rxq = &rxqueue->xdp_rxq;
+ /* 0 is actually XDP_ABORTED, but as this function's return value we use
+ * it to mean "go ahead and run the xdp program"
+ */
+ return 0;
+do_drop:
+ kfree_skb(skb);
+ return XDP_DROP;
+}
- act = bpf_prog_run_xdp(xdp_prog, xdp);
+static u32 netif_receive_generic_xdp_finish(struct sk_buff *skb,
+ struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog,
+ void *orig_data,
+ void *orig_data_end,
+ u32 act, u32 mac_len)
+{
+ u32 metalen;
+ int off;
off = xdp->data - orig_data;
if (off > 0)
@@ -4082,7 +4097,6 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
if (off != 0) {
skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
skb->len -= off;
-
}
switch (act) {
@@ -4102,7 +4116,6 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
trace_xdp_exception(skb->dev, xdp_prog, act);
/* fall through */
case XDP_DROP:
- do_drop:
kfree_skb(skb);
break;
}
@@ -4110,6 +4123,23 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
return act;
}
+/* Generic XDP for one skb: prepare the xdp_buff, run the prog, fix up. */
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+				     struct xdp_buff *xdp,
+				     struct bpf_prog *xdp_prog)
+{
+	void *data_start, *data_stop;
+	u32 l2_len, verdict;
+
+	verdict = netif_receive_generic_xdp_prepare(skb, xdp, &data_start,
+						    &data_stop, &l2_len);
+	if (verdict)	/* prepare reached a verdict itself; skip the prog */
+		return verdict;
+	verdict = bpf_prog_run_xdp(xdp_prog, xdp);
+	return netif_receive_generic_xdp_finish(skb, xdp, xdp_prog, data_start,
+						data_stop, verdict, l2_len);
+}
+
/* When doing generic XDP we have to bypass the qdisc layer and the
* network taps in order to match in-driver-XDP behavior.
*/
@@ -4168,6 +4198,93 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(do_xdp_generic);
+struct bpf_work { /* per-packet bookkeeping for one program run within a batch */
+ struct list_head list; /* links the entries whose program still has to run (xdp_list in do_xdp_list_generic) */
+ void *ctx; /* context handed to bpf_prog_run_xdp(); set to the entry's own xdp_buff */
+ struct redirect_info ri; /* copy of this CPU's redirect_info taken right after the program ran */
+ unsigned long ret; /* program return value, or the prepare verdict when the program was skipped */
+};
+
+struct xdp_work { /* a bpf_work plus the Generic XDP state carried from prepare to finish */
+ struct bpf_work w;
+ struct xdp_buff xdp; /* filled in by netif_receive_generic_xdp_prepare() */
+ struct sk_buff *skb; /* the packet this entry describes */
+ void *orig_data; /* the next three are prepare outputs, handed back to netif_receive_generic_xdp_finish() */
+ void *orig_data_end;
+ u32 mac_len;
+};
+
+/* Storage area for per-packet Generic XDP metadata */
+static DEFINE_PER_CPU(struct xdp_work[NAPI_POLL_WEIGHT], xdp_work); /* one slot per skb of a batch */
+
+/* Run Generic XDP over every skb on @list.
+ *
+ * XDP_PASS packets are appended to @pass_list in their original order; all
+ * other packets are consumed here (redirected, transmitted or freed).  @list
+ * is always empty on return.  It is drained in batches of at most
+ * NAPI_POLL_WEIGHT skbs, the capacity of the percpu xdp_work array, so a
+ * longer list neither overruns that array nor loses packets.
+ *
+ * Uses percpu scratch storage, so the caller is expected to keep preemption
+ * disabled, as netif_receive_skb_list_internal() does.
+ */
+static void do_xdp_list_generic(struct bpf_prog *xdp_prog,
+				struct sk_buff_head *list,
+				struct sk_buff_head *pass_list)
+{
+	struct xdp_work (*xwa)[NAPI_POLL_WEIGHT], *xw;
+	struct bpf_work *bw;
+	struct sk_buff *skb;
+	int n, i;
+	u32 act;
+
+	if (!xdp_prog) {
+		/* PASS everything */
+		skb_queue_splice_init(list, pass_list);
+		return;
+	}
+
+	xwa = this_cpu_ptr(&xdp_work);
+
+	while (!skb_queue_empty(list)) {
+		LIST_HEAD(xdp_list);
+
+		/* Dequeue rather than walk @list: prepare may free the skb,
+		 * after which its ->next must not be read.
+		 */
+		for (n = 0; n < NAPI_POLL_WEIGHT; n++) {
+			skb = __skb_dequeue(list);
+			if (!skb)
+				break;
+			xw = (*xwa) + n;
+			memset(xw, 0, sizeof(*xw));
+			INIT_LIST_HEAD(&xw->w.list);
+			xw->skb = skb;
+			xw->w.ctx = &xw->xdp;
+			xw->w.ret = netif_receive_generic_xdp_prepare(skb,
+					&xw->xdp, &xw->orig_data,
+					&xw->orig_data_end, &xw->mac_len);
+			/* 0 means "go ahead and run the xdp program" */
+			if (!xw->w.ret)
+				list_add_tail(&xw->w.list, &xdp_list);
+		}
+
+		list_for_each_entry(bw, &xdp_list, list) {
+			bw->ret = bpf_prog_run_xdp(xdp_prog, bw->ctx);
+			/* Snapshot now: the next run overwrites the percpu copy */
+			bw->ri = *this_cpu_ptr(&redirect_info);
+		}
+
+		for (i = 0; i < n; i++) {
+			xw = (*xwa) + i;
+			act = xw->w.ret;
+			/* A node that was never linked means the prog did not
+			 * run: prepare's verdict is final and, unless it is
+			 * XDP_PASS, prepare has already freed the skb.  Only
+			 * packets the prog saw go through finish, matching
+			 * netif_receive_generic_xdp().
+			 */
+			if (!list_empty(&xw->w.list))
+				act = netif_receive_generic_xdp_finish(xw->skb,
+						&xw->xdp, xdp_prog,
+						xw->orig_data,
+						xw->orig_data_end, act,
+						xw->mac_len);
+			else if (act != XDP_PASS)
+				continue;
+
+			switch (act) {
+			case XDP_PASS:
+				__skb_queue_tail(pass_list, xw->skb);
+				break;
+			case XDP_REDIRECT:
+				/* Restore the state this packet's run produced */
+				*this_cpu_ptr(&redirect_info) = xw->w.ri;
+				if (xdp_do_generic_redirect(xw->skb->dev,
+							    xw->skb, &xw->xdp,
+							    xdp_prog))
+					kfree_skb(xw->skb); /* free and drop */
+				break;
+			case XDP_TX:
+				generic_xdp_tx(xw->skb, xdp_prog);
+				break;
+			default:
+				/* finish has already freed the skb */
+				break;
+			}
+		}
+	}
+}
+
static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
@@ -4878,7 +4995,7 @@ static void netif_receive_skb_list_internal(struct sk_buff_head *list)
{
/* Two sublists so we can go back and forth between them */
struct sk_buff_head sublist, sublist2;
- struct bpf_prog *xdp_prog = NULL;
+ struct bpf_prog *xdp_prog = NULL, *curr_prog = NULL;
struct sk_buff *skb;
__skb_queue_head_init(&sublist);
@@ -4893,15 +5010,23 @@ static void netif_receive_skb_list_internal(struct sk_buff_head *list)
__skb_queue_head_init(&sublist2);
if (static_branch_unlikely(&generic_xdp_needed_key)) {
+ struct sk_buff_head sublist3;
+ int n = 0;
+
+ __skb_queue_head_init(&sublist3);
preempt_disable();
rcu_read_lock();
while ((skb = __skb_dequeue(&sublist)) != NULL) {
xdp_prog = rcu_dereference(skb->dev->xdp_prog);
- if (do_xdp_generic(xdp_prog, skb) != XDP_PASS)
- /* Dropped, don't add to sublist */
- continue;
- __skb_queue_tail(&sublist2, skb);
+ if (++n >= NAPI_POLL_WEIGHT || xdp_prog != curr_prog) {
+ do_xdp_list_generic(curr_prog, &sublist3, &sublist2);
+ __skb_queue_head_init(&sublist3);
+ n = 0;
+ curr_prog = xdp_prog;
+ }
+ __skb_queue_tail(&sublist3, skb);
}
+ do_xdp_list_generic(curr_prog, &sublist3, &sublist2);
rcu_read_unlock();
preempt_enable();
/* Move all packets onto first sublist */
diff --git a/net/core/filter.c b/net/core/filter.c
index e7f12e9f598c..c96aff14d76a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2039,15 +2039,7 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
.arg3_type = ARG_ANYTHING,
};
-struct redirect_info {
- u32 ifindex;
- u32 flags;
- struct bpf_map *map;
- struct bpf_map *map_to_flush;
- unsigned long map_owner;
-};
-
-static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+DEFINE_PER_CPU(struct redirect_info, redirect_info);
BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
{
next prev parent reply other threads:[~2018-06-26 18:21 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-06-26 18:15 [RFC PATCH v2 net-next 00/12] Handle multiple received packets at each stage Edward Cree
2018-06-26 18:17 ` [RFC PATCH v2 net-next 01/12] net: core: trivial netif_receive_skb_list() entry point Edward Cree
2018-06-27 0:06 ` Eric Dumazet
2018-06-27 14:03 ` Edward Cree
2018-06-26 18:17 ` [RFC PATCH v2 net-next 02/12] sfc: batch up RX delivery Edward Cree
2018-06-26 18:18 ` [RFC PATCH v2 net-next 03/12] net: core: unwrap skb list receive slightly further Edward Cree
2018-06-26 18:18 ` [RFC PATCH v2 net-next 04/12] net: core: Another step of skb receive list processing Edward Cree
2018-06-26 18:19 ` [RFC PATCH v2 net-next 05/12] net: core: another layer of lists, around PF_MEMALLOC skb handling Edward Cree
2018-06-26 18:19 ` [RFC PATCH v2 net-next 06/12] net: core: propagate SKB lists through packet_type lookup Edward Cree
2018-06-27 14:36 ` Willem de Bruijn
2018-06-27 14:49 ` Edward Cree
2018-06-27 16:00 ` Willem de Bruijn
2018-06-27 16:34 ` Edward Cree
2018-06-26 18:20 ` [RFC PATCH v2 net-next 07/12] net: ipv4: listified version of ip_rcv Edward Cree
2018-06-27 12:32 ` Florian Westphal
2018-06-26 18:20 ` [RFC PATCH v2 net-next 08/12] net: ipv4: listify ip_rcv_finish Edward Cree
2018-06-26 18:21 ` [RFC PATCH v2 net-next 09/12] net: don't bother calling list RX functions on empty lists Edward Cree
2018-06-26 18:21 ` Edward Cree [this message]
2018-06-26 18:22 ` [RFC PATCH v2 net-next 11/12] net: listify Generic XDP processing, part 2 Edward Cree
2018-06-26 18:22 ` [RFC PATCH v2 net-next 12/12] net: listify jited Generic XDP processing on x86_64 Edward Cree
2018-06-26 20:48 ` [RFC PATCH v2 net-next 00/12] Handle multiple received packets at each stage Tom Herbert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=6ca465cf-c070-d4fe-73a8-b3bd8726526d@solarflare.com \
--to=ecree@solarflare.com \
--cc=davem@davemloft.net \
--cc=linux-net-drivers@solarflare.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox