From: Shrijeet Mukherjee <shrijeet@gmail.com>
To: mst@redhat.com, tom@herbertland.com
Cc: netdev@vger.kernel.org, shm@cumulusnetworks.com,
roopa@cumulusnetworks.com, nikolay@cumulusnetworks.com
Subject: [PATCH net-next RFC WIP] Patch for XDP support for virtio_net
Date: Sat, 22 Oct 2016 04:07:23 +0000
Message-ID: <1477109243-29520-1-git-send-email-shrijeet@gmail.com>
This patch adds support for the XDP ndo and inserts the XDP program
call into the mergeable RX buffer and big buffer receive paths.
* The small-packet skb receive path is skipped for now
* No XDP_TX support for now (a TX verdict is treated as a drop)
A minimal program for exercising the hook is sketched after the
diffstat below.
Signed-off-by: Shrijeet Mukherjee <shrijeet@gmail.com>
---
drivers/net/virtio_net.c | 133 ++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 127 insertions(+), 6 deletions(-)
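
As a quick way to exercise the new hook, here is a minimal XDP program
that drops IPv4 and passes everything else. This is illustrative only
and not part of the patch; it assumes a clang/LLVM with the bpf target,
and the "prog" section name is just what iproute2-style loaders look
for (adjust to whatever your loader expects):

/* xdp_drop_ipv4.c - illustrative test program, not part of this patch */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <asm/byteorder.h>

#define __section(NAME) __attribute__((section(NAME), used))

__section("prog")
int xdp_drop_ipv4(struct xdp_md *ctx)
{
        void *data = (void *)(long)ctx->data;
        void *data_end = (void *)(long)ctx->data_end;
        struct ethhdr *eth = data;

        /* the verifier requires an explicit bounds check before any
         * read, matching the "may read bytes but not past the end of
         * the frag" contract in do_xdp_prog() below
         */
        if (data + sizeof(*eth) > data_end)
                return XDP_PASS;

        if (eth->h_proto == __constant_htons(ETH_P_IP))
                return XDP_DROP;

        return XDP_PASS;
}

char __license[] __section("license") = "GPL";

Build with something like "clang -O2 -target bpf -c xdp_drop_ipv4.c -o
xdp_drop_ipv4.o", attach it to the virtio_net device, and IPv4 traffic
should disappear before any skb is allocated in the big and mergeable
receive paths below.
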
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index fad84f3..d5af3f7 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
+#include <linux/bpf.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
@@ -81,6 +82,8 @@ struct receive_queue {
struct napi_struct napi;
+ struct bpf_prog __rcu *xdp_prog;
+
/* Chain pages by the private ptr. */
struct page *pages;
@@ -324,6 +327,51 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
return skb;
}
+/* This function is not called from the receive_buf path directly
+ * because we want to keep the page model for the mergeable RX buffer
+ * and big buffer paths, rather than drive skbs through the fast path.
+ */
+static inline u32 do_xdp_prog(struct virtnet_info *vi,
+ struct receive_queue *rq,
+ void *buf, int offset, int len)
+{
+ struct bpf_prog *xdp_prog;
+ struct xdp_buff xdp;
+ int hdr_padded_len;
+ u32 act;
+
+ /* A bpf program gets first chance to drop the packet. It may
+ * read bytes but not past the end of the frag.
+ */
+
+ xdp_prog = rcu_dereference(rq->xdp_prog);
+ if (xdp_prog) {
+ if (vi->mergeable_rx_bufs)
+ hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+ else
+ hdr_padded_len = sizeof(struct padded_vnet_hdr);
+
+ buf = (void *)((unsigned long)buf + offset + hdr_padded_len);
+
+ xdp.data = buf;
+ xdp.data_end = xdp.data + len;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+ switch (act) {
+ case XDP_PASS:
+ return XDP_PASS;
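+ /* no XDP_TX support yet (see cover note), so a TX verdict drops */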
+ case XDP_TX:
+ case XDP_ABORTED:
+ case XDP_DROP:
+ return XDP_DROP;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ }
+ }
+ return XDP_PASS;
+}
+
static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
{
struct sk_buff * skb = buf;
@@ -341,8 +389,14 @@ static struct sk_buff *receive_big(struct net_device *dev,
unsigned int len)
{
struct page *page = buf;
- struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+ struct sk_buff *skb;
+ u32 act;
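+
+ /* give the XDP program first crack at the page, before any skb exists */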
+ act = do_xdp_prog(vi, rq, page_address(page), 0, len);
+ if (act == XDP_DROP)
+ goto err;
+
+ skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
if (unlikely(!skb))
goto err;
@@ -366,13 +420,22 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
struct page *page = virt_to_head_page(buf);
int offset = buf - page_address(page);
unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+ u32 act;
+ struct sk_buff *head_skb, *curr_skb;
- struct sk_buff *head_skb = page_to_skb(vi, rq, page, offset, len,
- truesize);
- struct sk_buff *curr_skb = head_skb;
+ act = do_xdp_prog(vi, rq, page_address(page), offset, len);
+ /* controversial, but the alternative is to build an skb anyway and then drop it */
+ if (act == XDP_DROP) {
+ put_page(page);
+ return NULL;
+ }
+
+ head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
+ curr_skb = head_skb;
if (unlikely(!curr_skb))
goto err_skb;
+
while (--num_buf) {
int num_skb_frags;
@@ -388,6 +451,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
buf = mergeable_ctx_to_buf_address(ctx);
page = virt_to_head_page(buf);
+ offset = buf - page_address(page);
+
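+ /* the verdict here is per buffer: anything but PASS drops just this frag */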
+ act = do_xdp_prog(vi, rq, page_address(page), offset, len);
+ if (act != XDP_PASS) {
+ put_page(page);
+ continue;
+ }
num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
@@ -409,7 +479,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
head_skb->len += len;
head_skb->truesize += truesize;
}
- offset = buf - page_address(page);
if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
put_page(page);
skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
@@ -1430,6 +1499,52 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
+static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct bpf_prog *old_prog;
+ int i;
+
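+ /* take one reference per extra queue that will hold the program */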
+ if (prog) {
+ prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+ }
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
+ rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ }
+
+ return 0;
+}
+
+static int virtnet_xdp_query(struct net_device *dev)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ if (rtnl_dereference(vi->rq[i].xdp_prog))
+ return 1;
+ }
+ return 0;
+}
+
+static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return virtnet_xdp_set(dev, xdp->prog);
+ case XDP_QUERY_PROG:
+ return virtnet_xdp_query(dev);
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops virtnet_netdev = {
.ndo_open = virtnet_open,
.ndo_stop = virtnet_close,
@@ -1447,6 +1562,7 @@ static const struct net_device_ops virtnet_netdev = {
#ifdef CONFIG_NET_RX_BUSY_POLL
.ndo_busy_poll = virtnet_busy_poll,
#endif
+ .ndo_xdp = virtnet_xdp,
};
static void virtnet_config_changed_work(struct work_struct *work)
@@ -1503,11 +1619,17 @@ static void virtnet_free_queues(struct virtnet_info *vi)
static void free_receive_bufs(struct virtnet_info *vi)
{
+ struct bpf_prog *old_prog;
int i;
for (i = 0; i < vi->max_queue_pairs; i++) {
while (vi->rq[i].pages)
__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
+
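+ /* detach and release any XDP program still attached to this queue */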
+ old_prog = rcu_dereference_protected(vi->rq[i].xdp_prog, 1);
+ RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
+ if (old_prog)
+ bpf_prog_put(old_prog);
}
}
@@ -1878,7 +2000,6 @@ static int virtnet_probe(struct virtio_device *vdev)
if (virtnet_change_mtu(dev, mtu))
__virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
}
-
if (vi->any_header_sg)
dev->needed_headroom = vi->hdr_len;
--
2.1.4