netdev.vger.kernel.org archive mirror
* [net-next PATCH v3 1/6] net: virtio dynamically disable/enable LRO
@ 2016-11-29 20:09 John Fastabend
  2016-11-29 20:09 ` [net-next PATCH v3 2/6] net: xdp: add invalid buffer warning John Fastabend
                   ` (4 more replies)
  0 siblings, 5 replies; 15+ messages in thread
From: John Fastabend @ 2016-11-29 20:09 UTC (permalink / raw)
  To: eric.dumazet, daniel, shm, davem, tgraf, alexei.starovoitov
  Cc: john.r.fastabend, netdev, bblanco, john.fastabend, brouer

This adds support for dynamically setting the LRO feature flag. The
message used to control guest offload features in the backend is of
the CTRL_GUEST_OFFLOADS msg type.
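
For illustration only (the layout below comes from the virtio spec, not
from this patch), the request virtnet_send_command() places on the
control virtqueue for this toggle looks roughly like the sketch below;
at runtime the change is driven by e.g. "ethtool -K $DEV lro off":

	/* Hedged sketch of the ctrl vq request; the constants are real
	 * virtio names, the local variables are illustrative only.
	 */
	struct virtio_net_ctrl_hdr ctrl = {
		.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS,	/* 5 */
		.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET,	/* 0 */
	};
	u64 offloads = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
		       (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
		       (1ULL << VIRTIO_NET_F_GUEST_CSUM);

	/* ctrl and offloads go out as separate sg entries; the device
	 * answers with a single VIRTIO_NET_OK/VIRTIO_NET_ERR ack byte.
	 */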

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 drivers/net/virtio_net.c |   45 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ca5239a..8189e5b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1419,6 +1419,41 @@ static void virtnet_init_settings(struct net_device *dev)
 	.set_settings = virtnet_set_settings,
 };
 
+static int virtnet_set_features(struct net_device *netdev,
+				netdev_features_t features)
+{
+	struct virtnet_info *vi = netdev_priv(netdev);
+	struct virtio_device *vdev = vi->vdev;
+	struct scatterlist sg;
+	u64 offloads = 0;
+
+	if (features & NETIF_F_LRO)
+		offloads |= (1 << VIRTIO_NET_F_GUEST_TSO4) |
+			    (1 << VIRTIO_NET_F_GUEST_TSO6);
+
+	if (features & NETIF_F_RXCSUM)
+		offloads |= (1 << VIRTIO_NET_F_GUEST_CSUM);
+
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
+		sg_init_one(&sg, &offloads, sizeof(uint64_t));
+		if (!virtnet_send_command(vi,
+					  VIRTIO_NET_CTRL_GUEST_OFFLOADS,
+					  VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET,
+					  &sg)) {
+			dev_warn(&netdev->dev,
+				 "Failed to set guest offloads by virtnet command.\n");
+			return -EINVAL;
+		}
+	} else if (!virtio_has_feature(vdev,
+				       VIRTIO_F_VERSION_1)) {
+		dev_warn(&netdev->dev,
+			 "No support for setting offloads pre version_1.\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop   	     = virtnet_close,
@@ -1435,6 +1470,7 @@ static void virtnet_init_settings(struct net_device *dev)
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	.ndo_busy_poll		= virtnet_busy_poll,
 #endif
+	.ndo_set_features	= virtnet_set_features,
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)
@@ -1810,6 +1846,12 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
 		dev->features |= NETIF_F_RXCSUM;
 
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) &&
+	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) {
+		dev->features |= NETIF_F_LRO;
+		dev->hw_features |= NETIF_F_LRO;
+	}
+
 	dev->vlan_features = dev->features;
 
 	/* MTU range: 68 - 65535 */
@@ -2047,7 +2089,8 @@ static int virtnet_restore(struct virtio_device *vdev)
 	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
 	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
 	VIRTIO_NET_F_CTRL_MAC_ADDR, \
-	VIRTIO_NET_F_MTU
+	VIRTIO_NET_F_MTU, \
+	VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
 
 static unsigned int features[] = {
 	VIRTNET_FEATURES,


* [net-next PATCH v3 2/6] net: xdp: add invalid buffer warning
  2016-11-29 20:09 [net-next PATCH v3 1/6] net: virtio dynamically disable/enable LRO John Fastabend
@ 2016-11-29 20:09 ` John Fastabend
  2016-11-29 20:10 ` [net-next PATCH v3 3/6] virtio_net: Add XDP support John Fastabend
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 15+ messages in thread
From: John Fastabend @ 2016-11-29 20:09 UTC (permalink / raw)
  To: eric.dumazet, daniel, shm, davem, tgraf, alexei.starovoitov
  Cc: john.r.fastabend, netdev, bblanco, john.fastabend, brouer

This adds a warning for drivers to use when encountering an invalid
buffer for XDP. For normal cases this should not happen, but having a
standard warning is useful to catch unexpected behavior from the
emulation layer in virtual/qemu setups.
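
A driver's receive path might consume the helper like this (hedged
sketch, mirroring how patch 3 uses it):

	/* num_buf > 1 means the backend split the packet across buffers,
	 * which the XDP setup conditions were supposed to preclude
	 */
	if (unlikely(num_buf > 1)) {
		bpf_warn_invalid_xdp_buffer(); /* WARN_ONCE, so dmesg is not flooded */
		goto drop;
	}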

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 include/linux/filter.h |    1 +
 net/core/filter.c      |    6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1f09c52..0c79004 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -595,6 +595,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 void bpf_warn_invalid_xdp_action(u32 act);
+void bpf_warn_invalid_xdp_buffer(void);
 
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
diff --git a/net/core/filter.c b/net/core/filter.c
index dece94f..5f3ed4e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2774,6 +2774,12 @@ void bpf_warn_invalid_xdp_action(u32 act)
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
+void bpf_warn_invalid_xdp_buffer(void)
+{
+	WARN_ONCE(1, "Illegal XDP buffer encountered, expect throughput degradation\n");
+}
+EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer);
+
 static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 					int src_reg, int ctx_off,
 					struct bpf_insn *insn_buf,


* [net-next PATCH v3 3/6] virtio_net: Add XDP support
  2016-11-29 20:09 [net-next PATCH v3 1/6] net: virtio dynamically disable/enable LRO John Fastabend
  2016-11-29 20:09 ` [net-next PATCH v3 2/6] net: xdp: add invalid buffer warning John Fastabend
@ 2016-11-29 20:10 ` John Fastabend
  2016-11-30 18:54   ` Michael S. Tsirkin
  2016-11-29 20:10 ` [net-next PATCH v3 4/6] virtio_net: add dedicated XDP transmit queues John Fastabend
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 15+ messages in thread
From: John Fastabend @ 2016-11-29 20:10 UTC (permalink / raw)
  To: eric.dumazet, daniel, shm, davem, tgraf, alexei.starovoitov
  Cc: john.r.fastabend, netdev, bblanco, john.fastabend, brouer

From: John Fastabend <john.fastabend@gmail.com>

This adds XDP support to virtio_net. Some requirements must be
met for XDP to be enabled depending on the mode. First it will
only be supported with LRO disabled so that data is not pushed
across multiple buffers. Second the MTU must be less than a page
size to avoid having to handle XDP across multiple pages.

If mergeable receive is enabled this patch only supports the case
where header and data are in the same buffer, which we can check when
a packet is received by looking at num_buf. If num_buf is
greater than 1 and an XDP program is loaded the packet is dropped
and a warning is thrown. When any_header_sg is set this does not
happen and both header and data are put in a single buffer as expected,
so we check this when XDP programs are loaded. Subsequent patches
will process the packet in a degraded mode to ensure connectivity
and correctness are not lost even if the backend pushes packets into
multiple buffers.

If big packets mode is enabled and MTU/LRO conditions above are
met then XDP is allowed.

This patch was tested with qemu with vhost=on and vhost=off, where
mergeable and big_packet modes were forced via hard-coded feature
negotiation. Multiple buffers per packet were forced via a small
test patch to vhost.c in the vhost=on qemu mode.
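
For readers reproducing the test setup, a minimal XDP program to
exercise these paths might look like the following (illustrative only,
not part of the patch; build with clang -O2 -target bpf -c xdp_pass.c):

	#include <linux/bpf.h>

	__attribute__((section("xdp"), used))
	int xdp_pass(struct xdp_md *ctx)
	{
		void *data = (void *)(long)ctx->data;
		void *data_end = (void *)(long)ctx->data_end;

		/* drop frames shorter than an Ethernet header,
		 * pass everything else up the stack
		 */
		if (data + 14 > data_end)
			return XDP_DROP;
		return XDP_PASS;
	}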

Suggested-by: Shrijeet Mukherjee <shrijeet@gmail.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 drivers/net/virtio_net.c |  154 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 150 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8189e5b..32126bf 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/virtio.h>
 #include <linux/virtio_net.h>
+#include <linux/bpf.h>
 #include <linux/scatterlist.h>
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
@@ -81,6 +82,8 @@ struct receive_queue {
 
 	struct napi_struct napi;
 
+	struct bpf_prog __rcu *xdp_prog;
+
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
@@ -324,6 +327,38 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	return skb;
 }
 
+static u32 do_xdp_prog(struct virtnet_info *vi,
+		       struct bpf_prog *xdp_prog,
+		       struct page *page, int offset, int len)
+{
+	int hdr_padded_len;
+	struct xdp_buff xdp;
+	u32 act;
+	u8 *buf;
+
+	buf = page_address(page) + offset;
+
+	if (vi->mergeable_rx_bufs)
+		hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	else
+		hdr_padded_len = sizeof(struct padded_vnet_hdr);
+
+	xdp.data = buf + hdr_padded_len;
+	xdp.data_end = xdp.data + (len - vi->hdr_len);
+
+	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+	switch (act) {
+	case XDP_PASS:
+		return XDP_PASS;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+	case XDP_TX:
+	case XDP_ABORTED:
+	case XDP_DROP:
+		return XDP_DROP;
+	}
+}
+
 static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
 {
 	struct sk_buff * skb = buf;
@@ -340,14 +375,28 @@ static struct sk_buff *receive_big(struct net_device *dev,
 				   void *buf,
 				   unsigned int len)
 {
+	struct bpf_prog *xdp_prog;
 	struct page *page = buf;
-	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+	struct sk_buff *skb;
 
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (xdp_prog) {
+		u32 act = do_xdp_prog(vi, xdp_prog, page, 0, len);
+
+		if (act == XDP_DROP)
+			goto err_xdp;
+	}
+	rcu_read_unlock();
+
+	skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
 	if (unlikely(!skb))
 		goto err;
 
 	return skb;
 
+err_xdp:
+	rcu_read_unlock();
 err:
 	dev->stats.rx_dropped++;
 	give_pages(rq, page);
@@ -366,10 +415,27 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	struct page *page = virt_to_head_page(buf);
 	int offset = buf - page_address(page);
 	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+	struct sk_buff *head_skb, *curr_skb;
+	struct bpf_prog *xdp_prog;
 
-	struct sk_buff *head_skb = page_to_skb(vi, rq, page, offset, len,
-					       truesize);
-	struct sk_buff *curr_skb = head_skb;
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (xdp_prog) {
+		u32 act;
+
+		if (num_buf > 1) {
+			bpf_warn_invalid_xdp_buffer();
+			goto err_xdp;
+		}
+
+		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
+		if (act == XDP_DROP)
+			goto err_xdp;
+	}
+	rcu_read_unlock();
+
+	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
+	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
 		goto err_skb;
@@ -423,6 +489,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
 	return head_skb;
 
+err_xdp:
+	rcu_read_unlock();
 err_skb:
 	put_page(page);
 	while (--num_buf) {
@@ -1328,6 +1396,13 @@ static int virtnet_set_channels(struct net_device *dev,
 	if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
 		return -EINVAL;
 
+	/* For now we don't support modifying channels while XDP is loaded
+	 * also when XDP is loaded all RX queues have XDP programs so we only
+	 * need to check a single RX queue.
+	 */
+	if (vi->rq[0].xdp_prog)
+		return -EINVAL;
+
 	get_online_cpus();
 	err = virtnet_set_queues(vi, queue_pairs);
 	if (!err) {
@@ -1454,6 +1529,68 @@ static int virtnet_set_features(struct net_device *netdev,
 	return 0;
 }
 
+static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct bpf_prog *old_prog;
+	int i;
+
+	if ((dev->features & NETIF_F_LRO) && prog) {
+		netdev_warn(dev, "can't set XDP while LRO is on, disable LRO first\n");
+		return -EINVAL;
+	}
+
+	if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
+		netdev_warn(dev, "XDP expects header/data in single page\n");
+		return -EINVAL;
+	}
+
+	if (dev->mtu > PAGE_SIZE) {
+		netdev_warn(dev, "XDP requires MTU less than %lu\n", PAGE_SIZE);
+		return -EINVAL;
+	}
+
+	if (prog) {
+		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
+		if (IS_ERR(prog))
+			return PTR_ERR(prog);
+	}
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
+		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+		if (old_prog)
+			bpf_prog_put(old_prog);
+	}
+
+	return 0;
+}
+
+static bool virtnet_xdp_query(struct net_device *dev)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	int i;
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		if (vi->rq[i].xdp_prog)
+			return true;
+	}
+	return false;
+}
+
+static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return virtnet_xdp_set(dev, xdp->prog);
+	case XDP_QUERY_PROG:
+		xdp->prog_attached = virtnet_xdp_query(dev);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop   	     = virtnet_close,
@@ -1471,6 +1608,7 @@ static int virtnet_set_features(struct net_device *netdev,
 	.ndo_busy_poll		= virtnet_busy_poll,
 #endif
 	.ndo_set_features	= virtnet_set_features,
+	.ndo_xdp		= virtnet_xdp,
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)
@@ -1527,12 +1665,20 @@ static void virtnet_free_queues(struct virtnet_info *vi)
 
 static void free_receive_bufs(struct virtnet_info *vi)
 {
+	struct bpf_prog *old_prog;
 	int i;
 
+	rtnl_lock();
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		while (vi->rq[i].pages)
 			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
+
+		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
+		RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
+		if (old_prog)
+			bpf_prog_put(old_prog);
 	}
+	rtnl_unlock();
 }
 
 static void free_receive_page_frags(struct virtnet_info *vi)


* [net-next PATCH v3 4/6] virtio_net: add dedicated XDP transmit queues
  2016-11-29 20:09 [net-next PATCH v3 1/6] net: virtio dynamically disable/enable LRO John Fastabend
  2016-11-29 20:09 ` [net-next PATCH v3 2/6] net: xdp: add invalid buffer warning John Fastabend
  2016-11-29 20:10 ` [net-next PATCH v3 3/6] virtio_net: Add XDP support John Fastabend
@ 2016-11-29 20:10 ` John Fastabend
  2016-11-29 20:11 ` [net-next PATCH v3 5/6] virtio_net: add XDP_TX support John Fastabend
  2016-11-29 20:11 ` [net-next PATCH v3 6/6] virtio_net: xdp, add slowpath case for non contiguous buffers John Fastabend
  4 siblings, 0 replies; 15+ messages in thread
From: John Fastabend @ 2016-11-29 20:10 UTC (permalink / raw)
  To: eric.dumazet, daniel, shm, davem, tgraf, alexei.starovoitov
  Cc: john.r.fastabend, netdev, bblanco, john.fastabend, brouer

XDP requires using isolated transmit queues to avoid interference
with the normal networking stack (BQL, NETDEV_TX_BUSY, etc). This
patch adds an XDP queue per CPU when an XDP program is loaded and
does not expose the queues to the OS via the normal API call to
netif_set_real_num_tx_queues(). This way the stack will never push
an skb to these queues.

However, the virtio/vhost/qemu implementation only allows creating
TX/RX queue pairs at this time, so creating only TX queues was not
possible. And because the associated RX queues are being created
anyway, I went ahead and exposed these to the stack and let the
backend use them. This creates more RX queues visible to the network
stack than TX queues, which is worth mentioning but does not cause
any issues as far as I can tell.
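
To make the queue accounting concrete, here is a worked example with
illustrative numbers (not taken from the patch):

	/* max_queue_pairs = 8, nr_cpu_ids = 4, stack currently using 4:
	 *
	 *   curr_qp = curr_queue_pairs - xdp_queue_pairs = 4
	 *   xdp_qp  = nr_cpu_ids                         = 4
	 *   curr_qp + xdp_qp = 8 <= max_queue_pairs      -> allowed
	 *
	 * After virtnet_set_queues(vi, 8), TX queues 4..7 belong to XDP
	 * and are never announced via netif_set_real_num_tx_queues(),
	 * so the stack keeps transmitting only on queues 0..3.
	 */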

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 drivers/net/virtio_net.c |   30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 32126bf..a1bfa99 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -114,6 +114,9 @@ struct virtnet_info {
 	/* # of queue pairs currently used by the driver */
 	u16 curr_queue_pairs;
 
+	/* # of XDP queue pairs currently used by the driver */
+	u16 xdp_queue_pairs;
+
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
@@ -1533,7 +1536,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct bpf_prog *old_prog;
-	int i;
+	u16 xdp_qp = 0, curr_qp;
+	int err, i;
 
 	if ((dev->features & NETIF_F_LRO) && prog) {
 		netdev_warn(dev, "can't set XDP while LRO is on, disable LRO first\n");
@@ -1550,12 +1554,34 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 		return -EINVAL;
 	}
 
+	curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
+	if (prog)
+		xdp_qp = nr_cpu_ids;
+
+	/* XDP requires extra queues for XDP_TX */
+	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
+		netdev_warn(dev, "request %i queues but max is %i\n",
+			    curr_qp + xdp_qp, vi->max_queue_pairs);
+		return -ENOMEM;
+	}
+
+	err = virtnet_set_queues(vi, curr_qp + xdp_qp);
+	if (err) {
+		dev_warn(&dev->dev, "XDP Device queue allocation failure.\n");
+		return err;
+	}
+
 	if (prog) {
 		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
-		if (IS_ERR(prog))
+		if (IS_ERR(prog)) {
+			virtnet_set_queues(vi, curr_qp);
 			return PTR_ERR(prog);
+		}
 	}
 
+	vi->xdp_queue_pairs = xdp_qp;
+	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
 		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);


* [net-next PATCH v3 5/6] virtio_net: add XDP_TX support
  2016-11-29 20:09 [net-next PATCH v3 1/6] net: virtio dynamically disable/enable LRO John Fastabend
                   ` (2 preceding siblings ...)
  2016-11-29 20:10 ` [net-next PATCH v3 4/6] virtio_net: add dedicated XDP transmit queues John Fastabend
@ 2016-11-29 20:11 ` John Fastabend
  2016-11-30 18:45   ` Michael S. Tsirkin
  2016-11-29 20:11 ` [net-next PATCH v3 6/6] virtio_net: xdp, add slowpath case for non contiguous buffers John Fastabend
  4 siblings, 1 reply; 15+ messages in thread
From: John Fastabend @ 2016-11-29 20:11 UTC (permalink / raw)
  To: eric.dumazet, daniel, shm, davem, tgraf, alexei.starovoitov
  Cc: john.r.fastabend, netdev, bblanco, john.fastabend, brouer

This adds support for the XDP_TX action to virtio_net. When an XDP
program is run and returns the XDP_TX action the virtio_net XDP
implementation will transmit the packet on a TX queue that aligns
with the current CPU that the XDP packet was processed on.

Before sending the packet the header is zeroed. Also XDP is expected
to handle checksums correctly, so no checksum offload support is
provided.
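
Continuing the illustrative numbers from patch 4 (8 queue pairs in use,
4 of them reserved for XDP), the TX queue selection works out as:

	/* qp = curr_queue_pairs - xdp_queue_pairs + smp_processor_id()
	 *    = 8 - 4 + cpu
	 * CPU 0 transmits on queue 4, CPU 1 on queue 5, and so on: one
	 * dedicated TX queue per CPU, so no lock against the stack's
	 * own TX path is needed.
	 */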

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 drivers/net/virtio_net.c |   59 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index a1bfa99..9604e55 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -330,12 +330,40 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	return skb;
 }
 
+static void virtnet_xdp_xmit(struct virtnet_info *vi,
+			     unsigned int qnum, struct xdp_buff *xdp)
+{
+	struct send_queue *sq = &vi->sq[qnum];
+	struct virtio_net_hdr_mrg_rxbuf *hdr;
+	unsigned int num_sg, len;
+	void *xdp_sent;
+
+	/* Free up any pending old buffers before queueing new ones. */
+	while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+		struct page *page = virt_to_head_page(xdp_sent);
+
+		put_page(page);
+	}
+
+	/* Zero header and leave csum up to XDP layers */
+	hdr = xdp->data;
+	memset(hdr, 0, vi->hdr_len);
+	hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
+	hdr->hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
+
+	num_sg = 1;
+	sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
+	virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, xdp->data, GFP_ATOMIC);
+	virtqueue_kick(sq->vq);
+}
+
 static u32 do_xdp_prog(struct virtnet_info *vi,
 		       struct bpf_prog *xdp_prog,
 		       struct page *page, int offset, int len)
 {
 	int hdr_padded_len;
 	struct xdp_buff xdp;
+	unsigned int qp;
 	u32 act;
 	u8 *buf;
 
@@ -353,9 +381,15 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
 	switch (act) {
 	case XDP_PASS:
 		return XDP_PASS;
+	case XDP_TX:
+		qp = vi->curr_queue_pairs -
+			vi->xdp_queue_pairs +
+			smp_processor_id();
+		xdp.data = buf + (vi->mergeable_rx_bufs ? 0 : 4);
+		virtnet_xdp_xmit(vi, qp, &xdp);
+		return XDP_TX;
 	default:
 		bpf_warn_invalid_xdp_action(act);
-	case XDP_TX:
 	case XDP_ABORTED:
 	case XDP_DROP:
 		return XDP_DROP;
@@ -387,8 +421,16 @@ static struct sk_buff *receive_big(struct net_device *dev,
 	if (xdp_prog) {
 		u32 act = do_xdp_prog(vi, xdp_prog, page, 0, len);
 
-		if (act == XDP_DROP)
+		switch (act) {
+		case XDP_PASS:
+			break;
+		case XDP_TX:
+			rcu_read_unlock();
+			goto xdp_xmit;
+		case XDP_DROP:
+		default:
 			goto err_xdp;
+		}
 	}
 	rcu_read_unlock();
 
@@ -403,6 +445,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
 err:
 	dev->stats.rx_dropped++;
 	give_pages(rq, page);
+xdp_xmit:
 	return NULL;
 }
 
@@ -421,6 +464,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	struct sk_buff *head_skb, *curr_skb;
 	struct bpf_prog *xdp_prog;
 
+	head_skb = NULL;
+
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
@@ -432,8 +477,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		}
 
 		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
-		if (act == XDP_DROP)
+		switch (act) {
+		case XDP_PASS:
+			break;
+		case XDP_TX:
+			goto xdp_xmit;
+		case XDP_DROP:
+		default:
 			goto err_xdp;
+		}
 	}
 	rcu_read_unlock();
 
@@ -510,6 +562,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 err_buf:
 	dev->stats.rx_dropped++;
 	dev_kfree_skb(head_skb);
+xdp_xmit:
 	return NULL;
 }
 


* [net-next PATCH v3 6/6] virtio_net: xdp, add slowpath case for non contiguous buffers
  2016-11-29 20:09 [net-next PATCH v3 1/6] net: virtio dynamically disable/enable LRO John Fastabend
                   ` (3 preceding siblings ...)
  2016-11-29 20:11 ` [net-next PATCH v3 5/6] virtio_net: add XDP_TX support John Fastabend
@ 2016-11-29 20:11 ` John Fastabend
  2016-11-30  0:37   ` Alexei Starovoitov
  2016-11-30 14:30   ` Jakub Kicinski
  4 siblings, 2 replies; 15+ messages in thread
From: John Fastabend @ 2016-11-29 20:11 UTC (permalink / raw)
  To: eric.dumazet, daniel, shm, davem, tgraf, alexei.starovoitov
  Cc: john.r.fastabend, netdev, bblanco, john.fastabend, brouer

virtio_net XDP support expects receive buffers to be contiguous.
If this is not the case we enable a slowpath to allow connectivity
to continue but at a significant performance overhead associated with
linearizing the data. To make users painfully aware that XDP is
running in a degraded mode we throw an xdp buffer warning.

To linearize packets we allocate a page and copy the segments of
the data, including the header, into it. After this the page can be
handled by XDP code flow as normal.

Then depending on the return code the page is either freed or sent
to the XDP xmit path. There is no attempt to optimize this path.
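
When the slowpath triggers, the WARN_ONCE added in patch 2 shows up
once in dmesg (followed by the usual WARN backtrace):

	Illegal XDP buffer encountered, expect throughput degradation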

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 drivers/net/virtio_net.c |   70 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9604e55..b0ce4ef 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -449,6 +449,56 @@ static struct sk_buff *receive_big(struct net_device *dev,
 	return NULL;
 }
 
+/* The conditions to enable XDP should preclude the underlying device from
+ * sending packets across multiple buffers (num_buf > 1). However per spec
+ * it does not appear to be illegal to do so but rather just against convention.
+ * So in order to avoid making a system unresponsive the packets are pushed
+ * into a page and the XDP program is run. This will be extremely slow and we
+ * push a warning to the user to fix this as soon as possible. Fixing this may
+ * require resolving the underlying hardware to determine why multiple buffers
+ * are being received or simply loading the XDP program in the ingress stack
+ * after the skb is built because there is no advantage to running it here
+ * anymore.
+ */
+static struct page *xdp_linearize_page(struct receive_queue *rq,
+				       u16 num_buf,
+				       struct page *p,
+				       int offset,
+				       unsigned int *len)
+{
+	struct page *page = alloc_page(GFP_ATOMIC);
+	unsigned int page_off = 0;
+
+	if (!page)
+		return NULL;
+
+	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
+	while (--num_buf) {
+		unsigned int buflen;
+		unsigned long ctx;
+		void *buf;
+		int off;
+
+		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &buflen);
+		if (unlikely(!ctx))
+			goto err_buf;
+
+		buf = mergeable_ctx_to_buf_address(ctx);
+		p = virt_to_head_page(buf);
+		off = buf - page_address(p);
+
+		memcpy(page_address(page) + page_off,
+		       page_address(p) + off, buflen);
+		page_off += buflen;
+	}
+
+	*len = page_off;
+	return page;
+err_buf:
+	__free_pages(page, 0);
+	return NULL;
+}
+
 static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 struct virtnet_info *vi,
 					 struct receive_queue *rq,
@@ -469,21 +519,37 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
+		struct page *xdp_page;
 		u32 act;
 
 		if (num_buf > 1) {
 			bpf_warn_invalid_xdp_buffer();
-			goto err_xdp;
+
+			/* linearize data for XDP */
+			xdp_page = xdp_linearize_page(rq, num_buf,
+						      page, offset, &len);
+			if (!xdp_page)
+				goto err_xdp;
+			offset = len;
+		} else {
+			xdp_page = page;
 		}
 
-		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
+		act = do_xdp_prog(vi, xdp_prog, xdp_page, offset, len);
 		switch (act) {
 		case XDP_PASS:
+			if (unlikely(xdp_page != page))
+				__free_pages(xdp_page, 0);
 			break;
 		case XDP_TX:
+			if (unlikely(xdp_page != page))
+				goto err_xdp;
+			rcu_read_unlock();
 			goto xdp_xmit;
 		case XDP_DROP:
 		default:
+			if (unlikely(xdp_page != page))
+				__free_pages(xdp_page, 0);
 			goto err_xdp;
 		}
 	}


* Re: [net-next PATCH v3 6/6] virtio_net: xdp, add slowpath case for non contiguous buffers
  2016-11-29 20:11 ` [net-next PATCH v3 6/6] virtio_net: xdp, add slowpath case for non contiguous buffers John Fastabend
@ 2016-11-30  0:37   ` Alexei Starovoitov
  2016-11-30  2:50     ` John Fastabend
  2016-11-30 14:30   ` Jakub Kicinski
  1 sibling, 1 reply; 15+ messages in thread
From: Alexei Starovoitov @ 2016-11-30  0:37 UTC (permalink / raw)
  To: John Fastabend
  Cc: eric.dumazet, daniel, shm, davem, tgraf, john.r.fastabend, netdev,
	bblanco, brouer

On Tue, Nov 29, 2016 at 12:11:33PM -0800, John Fastabend wrote:
> virtio_net XDP support expects receive buffers to be contiguous.
> If this is not the case we enable a slowpath to allow connectivity
> to continue but at a significant performance overhead associated with
> linearizing the data. To make users painfully aware that XDP is
> running in a degraded mode we throw an xdp buffer warning.
> 
> To linearize packets we allocate a page and copy the segments of
> the data, including the header, into it. After this the page can be
> handled by XDP code flow as normal.
> 
> Then depending on the return code the page is either freed or sent
> to the XDP xmit path. There is no attempt to optimize this path.
> 
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
...
> +/* The conditions to enable XDP should preclude the underlying device from
> + * sending packets across multiple buffers (num_buf > 1). However per spec
> + * it does not appear to be illegal to do so but rather just against convention.
> + * So in order to avoid making a system unresponsive the packets are pushed
> + * into a page and the XDP program is run. This will be extremely slow and we
> + * push a warning to the user to fix this as soon as possible. Fixing this may
> + * require resolving the underlying hardware to determine why multiple buffers
> + * are being received or simply loading the XDP program in the ingress stack
> + * after the skb is built because there is no advantage to running it here
> + * anymore.
> + */
...
>  		if (num_buf > 1) {
>  			bpf_warn_invalid_xdp_buffer();
> -			goto err_xdp;
> +
> +			/* linearize data for XDP */
> +			xdp_page = xdp_linearize_page(rq, num_buf,
> +						      page, offset, &len);
> +			if (!xdp_page)
> +				goto err_xdp;

in case when we're 'lucky' the performance will silently be bad.
Can we do warn_once here? so at least something in dmesg points out
that performance is not as expected. Am I reading it correctly that
you had to do a special kernel hack to trigger this situation and
in all normal cases it's not the case?


* Re: [net-next PATCH v3 6/6] virtio_net: xdp, add slowpath case for non contiguous buffers
  2016-11-30  0:37   ` Alexei Starovoitov
@ 2016-11-30  2:50     ` John Fastabend
  2016-11-30  5:23       ` Alexei Starovoitov
  0 siblings, 1 reply; 15+ messages in thread
From: John Fastabend @ 2016-11-30  2:50 UTC (permalink / raw)
  To: Alexei Starovoitov, Michael S. Tsirkin
  Cc: eric.dumazet, daniel, shm, davem, tgraf, john.r.fastabend, netdev,
	bblanco, brouer

On 16-11-29 04:37 PM, Alexei Starovoitov wrote:
> On Tue, Nov 29, 2016 at 12:11:33PM -0800, John Fastabend wrote:
>> virtio_net XDP support expects receive buffers to be contiguous.
>> If this is not the case we enable a slowpath to allow connectivity
>> to continue but at a significant performance overhead associated with
>> linearizing the data. To make users painfully aware that XDP is
>> running in a degraded mode we throw an xdp buffer warning.
>>
>> To linearize packets we allocate a page and copy the segments of
>> the data, including the header, into it. After this the page can be
>> handled by XDP code flow as normal.
>>
>> Then depending on the return code the page is either freed or sent
>> to the XDP xmit path. There is no attempt to optimize this path.
>>
>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> ...
>> +/* The conditions to enable XDP should preclude the underlying device from
>> + * sending packets across multiple buffers (num_buf > 1). However per spec
>> + * it does not appear to be illegal to do so but rather just against convention.
>> + * So in order to avoid making a system unresponsive the packets are pushed
>> + * into a page and the XDP program is run. This will be extremely slow and we
>> + * push a warning to the user to fix this as soon as possible. Fixing this may
>> + * require resolving the underlying hardware to determine why multiple buffers
>> + * are being received or simply loading the XDP program in the ingress stack
>> + * after the skb is built because there is no advantage to running it here
>> + * anymore.
>> + */
> ...
>>  		if (num_buf > 1) {
>>  			bpf_warn_invalid_xdp_buffer();
			^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Here is the warn once call. I made it a defined bpf warning so that we
can easily identify it if needed, and so it can be used by other drivers
as well.

>> -			goto err_xdp;
>> +
>> +			/* linearize data for XDP */
>> +			xdp_page = xdp_linearize_page(rq, num_buf,
>> +						      page, offset, &len);
>> +			if (!xdp_page)
>> +				goto err_xdp;
> 
> in case when we're 'lucky' the performance will silently be bad.

Were you specifically worried about the alloc failing here with no
indication? I was thinking a statistics counter could be added as a
follow-on series to catch this and other such cases in non-XDP paths
if needed.

> Can we do warn_once here? so at least something in dmesg points out
> that performance is not as expected. Am I reading it correctly that
> you had to do a special kernel hack to trigger this situation and
> in all normal cases it's not the case?
> 

Correct, the only way to produce this with upstream qemu and Linux is
to write a kernel hack to build these types of buffers. AFAIK I caught
all the cases where it could happen otherwise in the setup xdp prog
call and required the user to configure the device driver so they can
not happen, e.g. disable LRO, set MTU < PAGE_SIZE, etc.

If I missed anything (I don't see it now) I would call it a bug and fix
it. Again, AFAIK everything should be covered though. As Michael pointed
out earlier, although unexpected, it is not strictly against the virtio
spec to build such packets, so we should handle them at least in a
degraded mode even though they are not expected in any qemu cases.

.John


* Re: [net-next PATCH v3 6/6] virtio_net: xdp, add slowpath case for non contiguous buffers
  2016-11-30  2:50     ` John Fastabend
@ 2016-11-30  5:23       ` Alexei Starovoitov
  0 siblings, 0 replies; 15+ messages in thread
From: Alexei Starovoitov @ 2016-11-30  5:23 UTC (permalink / raw)
  To: John Fastabend
  Cc: Michael S. Tsirkin, eric.dumazet, daniel, shm, davem, tgraf,
	john.r.fastabend, netdev, bblanco, brouer

On Tue, Nov 29, 2016 at 06:50:18PM -0800, John Fastabend wrote:
> On 16-11-29 04:37 PM, Alexei Starovoitov wrote:
> > On Tue, Nov 29, 2016 at 12:11:33PM -0800, John Fastabend wrote:
> >> virtio_net XDP support expects receive buffers to be contiguous.
> >> If this is not the case we enable a slowpath to allow connectivity
> >> to continue but at a significant performance overhead associated with
> >> linearizing the data. To make users painfully aware that XDP is
> >> running in a degraded mode we throw an xdp buffer warning.
> >>
> >> To linearize packets we allocate a page and copy the segments of
> >> the data, including the header, into it. After this the page can be
> >> handled by XDP code flow as normal.
> >>
> >> Then depending on the return code the page is either freed or sent
> >> to the XDP xmit path. There is no attempt to optimize this path.
> >>
> >> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> > ...
> >> +/* The conditions to enable XDP should preclude the underlying device from
> >> + * sending packets across multiple buffers (num_buf > 1). However per spec
> >> + * it does not appear to be illegal to do so but rather just against convention.
> >> + * So in order to avoid making a system unresponsive the packets are pushed
> >> + * into a page and the XDP program is run. This will be extremely slow and we
> >> + * push a warning to the user to fix this as soon as possible. Fixing this may
> >> + * require resolving the underlying hardware to determine why multiple buffers
> >> + * are being received or simply loading the XDP program in the ingress stack
> >> + * after the skb is built because there is no advantage to running it here
> >> + * anymore.
> >> + */
> > ...
> >>  		if (num_buf > 1) {
> >>  			bpf_warn_invalid_xdp_buffer();
> 			^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 
> Here is the warn once call. I made it a defined bpf warning so that we
> can easily identify if needed and it can be used by other drivers as
> well.

ahh. I thought it had a '-' in front of it :)
and that you were deleting it in this patch.

> >> -			goto err_xdp;
> >> +
> >> +			/* linearize data for XDP */
> >> +			xdp_page = xdp_linearize_page(rq, num_buf,
> >> +						      page, offset, &len);
> >> +			if (!xdp_page)
> >> +				goto err_xdp;
> > 
> > in case when we're 'lucky' the performance will silently be bad.
> 
> Were you specifically worried about the alloc failing here and no
> indication? I was thinking a statistic counter could be added as a
> follow on series to catch this and other such cases in non XDP paths
> if needed.
> 
> > Can we do warn_once here? so at least something in dmesg points out
> > that performance is not as expected. Am I reading it correctly that
> > you had to do a special kernel hack to trigger this situation and
> > in all normal cases it's not the case?
> > 
> 
> Correct, the only way to produce this with upstream qemu and Linux is
> to write a kernel hack to build these types of buffers. AFAIK I caught
> all the cases where it could happen otherwise in the setup xdp prog
> call and required the user to configure the device driver so they can
> not happen, e.g. disable LRO, set MTU < PAGE_SIZE, etc.
> 
> If I missed anything (I don't see it now) I would call it a bug and fix
> it. Again, AFAIK everything should be covered though. As Michael pointed
> out earlier, although unexpected, it is not strictly against the virtio
> spec to build such packets, so we should handle them at least in a
> degraded mode even though they are not expected in any qemu cases.

Ok. Makes sense. The above wasn't clear in the commit log.
Thanks


* Re: [net-next PATCH v3 6/6] virtio_net: xdp, add slowpath case for non contiguous buffers
  2016-11-29 20:11 ` [net-next PATCH v3 6/6] virtio_net: xdp, add slowpath case for non contiguous buffers John Fastabend
  2016-11-30  0:37   ` Alexei Starovoitov
@ 2016-11-30 14:30   ` Jakub Kicinski
  2016-11-30 16:50     ` John Fastabend
  1 sibling, 1 reply; 15+ messages in thread
From: Jakub Kicinski @ 2016-11-30 14:30 UTC (permalink / raw)
  To: John Fastabend, Michael S. Tsirkin
  Cc: eric.dumazet, daniel, shm, davem, tgraf, alexei.starovoitov,
	john.r.fastabend, netdev, bblanco, brouer

[add MST]

On Tue, 29 Nov 2016 12:11:33 -0800, John Fastabend wrote:
> virtio_net XDP support expects receive buffers to be contiguous.
> If this is not the case we enable a slowpath to allow connectivity
> to continue but at a significant performance overhead associated with
> linearizing the data. To make users painfully aware that XDP is
> running in a degraded mode we throw an xdp buffer warning.
> 
> To linearize packets we allocate a page and copy the segments of
> the data, including the header, into it. After this the page can be
> handled by XDP code flow as normal.
> 
> Then depending on the return code the page is either freed or sent
> to the XDP xmit path. There is no attempt to optimize this path.
> 
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> ---
>  drivers/net/virtio_net.c |   70 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 68 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 9604e55..b0ce4ef 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -449,6 +449,56 @@ static struct sk_buff *receive_big(struct net_device *dev,
>  	return NULL;
>  }
>  
> +/* The conditions to enable XDP should preclude the underlying device from
> + * sending packets across multiple buffers (num_buf > 1). However per spec
> + * it does not appear to be illegal to do so but rather just against convention.
> + * So in order to avoid making a system unresponsive the packets are pushed
> + * into a page and the XDP program is run. This will be extremely slow and we
> + * push a warning to the user to fix this as soon as possible. Fixing this may
> + * require resolving the underlying hardware to determine why multiple buffers
> + * are being received or simply loading the XDP program in the ingress stack
> + * after the skb is built because there is no advantage to running it here
> + * anymore.
> + */
> +static struct page *xdp_linearize_page(struct receive_queue *rq,
> +				       u16 num_buf,
> +				       struct page *p,
> +				       int offset,
> +				       unsigned int *len)
> +{
> +	struct page *page = alloc_page(GFP_ATOMIC);
> +	unsigned int page_off = 0;
> +
> +	if (!page)
> +		return NULL;
> +
> +	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
> +	while (--num_buf) {
> +		unsigned int buflen;
> +		unsigned long ctx;
> +		void *buf;
> +		int off;
> +
> +		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &buflen);
> +		if (unlikely(!ctx))
> +			goto err_buf;
> +
> +		buf = mergeable_ctx_to_buf_address(ctx);
> +		p = virt_to_head_page(buf);
> +		off = buf - page_address(p);
> +
> +		memcpy(page_address(page) + page_off,
> +		       page_address(p) + off, buflen);
> +		page_off += buflen;

Could a malicious user potentially submit a frame bigger than the MTU?

> +	}
> +
> +	*len = page_off;
> +	return page;
> +err_buf:
> +	__free_pages(page, 0);
> +	return NULL;
> +}
> +
>  static struct sk_buff *receive_mergeable(struct net_device *dev,
>  					 struct virtnet_info *vi,
>  					 struct receive_queue *rq,
> @@ -469,21 +519,37 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  	rcu_read_lock();
>  	xdp_prog = rcu_dereference(rq->xdp_prog);
>  	if (xdp_prog) {
> +		struct page *xdp_page;
>  		u32 act;
>  
>  		if (num_buf > 1) {
>  			bpf_warn_invalid_xdp_buffer();
> -			goto err_xdp;
> +
> +			/* linearize data for XDP */
> +			xdp_page = xdp_linearize_page(rq, num_buf,
> +						      page, offset, &len);
> +			if (!xdp_page)
> +				goto err_xdp;
> +			offset = len;
> +		} else {
> +			xdp_page = page;
>  		}
>  
> -		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
> +		act = do_xdp_prog(vi, xdp_prog, xdp_page, offset, len);
>  		switch (act) {
>  		case XDP_PASS:
> +			if (unlikely(xdp_page != page))
> +				__free_pages(xdp_page, 0);
>  			break;
>  		case XDP_TX:
> +			if (unlikely(xdp_page != page))
> +				goto err_xdp;
> +			rcu_read_unlock();

Only if there is a reason for v4 - this unlock could go to the previous
patch.


* Re: [net-next PATCH v3 6/6] virtio_net: xdp, add slowpath case for non contiguous buffers
  2016-11-30 14:30   ` Jakub Kicinski
@ 2016-11-30 16:50     ` John Fastabend
  2016-11-30 18:49       ` Michael S. Tsirkin
  0 siblings, 1 reply; 15+ messages in thread
From: John Fastabend @ 2016-11-30 16:50 UTC (permalink / raw)
  To: Jakub Kicinski, Michael S. Tsirkin
  Cc: eric.dumazet, daniel, shm, davem, tgraf, alexei.starovoitov,
	john.r.fastabend, netdev, bblanco, brouer

On 16-11-30 06:30 AM, Jakub Kicinski wrote:
> [add MST]
> 

Thanks, sorry MST. I did a cut'n'paste of an old list of CCs and missed
that you were not on the list.

[...]

>> +	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
>> +	while (--num_buf) {
>> +		unsigned int buflen;
>> +		unsigned long ctx;
>> +		void *buf;
>> +		int off;
>> +
>> +		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &buflen);
>> +		if (unlikely(!ctx))
>> +			goto err_buf;
>> +
>> +		buf = mergeable_ctx_to_buf_address(ctx);
>> +		p = virt_to_head_page(buf);
>> +		off = buf - page_address(p);
>> +
>> +		memcpy(page_address(page) + page_off,
>> +		       page_address(p) + off, buflen);
>> +		page_off += buflen;
> 
> Could a malicious user potentially submit a frame bigger than the MTU?

Well presumably if the MTU is greater than PAGE_SIZE the xdp program
would not have been loaded. And the malicious user in this case would
have to be qemu, in which case everything is already lost if qemu
is trying to attack its VM.

But this is a good point because it looks like there is nothing in
virtio or qemu that drops frames with MTU greater than the virtio
configured setting. Maybe Michael can confirm this or I'll poke at it
more. I think qemu should drop these frames in general.

So I think adding a guard here is sensible and I'll go ahead and do that.
Also the MTU guard at set_xdp time needs to account for header length.

Thanks nice catch.
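
For concreteness, a minimal sketch of the two guards discussed above
(hedged; the exact form would be settled in v4):

	/* in the xdp_linearize_page() copy loop: never write past the page */
	if (unlikely(page_off + buflen > PAGE_SIZE)) {
		put_page(p);
		goto err_buf;
	}

	/* in virtnet_xdp_set(): leave room for the vnet header in the page */
	if (dev->mtu > PAGE_SIZE - sizeof(struct virtio_net_hdr_mrg_rxbuf)) {
		netdev_warn(dev, "XDP requires MTU less than %lu\n",
			    PAGE_SIZE - sizeof(struct virtio_net_hdr_mrg_rxbuf));
		return -EINVAL;
	}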

> 
>> +	}
>> +
>> +	*len = page_off;
>> +	return page;
>> +err_buf:
>> +	__free_pages(page, 0);
>> +	return NULL;
>> +}
>> +
>>  static struct sk_buff *receive_mergeable(struct net_device *dev,
>>  					 struct virtnet_info *vi,
>>  					 struct receive_queue *rq,
>> @@ -469,21 +519,37 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>  	rcu_read_lock();
>>  	xdp_prog = rcu_dereference(rq->xdp_prog);
>>  	if (xdp_prog) {
>> +		struct page *xdp_page;
>>  		u32 act;
>>  
>>  		if (num_buf > 1) {
>>  			bpf_warn_invalid_xdp_buffer();
>> -			goto err_xdp;
>> +
>> +			/* linearize data for XDP */
>> +			xdp_page = xdp_linearize_page(rq, num_buf,
>> +						      page, offset, &len);
>> +			if (!xdp_page)
>> +				goto err_xdp;
>> +			offset = len;
>> +		} else {
>> +			xdp_page = page;
>>  		}
>>  
>> -		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
>> +		act = do_xdp_prog(vi, xdp_prog, xdp_page, offset, len);
>>  		switch (act) {
>>  		case XDP_PASS:
>> +			if (unlikely(xdp_page != page))
>> +				__free_pages(xdp_page, 0);
>>  			break;
>>  		case XDP_TX:
>> +			if (unlikely(xdp_page != page))
>> +				goto err_xdp;
>> +			rcu_read_unlock();
> 
> Only if there is a reason for v4 - this unlock could go to the previous
> patch.
> 

Sure will do this.


* Re: [net-next PATCH v3 5/6] virtio_net: add XDP_TX support
  2016-11-29 20:11 ` [net-next PATCH v3 5/6] virtio_net: add XDP_TX support John Fastabend
@ 2016-11-30 18:45   ` Michael S. Tsirkin
  0 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2016-11-30 18:45 UTC (permalink / raw)
  To: John Fastabend
  Cc: eric.dumazet, daniel, shm, davem, tgraf, alexei.starovoitov,
	john.r.fastabend, netdev, bblanco, brouer

On Tue, Nov 29, 2016 at 12:11:08PM -0800, John Fastabend wrote:
> This adds support for the XDP_TX action to virtio_net. When an XDP
> program is run and returns the XDP_TX action the virtio_net XDP
> implementation will transmit the packet on a TX queue that aligns
> with the current CPU that the XDP packet was processed on.
> 
> Before sending the packet the header is zeroed. Also XDP is expected
> to handle checksums correctly, so no checksum offload support is
> provided.
> 
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> ---
>  drivers/net/virtio_net.c |   59 ++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 56 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index a1bfa99..9604e55 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -330,12 +330,40 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>  	return skb;
>  }
>  
> +static void virtnet_xdp_xmit(struct virtnet_info *vi,
> +			     unsigned int qnum, struct xdp_buff *xdp)
> +{
> +	struct send_queue *sq = &vi->sq[qnum];
> +	struct virtio_net_hdr_mrg_rxbuf *hdr;
> +	unsigned int num_sg, len;
> +	void *xdp_sent;
> +
> +	/* Free up any pending old buffers before queueing new ones. */
> +	while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> +		struct page *page = virt_to_head_page(xdp_sent);
> +
> +		put_page(page);
> +	}
> +
> +	/* Zero header and leave csum up to XDP layers */
> +	hdr = xdp->data;
> +	memset(hdr, 0, vi->hdr_len);
> +	hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
> +	hdr->hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;

Do we really want this?
This is CHECKSUM_UNNECESSARY.
Does not XDP pass checksummed packets?
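
A hedged sketch of the alternative being raised here (not a final fix):
stop after the memset, so the host sees neither GSO metadata nor a
checksum-validity claim:

	/* gso_type is already VIRTIO_NET_HDR_GSO_NONE (0) after this,
	 * and leaving flags at 0 avoids advertising DATA_VALID
	 * (CHECKSUM_UNNECESSARY) for packets XDP may have rewritten
	 */
	memset(hdr, 0, vi->hdr_len);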


> +
> +	num_sg = 1;
> +	sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
> +	virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, xdp->data, GFP_ATOMIC);

This might fail. If it does, you want to at least free up the memory.

> +	virtqueue_kick(sq->vq);
> +}
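
A minimal sketch of the error handling being asked for here (assumed
shape, not the final fix):

	int err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, xdp->data,
				       GFP_ATOMIC);
	if (unlikely(err)) {
		/* ring full / out of descriptors: drop the frame and
		 * release the page backing it instead of leaking it
		 */
		put_page(virt_to_head_page(xdp->data));
		return;
	}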
> +
>  static u32 do_xdp_prog(struct virtnet_info *vi,
>  		       struct bpf_prog *xdp_prog,
>  		       struct page *page, int offset, int len)
>  {
>  	int hdr_padded_len;
>  	struct xdp_buff xdp;
> +	unsigned int qp;
>  	u32 act;
>  	u8 *buf;
>  
> @@ -353,9 +381,15 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
>  	switch (act) {
>  	case XDP_PASS:
>  		return XDP_PASS;
> +	case XDP_TX:
> +		qp = vi->curr_queue_pairs -
> +			vi->xdp_queue_pairs +
> +			smp_processor_id();
> +		xdp.data = buf + (vi->mergeable_rx_bufs ? 0 : 4);
> +		virtnet_xdp_xmit(vi, qp, &xdp);
> +		return XDP_TX;
>  	default:
>  		bpf_warn_invalid_xdp_action(act);
> -	case XDP_TX:
>  	case XDP_ABORTED:
>  	case XDP_DROP:
>  		return XDP_DROP;
> @@ -387,8 +421,16 @@ static struct sk_buff *receive_big(struct net_device *dev,
>  	if (xdp_prog) {
>  		u32 act = do_xdp_prog(vi, xdp_prog, page, 0, len);
>  
> -		if (act == XDP_DROP)
> +		switch (act) {
> +		case XDP_PASS:
> +			break;
> +		case XDP_TX:
> +			rcu_read_unlock();
> +			goto xdp_xmit;
> +		case XDP_DROP:
> +		default:
>  			goto err_xdp;
> +		}
>  	}
>  	rcu_read_unlock();
>  
> @@ -403,6 +445,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
>  err:
>  	dev->stats.rx_dropped++;
>  	give_pages(rq, page);
> +xdp_xmit:
>  	return NULL;
>  }
>  
> @@ -421,6 +464,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  	struct sk_buff *head_skb, *curr_skb;
>  	struct bpf_prog *xdp_prog;
>  
> +	head_skb = NULL;
> +
>  	rcu_read_lock();
>  	xdp_prog = rcu_dereference(rq->xdp_prog);
>  	if (xdp_prog) {
> @@ -432,8 +477,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  		}
>  
>  		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
> -		if (act == XDP_DROP)
> +		switch (act) {
> +		case XDP_PASS:
> +			break;
> +		case XDP_TX:
> +			goto xdp_xmit;
> +		case XDP_DROP:
> +		default:
>  			goto err_xdp;
> +		}
>  	}
>  	rcu_read_unlock();
>  
> @@ -510,6 +562,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  err_buf:
>  	dev->stats.rx_dropped++;
>  	dev_kfree_skb(head_skb);
> +xdp_xmit:
>  	return NULL;
>  }
>  


* Re: [net-next PATCH v3 6/6] virtio_net: xdp, add slowpath case for non contiguous buffers
  2016-11-30 16:50     ` John Fastabend
@ 2016-11-30 18:49       ` Michael S. Tsirkin
  0 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2016-11-30 18:49 UTC (permalink / raw)
  To: John Fastabend
  Cc: Jakub Kicinski, eric.dumazet, daniel, shm, davem, tgraf,
	alexei.starovoitov, john.r.fastabend, netdev, bblanco, brouer

On Wed, Nov 30, 2016 at 08:50:41AM -0800, John Fastabend wrote:
> On 16-11-30 06:30 AM, Jakub Kicinski wrote:
> > [add MST]
> > 
> 
> Thanks, sorry MST. I did a cut'n'paste of an old list of CCs and missed
> that you were not on the list.
> 
> [...]
> 
> >> +	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
> >> +	while (--num_buf) {
> >> +		unsigned int buflen;
> >> +		unsigned long ctx;
> >> +		void *buf;
> >> +		int off;
> >> +
> >> +		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &buflen);
> >> +		if (unlikely(!ctx))
> >> +			goto err_buf;
> >> +
> >> +		buf = mergeable_ctx_to_buf_address(ctx);
> >> +		p = virt_to_head_page(buf);
> >> +		off = buf - page_address(p);
> >> +
> >> +		memcpy(page_address(page) + page_off,
> >> +		       page_address(p) + off, buflen);
> >> +		page_off += buflen;
> > 
> > Could malicious user potentially submit a frame bigger than MTU?
> 
> Well presumably if the MTU is greater than PAGE_SIZE the xdp program
> would not have been loaded. And the malicious user in this case would
> have to be qemu, in which case everything is already lost if qemu
> is trying to attack its VM.
> 
> But this is a good point because it looks like there is nothing in
> virtio or qemu that drops frames with MTU greater than the virtio
> configured setting. Maybe Michael can confirm this or I'll poke at it
> more. I think qemu should drop these frames in general.
> 
> So I think adding a guard here is sensible and I'll go ahead and do that.
> Also the MTU guard at set_xdp time needs to account for header length.

I agree. Further, offloads are disabled dynamically and we could
get a packet that was processed with LRO.

> Thanks nice catch.
> 
> > 
> >> +	}
> >> +
> >> +	*len = page_off;
> >> +	return page;
> >> +err_buf:
> >> +	__free_pages(page, 0);
> >> +	return NULL;
> >> +}
> >> +
> >>  static struct sk_buff *receive_mergeable(struct net_device *dev,
> >>  					 struct virtnet_info *vi,
> >>  					 struct receive_queue *rq,
> >> @@ -469,21 +519,37 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> >>  	rcu_read_lock();
> >>  	xdp_prog = rcu_dereference(rq->xdp_prog);
> >>  	if (xdp_prog) {
> >> +		struct page *xdp_page;
> >>  		u32 act;
> >>  
> >>  		if (num_buf > 1) {
> >>  			bpf_warn_invalid_xdp_buffer();
> >> -			goto err_xdp;
> >> +
> >> +			/* linearize data for XDP */
> >> +			xdp_page = xdp_linearize_page(rq, num_buf,
> >> +						      page, offset, &len);
> >> +			if (!xdp_page)
> >> +				goto err_xdp;
> >> +			offset = len;
> >> +		} else {
> >> +			xdp_page = page;
> >>  		}
> >>  
> >> -		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
> >> +		act = do_xdp_prog(vi, xdp_prog, xdp_page, offset, len);
> >>  		switch (act) {
> >>  		case XDP_PASS:
> >> +			if (unlikely(xdp_page != page))
> >> +				__free_pages(xdp_page, 0);
> >>  			break;
> >>  		case XDP_TX:
> >> +			if (unlikely(xdp_page != page))
> >> +				goto err_xdp;
> >> +			rcu_read_unlock();
> > 
> > Only if there is a reason for v4 - this unlock could go to the previous
> > patch.
> > 
> 
> Sure will do this.


* Re: [net-next PATCH v3 3/6] virtio_net: Add XDP support
  2016-11-29 20:10 ` [net-next PATCH v3 3/6] virtio_net: Add XDP support John Fastabend
@ 2016-11-30 18:54   ` Michael S. Tsirkin
  2016-12-01  4:24     ` John Fastabend
  0 siblings, 1 reply; 15+ messages in thread
From: Michael S. Tsirkin @ 2016-11-30 18:54 UTC (permalink / raw)
  To: John Fastabend
  Cc: eric.dumazet, daniel, shm, davem, tgraf, alexei.starovoitov,
	john.r.fastabend, netdev, bblanco, brouer

On Tue, Nov 29, 2016 at 12:10:21PM -0800, John Fastabend wrote:
> From: John Fastabend <john.fastabend@gmail.com>
> 
> This adds XDP support to virtio_net. Some requirements must be
> met for XDP to be enabled depending on the mode. First it will
> only be supported with LRO disabled so that data is not pushed
> across multiple buffers. Second the MTU must be less than a page
> size to avoid having to handle XDP across multiple pages.
> 
> If mergeable receive is enabled this patch only supports the case
> where header and data are in the same buffer, which we can check when
> a packet is received by looking at num_buf. If num_buf is
> greater than 1 and an XDP program is loaded the packet is dropped
> and a warning is thrown. When any_header_sg is set this does not
> happen and both header and data are put in a single buffer as expected,
> so we check this when XDP programs are loaded. Subsequent patches
> will process the packet in a degraded mode to ensure connectivity
> and correctness are not lost even if the backend pushes packets into
> multiple buffers.
> 
> If big packets mode is enabled and MTU/LRO conditions above are
> met then XDP is allowed.
> 
> This patch was tested with qemu with vhost=on and vhost=off, where
> mergeable and big_packet modes were forced via hard-coded feature
> negotiation. Multiple buffers per packet were forced via a small
> test patch to vhost.c in the vhost=on qemu mode.
> 
> Suggested-by: Shrijeet Mukherjee <shrijeet@gmail.com>
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> ---
>  drivers/net/virtio_net.c |  154 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 150 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 8189e5b..32126bf 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -22,6 +22,7 @@
>  #include <linux/module.h>
>  #include <linux/virtio.h>
>  #include <linux/virtio_net.h>
> +#include <linux/bpf.h>
>  #include <linux/scatterlist.h>
>  #include <linux/if_vlan.h>
>  #include <linux/slab.h>
> @@ -81,6 +82,8 @@ struct receive_queue {
>  
>  	struct napi_struct napi;
>  
> +	struct bpf_prog __rcu *xdp_prog;
> +
>  	/* Chain pages by the private ptr. */
>  	struct page *pages;
>  
> @@ -324,6 +327,38 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>  	return skb;
>  }
>  
> +static u32 do_xdp_prog(struct virtnet_info *vi,
> +		       struct bpf_prog *xdp_prog,
> +		       struct page *page, int offset, int len)
> +{
> +	int hdr_padded_len;
> +	struct xdp_buff xdp;
> +	u32 act;
> +	u8 *buf;
> +
> +	buf = page_address(page) + offset;
> +
> +	if (vi->mergeable_rx_bufs)
> +		hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> +	else
> +		hdr_padded_len = sizeof(struct padded_vnet_hdr);
> +
> +	xdp.data = buf + hdr_padded_len;
> +	xdp.data_end = xdp.data + (len - vi->hdr_len);

so header seems to be ignored completely.
but the packet could be from the time when
e.g. checksum offloading was on, and
so it might have DATA_VALID (from CHECKSUM_UNNECESSARY
in the host).

I think you want to verify that flags and gso type
are 0.


> +
> +	act = bpf_prog_run_xdp(xdp_prog, &xdp);
> +	switch (act) {
> +	case XDP_PASS:
> +		return XDP_PASS;
> +	default:
> +		bpf_warn_invalid_xdp_action(act);
> +	case XDP_TX:
> +	case XDP_ABORTED:
> +	case XDP_DROP:
> +		return XDP_DROP;
> +	}
> +}

do we really want this switch just to warn?
How about doing != XDP_PASS in the caller?
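E.g., roughly, on the receive_big() side (a sketch):

	act = do_xdp_prog(vi, xdp_prog, page, 0, len);
	if (act != XDP_PASS)
		goto err_xdp;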

> +
>  static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
>  {
>  	struct sk_buff * skb = buf;
> @@ -340,14 +375,28 @@ static struct sk_buff *receive_big(struct net_device *dev,
>  				   void *buf,
>  				   unsigned int len)
>  {
> +	struct bpf_prog *xdp_prog;
>  	struct page *page = buf;
> -	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
> +	struct sk_buff *skb;
>  
> +	rcu_read_lock();
> +	xdp_prog = rcu_dereference(rq->xdp_prog);
> +	if (xdp_prog) {
> +		u32 act = do_xdp_prog(vi, xdp_prog, page, 0, len);
> +
> +		if (act == XDP_DROP)
> +			goto err_xdp;
> +	}
> +	rcu_read_unlock();
> +
> +	skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
>  	if (unlikely(!skb))
>  		goto err;
>  
>  	return skb;
>  
> +err_xdp:
> +	rcu_read_unlock();
>  err:
>  	dev->stats.rx_dropped++;
>  	give_pages(rq, page);
> @@ -366,10 +415,27 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  	struct page *page = virt_to_head_page(buf);
>  	int offset = buf - page_address(page);
>  	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));

This is some useless computation when XDP is used, isn't it?

> +	struct sk_buff *head_skb, *curr_skb;
> +	struct bpf_prog *xdp_prog;
>  
> -	struct sk_buff *head_skb = page_to_skb(vi, rq, page, offset, len,
> -					       truesize);
> -	struct sk_buff *curr_skb = head_skb;
> +	rcu_read_lock();
> +	xdp_prog = rcu_dereference(rq->xdp_prog);
> +	if (xdp_prog) {
> +		u32 act;
> +
> +		if (num_buf > 1) {
> +			bpf_warn_invalid_xdp_buffer();
> +			goto err_xdp;
> +		}
> +
> +		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
> +		if (act == XDP_DROP)
> +			goto err_xdp;
> +	}
> +	rcu_read_unlock();
> +
> +	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
> +	curr_skb = head_skb;
>  
>  	if (unlikely(!curr_skb))
>  		goto err_skb;

I'm confused. Did the requirement to have a page per packet go away?
I don't think this mode is doing it here.


> @@ -423,6 +489,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
>  	return head_skb;
>  
> +err_xdp:
> +	rcu_read_unlock();
>  err_skb:
>  	put_page(page);
>  	while (--num_buf) {
> @@ -1328,6 +1396,13 @@ static int virtnet_set_channels(struct net_device *dev,
>  	if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
>  		return -EINVAL;
>  
> +	/* For now we don't support modifying channels while XDP is loaded
> +	 * also when XDP is loaded all RX queues have XDP programs so we only
> +	 * need to check a single RX queue.
> +	 */
> +	if (vi->rq[0].xdp_prog)
> +		return -EINVAL;
> +
>  	get_online_cpus();
>  	err = virtnet_set_queues(vi, queue_pairs);
>  	if (!err) {
> @@ -1454,6 +1529,68 @@ static int virtnet_set_features(struct net_device *netdev,
>  	return 0;
>  }
>  
> +static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
> +{
> +	struct virtnet_info *vi = netdev_priv(dev);
> +	struct bpf_prog *old_prog;
> +	int i;
> +
> +	if ((dev->features & NETIF_F_LRO) && prog) {
> +		netdev_warn(dev, "can't set XDP while LRO is on, disable LRO first\n");
> +		return -EINVAL;
> +	}
> +
> +	if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
> +		netdev_warn(dev, "XDP expects header/data in single page\n");
> +		return -EINVAL;
> +	}
> +
> +	if (dev->mtu > PAGE_SIZE) {
> +		netdev_warn(dev, "XDP requires MTU less than %lu\n", PAGE_SIZE);
> +		return -EINVAL;
> +	}
> +
> +	if (prog) {
> +		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
> +		if (IS_ERR(prog))
> +			return PTR_ERR(prog);
> +	}
> +
> +	for (i = 0; i < vi->max_queue_pairs; i++) {
> +		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
> +		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
> +		if (old_prog)
> +			bpf_prog_put(old_prog);

don't we need to sync before put?


> +	}
> +
> +	return 0;
> +}
> +
> +static bool virtnet_xdp_query(struct net_device *dev)
> +{
> +	struct virtnet_info *vi = netdev_priv(dev);
> +	int i;
> +
> +	for (i = 0; i < vi->max_queue_pairs; i++) {
> +		if (vi->rq[i].xdp_prog)
> +			return true;
> +	}
> +	return false;
> +}
> +
> +static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp)
> +{
> +	switch (xdp->command) {
> +	case XDP_SETUP_PROG:
> +		return virtnet_xdp_set(dev, xdp->prog);
> +	case XDP_QUERY_PROG:
> +		xdp->prog_attached = virtnet_xdp_query(dev);
> +		return 0;
> +	default:
> +		return -EINVAL;
> +	}
> +}
> +
>  static const struct net_device_ops virtnet_netdev = {
>  	.ndo_open            = virtnet_open,
>  	.ndo_stop   	     = virtnet_close,
> @@ -1471,6 +1608,7 @@ static int virtnet_set_features(struct net_device *netdev,
>  	.ndo_busy_poll		= virtnet_busy_poll,
>  #endif
>  	.ndo_set_features	= virtnet_set_features,
> +	.ndo_xdp		= virtnet_xdp,
>  };
>  
>  static void virtnet_config_changed_work(struct work_struct *work)
> @@ -1527,12 +1665,20 @@ static void virtnet_free_queues(struct virtnet_info *vi)
>  
>  static void free_receive_bufs(struct virtnet_info *vi)
>  {
> +	struct bpf_prog *old_prog;
>  	int i;
>  
> +	rtnl_lock();
>  	for (i = 0; i < vi->max_queue_pairs; i++) {
>  		while (vi->rq[i].pages)
>  			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
> +
> +		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
> +		RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
> +		if (old_prog)
> +			bpf_prog_put(old_prog);
>  	}
> +	rtnl_unlock();
>  }
>  
>  static void free_receive_page_frags(struct virtnet_info *vi)

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [net-next PATCH v3 3/6] virtio_net: Add XDP support
  2016-11-30 18:54   ` Michael S. Tsirkin
@ 2016-12-01  4:24     ` John Fastabend
  0 siblings, 0 replies; 15+ messages in thread
From: John Fastabend @ 2016-12-01  4:24 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: eric.dumazet, daniel, shm, davem, tgraf, alexei.starovoitov,
	john.r.fastabend, netdev, bblanco, brouer

On 16-11-30 10:54 AM, Michael S. Tsirkin wrote:
> On Tue, Nov 29, 2016 at 12:10:21PM -0800, John Fastabend wrote:
>> From: John Fastabend <john.fastabend@gmail.com>
>>
>> This adds XDP support to virtio_net. Some requirements must be
>> met for XDP to be enabled, depending on the mode. First, it will
>> only be supported with LRO disabled so that data is not pushed
>> across multiple buffers. Second, the MTU must be less than a page
>> size to avoid having to handle XDP across multiple pages.
>>
>> If mergeable receive is enabled, this patch only supports the case
>> where header and data are in the same buf, which we can check when
>> a packet is received by looking at num_buf. If num_buf is
>> greater than 1 and an XDP program is loaded, the packet is dropped
>> and a warning is thrown. When any_header_sg is set this does not
>> happen and both header and data are put in a single buffer as expected,
>> so we check this when XDP programs are loaded. Subsequent patches
>> will process the packet in a degraded mode to ensure connectivity
>> and correctness are not lost even if the backend pushes packets into
>> multiple buffers.
>>
>> If big packets mode is enabled and the MTU/LRO conditions above are
>> met, then XDP is allowed.
>>
>> This patch was tested with qemu with vhost=on and vhost=off, where
>> mergeable and big_packet modes were forced via hard-coded feature
>> negotiation. Multiple buffers per packet were forced via a small
>> test patch to vhost.c in the vhost=on qemu mode.
>>
>> Suggested-by: Shrijeet Mukherjee <shrijeet@gmail.com>
>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
>> ---

[...]

>>  
>> +static u32 do_xdp_prog(struct virtnet_info *vi,
>> +		       struct bpf_prog *xdp_prog,
>> +		       struct page *page, int offset, int len)
>> +{
>> +	int hdr_padded_len;
>> +	struct xdp_buff xdp;
>> +	u32 act;
>> +	u8 *buf;
>> +
>> +	buf = page_address(page) + offset;
>> +
>> +	if (vi->mergeable_rx_bufs)
>> +		hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
>> +	else
>> +		hdr_padded_len = sizeof(struct padded_vnet_hdr);
>> +
>> +	xdp.data = buf + hdr_padded_len;
>> +	xdp.data_end = xdp.data + (len - vi->hdr_len);
> 
> so header seems to be ignored completely.
> but the packet could be from the time when
> e.g. checksum offloading was on, and
> so it might have DATA_VALID (from CHECKSUM_UNNECESSARY
> in the host).
> 
> I think you want to verify that flags and gso type
> are 0.

Ah yes, agreed. I think this might be possible in all the driver
implementations as well; I wonder if there are bugs there too.
The race between setting MTU, LRO, etc. and pushing the XDP prog
into the datapath seems unlikely but possible.
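Something along these lines at the top of do_xdp_prog() could work (an
untested sketch; it assumes the virtio_net_hdr sits at the start of the
buffer, which holds for both receive paths that call do_xdp_prog()):

	/* Refuse to run XDP on packets that depend on virtio header
	 * state; checksum or GSO metadata would be lost since XDP
	 * only sees raw packet data.
	 */
	struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)buf;

	if (unlikely(hdr->flags != 0 ||
		     hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE))
		return XDP_DROP;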

> 
> 
>> +
>> +	act = bpf_prog_run_xdp(xdp_prog, &xdp);
>> +	switch (act) {
>> +	case XDP_PASS:
>> +		return XDP_PASS;
>> +	default:
>> +		bpf_warn_invalid_xdp_action(act);
>> +	case XDP_TX:
>> +	case XDP_ABORTED:
>> +	case XDP_DROP:
>> +		return XDP_DROP;
>> +	}
>> +}
> 
> do we really want this switch just to warn?
> How about doing != XDP_PASS in the caller?

Well, in later patches the XDP_TX case gets handled, so it's really
three cases: XDP_PASS, XDP_TX, and {default|ABORTED|DROP}. I don't have
a strong preference whether it's a switch statement or if/else; it
should not matter except for style.
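For reference, a rough sketch of the three-way dispatch once XDP_TX is
handled (virtnet_xdp_xmit() is a stand-in name for the transmit helper
a later patch in this series adds):

	act = bpf_prog_run_xdp(xdp_prog, &xdp);
	switch (act) {
	case XDP_PASS:
		return XDP_PASS;
	case XDP_TX:
		virtnet_xdp_xmit(vi, &xdp);	/* added later in the series */
		return XDP_TX;
	default:
		bpf_warn_invalid_xdp_action(act);
		/* fall through */
	case XDP_ABORTED:
	case XDP_DROP:
		return XDP_DROP;
	}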

> 
>> +
>>  static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
>>  {
>>  	struct sk_buff * skb = buf;
>> @@ -340,14 +375,28 @@ static struct sk_buff *receive_big(struct net_device *dev,
>>  				   void *buf,
>>  				   unsigned int len)
>>  {
>> +	struct bpf_prog *xdp_prog;
>>  	struct page *page = buf;
>> -	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
>> +	struct sk_buff *skb;
>>  
>> +	rcu_read_lock();
>> +	xdp_prog = rcu_dereference(rq->xdp_prog);
>> +	if (xdp_prog) {
>> +		u32 act = do_xdp_prog(vi, xdp_prog, page, 0, len);
>> +
>> +		if (act == XDP_DROP)
>> +			goto err_xdp;
>> +	}
>> +	rcu_read_unlock();
>> +
>> +	skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
>>  	if (unlikely(!skb))
>>  		goto err;
>>  
>>  	return skb;
>>  
>> +err_xdp:
>> +	rcu_read_unlock();
>>  err:
>>  	dev->stats.rx_dropped++;
>>  	give_pages(rq, page);
>> @@ -366,10 +415,27 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>  	struct page *page = virt_to_head_page(buf);
>>  	int offset = buf - page_address(page);
>>  	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
> 
> This is some useless computation when XDP is used, isn't it?
> 

Yes, nice catch.
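One option would be to defer the computation until after the XDP check,
so it only runs on the skb path, e.g. (sketch):

	rcu_read_lock();
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (xdp_prog) {
		/* num_buf and verdict checks as in the patch */
		if (do_xdp_prog(vi, xdp_prog, page, offset, len) == XDP_DROP)
			goto err_xdp;
	}
	rcu_read_unlock();

	/* truesize is only needed once we build an skb */
	truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);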

>> +	struct sk_buff *head_skb, *curr_skb;
>> +	struct bpf_prog *xdp_prog;
>>  
>> -	struct sk_buff *head_skb = page_to_skb(vi, rq, page, offset, len,
>> -					       truesize);
>> -	struct sk_buff *curr_skb = head_skb;
>> +	rcu_read_lock();
>> +	xdp_prog = rcu_dereference(rq->xdp_prog);
>> +	if (xdp_prog) {
>> +		u32 act;
>> +
>> +		if (num_buf > 1) {
>> +			bpf_warn_invalid_xdp_buffer();
>> +			goto err_xdp;
>> +		}
>> +
>> +		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
>> +		if (act == XDP_DROP)
>> +			goto err_xdp;
>> +	}
>> +	rcu_read_unlock();
>> +
>> +	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
>> +	curr_skb = head_skb;
>>  
>>  	if (unlikely(!curr_skb))
>>  		goto err_skb;
> 
> I'm confused. Did the requirement to have a page per packet go away?
> I don't think this mode is doing it here.
> 

Correct, it's not a page per packet, but there seems to be no reason to
enforce that here. XDP works just fine without contiguous buffers. I
believe the page-per-packet argument came out of the mlx driver
implementation.

IIRC DaveM had the most pushback on the multiple-packets-per-page
thing. Maybe he can comment again. (Sorry Dave, I'm dragging this up again.)

> 
>> @@ -423,6 +489,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>  	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
>>  	return head_skb;
>>  
>> +err_xdp:
>> +	rcu_read_unlock();
>>  err_skb:
>>  	put_page(page);
>>  	while (--num_buf) {
>> @@ -1328,6 +1396,13 @@ static int virtnet_set_channels(struct net_device *dev,
>>  	if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
>>  		return -EINVAL;
>>  
>> +	/* For now we don't support modifying channels while XDP is loaded
>> +	 * also when XDP is loaded all RX queues have XDP programs so we only
>> +	 * need to check a single RX queue.
>> +	 */
>> +	if (vi->rq[0].xdp_prog)
>> +		return -EINVAL;
>> +
>>  	get_online_cpus();
>>  	err = virtnet_set_queues(vi, queue_pairs);
>>  	if (!err) {
>> @@ -1454,6 +1529,68 @@ static int virtnet_set_features(struct net_device *netdev,
>>  	return 0;
>>  }
>>  
>> +static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
>> +{
>> +	struct virtnet_info *vi = netdev_priv(dev);
>> +	struct bpf_prog *old_prog;
>> +	int i;
>> +
>> +	if ((dev->features & NETIF_F_LRO) && prog) {
>> +		netdev_warn(dev, "can't set XDP while LRO is on, disable LRO first\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
>> +		netdev_warn(dev, "XDP expects header/data in single page\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (dev->mtu > PAGE_SIZE) {
>> +		netdev_warn(dev, "XDP requires MTU less than %lu\n", PAGE_SIZE);
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (prog) {
>> +		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
>> +		if (IS_ERR(prog))
>> +			return PTR_ERR(prog);
>> +	}
>> +
>> +	for (i = 0; i < vi->max_queue_pairs; i++) {
>> +		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
>> +		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
>> +		if (old_prog)
>> +			bpf_prog_put(old_prog);
> 
> don't we need to sync before put?
> 

No, this should be handled by the core infrastructure in
bpf_prog_put(). It also aligns with the other driver implementations.

> 
>> +	}
>> +
>> +	return 0;
>> +}
>> +

Thanks,
John

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2016-12-01  4:25 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-29 20:09 [net-next PATCH v3 1/6] net: virtio dynamically disable/enable LRO John Fastabend
2016-11-29 20:09 ` [net-next PATCH v3 2/6] net: xdp: add invalid buffer warning John Fastabend
2016-11-29 20:10 ` [net-next PATCH v3 3/6] virtio_net: Add XDP support John Fastabend
2016-11-30 18:54   ` Michael S. Tsirkin
2016-12-01  4:24     ` John Fastabend
2016-11-29 20:10 ` [net-next PATCH v3 4/6] virtio_net: add dedicated XDP transmit queues John Fastabend
2016-11-29 20:11 ` [net-next PATCH v3 5/6] virtio_net: add XDP_TX support John Fastabend
2016-11-30 18:45   ` Michael S. Tsirkin
2016-11-29 20:11 ` [net-next PATCH v3 6/6] virtio_net: xdp, add slowpath case for non contiguous buffers John Fastabend
2016-11-30  0:37   ` Alexei Starovoitov
2016-11-30  2:50     ` John Fastabend
2016-11-30  5:23       ` Alexei Starovoitov
2016-11-30 14:30   ` Jakub Kicinski
2016-11-30 16:50     ` John Fastabend
2016-11-30 18:49       ` Michael S. Tsirkin
