public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Jesper Dangaard Brouer <brouer@redhat.com>
To: netdev@vger.kernel.org, BjörnTöpel <bjorn.topel@intel.com>,
	magnus.karlsson@intel.com
Cc: eugenia@mellanox.com, Jason Wang <jasowang@redhat.com>,
	John Fastabend <john.fastabend@gmail.com>,
	Eran Ben Elisha <eranbe@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>,
	galp@mellanox.com, Jesper Dangaard Brouer <brouer@redhat.com>,
	Daniel Borkmann <borkmann@iogearbox.net>,
	Alexei Starovoitov <alexei.starovoitov@gmail.com>,
	Tariq Toukan <tariqt@mellanox.com>
Subject: [bpf-next V5 PATCH 14/15] xdp: transition into using xdp_frame for return API
Date: Fri, 23 Mar 2018 13:19:10 +0100	[thread overview]
Message-ID: <152180755002.20167.11254036418774114364.stgit@firesoul> (raw)
In-Reply-To: <152180742196.20167.5168801400337773178.stgit@firesoul>

Changing API xdp_return_frame() to take struct xdp_frame as argument,
seems like a natural choice. But there are some subtle performance
details here that needs extra care, which is a deliberate choice.

When de-referencing xdp_frame on a remote CPU during DMA-TX
completion, result in the cache-line is change to "Shared"
state. Later when the page is reused for RX, then this xdp_frame
cache-line is written, which change the state to "Modified".

This situation already happens (naturally) for, virtio_net, tun and
cpumap as the xdp_frame pointer is the queued object.  In tun and
cpumap, the ptr_ring is used for efficiently transferring cache-lines
(with pointers) between CPUs. Thus, the only option is to
de-referencing xdp_frame.

It is only the ixgbe driver that had an optimization, in which it can
avoid doing the de-reference of xdp_frame.  The driver already have
TX-ring queue, which (in case of remote DMA-TX completion) have to be
transferred between CPUs anyhow.  In this data area, we stored a
struct xdp_mem_info and a data pointer, which allowed us to avoid
de-referencing xdp_frame.

To compensate for this, a prefetchw is used for telling the cache
coherency protocol about our access pattern.  My benchmarks show that
this prefetchw is enough to compensate the ixgbe driver.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h        |    4 +---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   |   17 +++++++++++------
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c |    1 +
 drivers/net/tun.c                               |    4 ++--
 drivers/net/virtio_net.c                        |    2 +-
 include/net/xdp.h                               |    2 +-
 kernel/bpf/cpumap.c                             |    6 +++---
 net/core/xdp.c                                  |    4 +++-
 8 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index cbc20f199364..dfbc15a45cb4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -240,8 +240,7 @@ struct ixgbe_tx_buffer {
 	unsigned long time_stamp;
 	union {
 		struct sk_buff *skb;
-		/* XDP uses address ptr on irq_clean */
-		void *data;
+		struct xdp_frame *xdpf;
 	};
 	unsigned int bytecount;
 	unsigned short gso_segs;
@@ -249,7 +248,6 @@ struct ixgbe_tx_buffer {
 	DEFINE_DMA_UNMAP_ADDR(dma);
 	DEFINE_DMA_UNMAP_LEN(len);
 	u32 tx_flags;
-	struct xdp_mem_info xdp_mem;
 };
 
 struct ixgbe_rx_buffer {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index ff069597fccf..e6e9b28ecfba 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1207,7 +1207,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 
 		/* free the skb */
 		if (ring_is_xdp(tx_ring))
-			xdp_return_frame(tx_buffer->data, &tx_buffer->xdp_mem);
+			xdp_return_frame(tx_buffer->xdpf);
 		else
 			napi_consume_skb(tx_buffer->skb, napi_budget);
 
@@ -2376,6 +2376,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 			xdp.data_hard_start = xdp.data -
 					      ixgbe_rx_offset(rx_ring);
 			xdp.data_end = xdp.data + size;
+			prefetchw(xdp.data_hard_start); /* xdp_frame write */
 
 			skb = ixgbe_run_xdp(adapter, rx_ring, &xdp);
 		}
@@ -5787,7 +5788,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
 
 		/* Free all the Tx ring sk_buffs */
 		if (ring_is_xdp(tx_ring))
-			xdp_return_frame(tx_buffer->data, &tx_buffer->xdp_mem);
+			xdp_return_frame(tx_buffer->xdpf);
 		else
 			dev_kfree_skb_any(tx_buffer->skb);
 
@@ -8333,16 +8334,21 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 	struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
 	struct ixgbe_tx_buffer *tx_buffer;
 	union ixgbe_adv_tx_desc *tx_desc;
+	struct xdp_frame *xdpf;
 	u32 len, cmd_type;
 	dma_addr_t dma;
 	u16 i;
 
-	len = xdp->data_end - xdp->data;
+	xdpf = convert_to_xdp_frame(xdp);
+	if (unlikely(!xdpf))
+		return -EOVERFLOW;
+
+	len = xdpf->len;
 
 	if (unlikely(!ixgbe_desc_unused(ring)))
 		return IXGBE_XDP_CONSUMED;
 
-	dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
+	dma = dma_map_single(ring->dev, xdpf->data, len, DMA_TO_DEVICE);
 	if (dma_mapping_error(ring->dev, dma))
 		return IXGBE_XDP_CONSUMED;
 
@@ -8357,8 +8363,7 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 
 	dma_unmap_len_set(tx_buffer, len, len);
 	dma_unmap_addr_set(tx_buffer, dma, dma);
-	tx_buffer->data = xdp->data;
-	tx_buffer->xdp_mem = xdp->rxq->mem;
+	tx_buffer->xdpf = xdpf;
 
 	tx_desc->read.buffer_addr = cpu_to_le64(dma);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 2ac78b88fc3d..00b9b13d9fea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -896,6 +896,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 				      di->addr + wi->offset,
 				      0, frag_size,
 				      DMA_FROM_DEVICE);
+	prefetchw(va); /* xdp_frame data area */
 	prefetch(data);
 	wi->offset += frag_size;
 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 81fddf9cc58f..a7e42ae1b220 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -663,7 +663,7 @@ static void tun_ptr_free(void *ptr)
 	if (tun_is_xdp_frame(ptr)) {
 		struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
 
-		xdp_return_frame(xdpf->data, &xdpf->mem);
+		xdp_return_frame(xdpf);
 	} else {
 		__skb_array_destroy_skb(ptr);
 	}
@@ -2188,7 +2188,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 		struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
 
 		ret = tun_put_user_xdp(tun, tfile, xdpf, to);
-		xdp_return_frame(xdpf->data, &xdpf->mem);
+		xdp_return_frame(xdpf);
 	} else {
 		struct sk_buff *skb = ptr;
 
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 48c86accd3b8..479a80339fad 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -430,7 +430,7 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
 
 	/* Free up any pending old buffers before queueing new ones. */
 	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
-		xdp_return_frame(xdpf_sent->data, &xdpf_sent->mem);
+		xdp_return_frame(xdpf_sent);
 
 	xdpf = convert_to_xdp_frame(xdp);
 	if (unlikely(!xdpf))
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 98b55eaf8fd7..35aa9825fdd0 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -103,7 +103,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 	return xdp_frame;
 }
 
-void xdp_return_frame(void *data, struct xdp_mem_info *mem);
+void xdp_return_frame(struct xdp_frame *xdpf);
 
 int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
 		     struct net_device *dev, u32 queue_index);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index bcdc4dea5ce7..c95b04ec103e 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -219,7 +219,7 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
 
 	while ((xdpf = ptr_ring_consume(ring)))
 		if (WARN_ON_ONCE(xdpf))
-			xdp_return_frame(xdpf->data, &xdpf->mem);
+			xdp_return_frame(xdpf);
 }
 
 static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
@@ -275,7 +275,7 @@ static int cpu_map_kthread_run(void *data)
 
 			skb = cpu_map_build_skb(rcpu, xdpf);
 			if (!skb) {
-				xdp_return_frame(xdpf->data, &xdpf->mem);
+				xdp_return_frame(xdpf);
 				continue;
 			}
 
@@ -578,7 +578,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 		err = __ptr_ring_produce(q, xdpf);
 		if (err) {
 			drops++;
-			xdp_return_frame(xdpf->data, &xdpf->mem);
+			xdp_return_frame(xdpf);
 		}
 		processed++;
 	}
diff --git a/net/core/xdp.c b/net/core/xdp.c
index fe8e87abc266..6ed3d73a73be 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -294,9 +294,11 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
 
-void xdp_return_frame(void *data, struct xdp_mem_info *mem)
+void xdp_return_frame(struct xdp_frame *xdpf)
 {
 	struct xdp_mem_allocator *xa = NULL;
+	struct xdp_mem_info *mem = &xdpf->mem;
+	void *data = xdpf->data;
 
 	rcu_read_lock();
 	if (mem->id)

  parent reply	other threads:[~2018-03-23 12:19 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-23 12:17 [bpf-next V5 PATCH 00/15] XDP redirect memory return API Jesper Dangaard Brouer
2018-03-23 12:18 ` [bpf-next V5 PATCH 01/15] mlx5: basic XDP_REDIRECT forward support Jesper Dangaard Brouer
2018-03-23 12:18 ` [bpf-next V5 PATCH 02/15] xdp: introduce xdp_return_frame API and use in cpumap Jesper Dangaard Brouer
2018-03-23 16:35   ` Alexander Duyck
2018-03-26 19:06     ` Jesper Dangaard Brouer
2018-03-23 12:18 ` [bpf-next V5 PATCH 03/15] ixgbe: use xdp_return_frame API Jesper Dangaard Brouer
2018-03-23 16:46   ` Alexander Duyck
2018-03-23 12:18 ` [bpf-next V5 PATCH 04/15] xdp: move struct xdp_buff from filter.h to xdp.h Jesper Dangaard Brouer
2018-03-23 12:18 ` [bpf-next V5 PATCH 05/15] xdp: introduce a new xdp_frame type Jesper Dangaard Brouer
2018-03-23 17:11   ` Alexander Duyck
2018-03-23 12:18 ` [bpf-next V5 PATCH 06/15] tun: convert to use generic xdp_frame and xdp_return_frame API Jesper Dangaard Brouer
2018-03-23 12:18 ` [bpf-next V5 PATCH 07/15] virtio_net: " Jesper Dangaard Brouer
2018-03-23 12:18 ` [bpf-next V5 PATCH 08/15] bpf: cpumap convert to use generic xdp_frame Jesper Dangaard Brouer
2018-03-23 12:18 ` [bpf-next V5 PATCH 09/15] mlx5: register a memory model when XDP is enabled Jesper Dangaard Brouer
2018-03-23 16:18   ` Sergei Shtylyov
2018-03-23 12:18 ` [bpf-next V5 PATCH 10/15] xdp: rhashtable with allocator ID to pointer mapping Jesper Dangaard Brouer
2018-03-23 16:56   ` Alexander Duyck
2018-03-23 18:15     ` Jesper Dangaard Brouer
2018-03-23 18:22       ` Alexander Duyck
2018-03-26 21:04     ` Jesper Dangaard Brouer
2018-03-23 12:18 ` [bpf-next V5 PATCH 11/15] page_pool: refurbish version of page_pool code Jesper Dangaard Brouer
2018-03-23 13:28   ` Eric Dumazet
2018-03-26 14:09     ` Jesper Dangaard Brouer
2018-03-23 13:29   ` Eric Dumazet
2018-03-23 14:15     ` Jesper Dangaard Brouer
2018-03-23 14:55       ` Eric Dumazet
2018-03-26 15:19         ` Jesper Dangaard Brouer
2018-03-23 13:37   ` Eric Dumazet
2018-03-23 12:18 ` [bpf-next V5 PATCH 12/15] xdp: allow page_pool as an allocator type in xdp_return_frame Jesper Dangaard Brouer
2018-03-23 17:02   ` Alexander Duyck
2018-03-23 12:19 ` [bpf-next V5 PATCH 13/15] mlx5: use page_pool for xdp_return_frame call Jesper Dangaard Brouer
2018-03-23 12:19 ` Jesper Dangaard Brouer [this message]
2018-03-23 17:29   ` [bpf-next V5 PATCH 14/15] xdp: transition into using xdp_frame for return API Alexander Duyck
2018-03-26 11:42     ` Jesper Dangaard Brouer
2018-03-23 12:19 ` [bpf-next V5 PATCH 15/15] xdp: transition into using xdp_frame for ndo_xdp_xmit Jesper Dangaard Brouer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=152180755002.20167.11254036418774114364.stgit@firesoul \
    --to=brouer@redhat.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=bjorn.topel@intel.com \
    --cc=borkmann@iogearbox.net \
    --cc=eranbe@mellanox.com \
    --cc=eugenia@mellanox.com \
    --cc=galp@mellanox.com \
    --cc=jasowang@redhat.com \
    --cc=john.fastabend@gmail.com \
    --cc=magnus.karlsson@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=saeedm@mellanox.com \
    --cc=tariqt@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox