public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Jesper Dangaard Brouer <brouer@redhat.com>
To: netdev@vger.kernel.org, BjörnTöpel <bjorn.topel@intel.com>,
	magnus.karlsson@intel.com
Cc: eugenia@mellanox.com, Jason Wang <jasowang@redhat.com>,
	John Fastabend <john.fastabend@gmail.com>,
	Eran Ben Elisha <eranbe@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>,
	galp@mellanox.com, Jesper Dangaard Brouer <brouer@redhat.com>,
	Daniel Borkmann <borkmann@iogearbox.net>,
	Alexei Starovoitov <alexei.starovoitov@gmail.com>,
	Tariq Toukan <tariqt@mellanox.com>
Subject: [bpf-next V4 PATCH 08/15] bpf: cpumap convert to use generic xdp_frame
Date: Thu, 22 Mar 2018 15:21:48 +0100	[thread overview]
Message-ID: <152172850876.20979.13944289942386359288.stgit@firesoul> (raw)
In-Reply-To: <152172842149.20979.12110131083451936498.stgit@firesoul>

The generic xdp_frame format, was inspired by the cpumap own internal
xdp_pkt format.  It is now time to convert it over to the generic
xdp_frame format.  The cpumap needs one extra field dev_rx.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/net/xdp.h   |    1 +
 kernel/bpf/cpumap.c |  100 ++++++++++++++-------------------------------------
 2 files changed, 29 insertions(+), 72 deletions(-)

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 13f71a15c79f..bc0cb97e20dc 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -68,6 +68,7 @@ struct xdp_frame {
 	 * while mem info is valid on remote CPU.
 	 */
 	struct xdp_mem_info mem;
+	struct net_device *dev_rx; /* used by cpumap */
 };
 
 /* Convert xdp_buff to xdp_frame */
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 3e4bbcbe3e86..bcdc4dea5ce7 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -159,52 +159,8 @@ static void cpu_map_kthread_stop(struct work_struct *work)
 	kthread_stop(rcpu->kthread);
 }
 
-/* For now, xdp_pkt is a cpumap internal data structure, with info
- * carried between enqueue to dequeue. It is mapped into the top
- * headroom of the packet, to avoid allocating separate mem.
- */
-struct xdp_pkt {
-	void *data;
-	u16 len;
-	u16 headroom;
-	u16 metasize;
-	/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
-	 * while mem info is valid on remote CPU.
-	 */
-	struct xdp_mem_info mem;
-	struct net_device *dev_rx;
-};
-
-/* Convert xdp_buff to xdp_pkt */
-static struct xdp_pkt *convert_to_xdp_pkt(struct xdp_buff *xdp)
-{
-	struct xdp_pkt *xdp_pkt;
-	int metasize;
-	int headroom;
-
-	/* Assure headroom is available for storing info */
-	headroom = xdp->data - xdp->data_hard_start;
-	metasize = xdp->data - xdp->data_meta;
-	metasize = metasize > 0 ? metasize : 0;
-	if (unlikely((headroom - metasize) < sizeof(*xdp_pkt)))
-		return NULL;
-
-	/* Store info in top of packet */
-	xdp_pkt = xdp->data_hard_start;
-
-	xdp_pkt->data = xdp->data;
-	xdp_pkt->len  = xdp->data_end - xdp->data;
-	xdp_pkt->headroom = headroom - sizeof(*xdp_pkt);
-	xdp_pkt->metasize = metasize;
-
-	/* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
-	xdp_pkt->mem = xdp->rxq->mem;
-
-	return xdp_pkt;
-}
-
 static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
-					 struct xdp_pkt *xdp_pkt)
+					 struct xdp_frame *xdpf)
 {
 	unsigned int frame_size;
 	void *pkt_data_start;
@@ -219,7 +175,7 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
 	 * would be preferred to set frame_size to 2048 or 4096
 	 * depending on the driver.
 	 *   frame_size = 2048;
-	 *   frame_len  = frame_size - sizeof(*xdp_pkt);
+	 *   frame_len  = frame_size - sizeof(*xdp_frame);
 	 *
 	 * Instead, with info avail, skb_shared_info in placed after
 	 * packet len.  This, unfortunately fakes the truesize.
@@ -227,21 +183,21 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
 	 * is not at a fixed memory location, with mixed length
 	 * packets, which is bad for cache-line hotness.
 	 */
-	frame_size = SKB_DATA_ALIGN(xdp_pkt->len) + xdp_pkt->headroom +
+	frame_size = SKB_DATA_ALIGN(xdpf->len) + xdpf->headroom +
 		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
-	pkt_data_start = xdp_pkt->data - xdp_pkt->headroom;
+	pkt_data_start = xdpf->data - xdpf->headroom;
 	skb = build_skb(pkt_data_start, frame_size);
 	if (!skb)
 		return NULL;
 
-	skb_reserve(skb, xdp_pkt->headroom);
-	__skb_put(skb, xdp_pkt->len);
-	if (xdp_pkt->metasize)
-		skb_metadata_set(skb, xdp_pkt->metasize);
+	skb_reserve(skb, xdpf->headroom);
+	__skb_put(skb, xdpf->len);
+	if (xdpf->metasize)
+		skb_metadata_set(skb, xdpf->metasize);
 
 	/* Essential SKB info: protocol and skb->dev */
-	skb->protocol = eth_type_trans(skb, xdp_pkt->dev_rx);
+	skb->protocol = eth_type_trans(skb, xdpf->dev_rx);
 
 	/* Optional SKB info, currently missing:
 	 * - HW checksum info		(skb->ip_summed)
@@ -259,11 +215,11 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
 	 * invoked cpu_map_kthread_stop(). Catch any broken behaviour
 	 * gracefully and warn once.
 	 */
-	struct xdp_pkt *xdp_pkt;
+	struct xdp_frame *xdpf;
 
-	while ((xdp_pkt = ptr_ring_consume(ring)))
-		if (WARN_ON_ONCE(xdp_pkt))
-			xdp_return_frame(xdp_pkt, &xdp_pkt->mem);
+	while ((xdpf = ptr_ring_consume(ring)))
+		if (WARN_ON_ONCE(xdpf))
+			xdp_return_frame(xdpf->data, &xdpf->mem);
 }
 
 static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
@@ -290,7 +246,7 @@ static int cpu_map_kthread_run(void *data)
 	 */
 	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
 		unsigned int processed = 0, drops = 0, sched = 0;
-		struct xdp_pkt *xdp_pkt;
+		struct xdp_frame *xdpf;
 
 		/* Release CPU reschedule checks */
 		if (__ptr_ring_empty(rcpu->queue)) {
@@ -313,13 +269,13 @@ static int cpu_map_kthread_run(void *data)
 		 * kthread CPU pinned. Lockless access to ptr_ring
 		 * consume side valid as no-resize allowed of queue.
 		 */
-		while ((xdp_pkt = __ptr_ring_consume(rcpu->queue))) {
+		while ((xdpf = __ptr_ring_consume(rcpu->queue))) {
 			struct sk_buff *skb;
 			int ret;
 
-			skb = cpu_map_build_skb(rcpu, xdp_pkt);
+			skb = cpu_map_build_skb(rcpu, xdpf);
 			if (!skb) {
-				xdp_return_frame(xdp_pkt, &xdp_pkt->mem);
+				xdp_return_frame(xdpf->data, &xdpf->mem);
 				continue;
 			}
 
@@ -616,13 +572,13 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 	spin_lock(&q->producer_lock);
 
 	for (i = 0; i < bq->count; i++) {
-		struct xdp_pkt *xdp_pkt = bq->q[i];
+		struct xdp_frame *xdpf = bq->q[i];
 		int err;
 
-		err = __ptr_ring_produce(q, xdp_pkt);
+		err = __ptr_ring_produce(q, xdpf);
 		if (err) {
 			drops++;
-			xdp_return_frame(xdp_pkt->data, &xdp_pkt->mem);
+			xdp_return_frame(xdpf->data, &xdpf->mem);
 		}
 		processed++;
 	}
@@ -637,7 +593,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 /* Runs under RCU-read-side, plus in softirq under NAPI protection.
  * Thus, safe percpu variable access.
  */
-static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
+static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 {
 	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
@@ -648,28 +604,28 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
 	 * driver to code invoking us to finished, due to driver
 	 * (e.g. ixgbe) recycle tricks based on page-refcnt.
 	 *
-	 * Thus, incoming xdp_pkt is always queued here (else we race
+	 * Thus, incoming xdp_frame is always queued here (else we race
 	 * with another CPU on page-refcnt and remaining driver code).
 	 * Queue time is very short, as driver will invoke flush
 	 * operation, when completing napi->poll call.
 	 */
-	bq->q[bq->count++] = xdp_pkt;
+	bq->q[bq->count++] = xdpf;
 	return 0;
 }
 
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx)
 {
-	struct xdp_pkt *xdp_pkt;
+	struct xdp_frame *xdpf;
 
-	xdp_pkt = convert_to_xdp_pkt(xdp);
-	if (unlikely(!xdp_pkt))
+	xdpf = convert_to_xdp_frame(xdp);
+	if (unlikely(!xdpf))
 		return -EOVERFLOW;
 
 	/* Info needed when constructing SKB on remote CPU */
-	xdp_pkt->dev_rx = dev_rx;
+	xdpf->dev_rx = dev_rx;
 
-	bq_enqueue(rcpu, xdp_pkt);
+	bq_enqueue(rcpu, xdpf);
 	return 0;
 }
 

  parent reply	other threads:[~2018-03-22 14:21 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-22 14:21 [bpf-next V4 PATCH 00/15] XDP redirect memory return API Jesper Dangaard Brouer
2018-03-22 14:21 ` [bpf-next V4 PATCH 01/15] mlx5: basic XDP_REDIRECT forward support Jesper Dangaard Brouer
2018-03-22 16:36   ` Tariq Toukan
2018-03-22 14:21 ` [bpf-next V4 PATCH 02/15] xdp: introduce xdp_return_frame API and use in cpumap Jesper Dangaard Brouer
2018-03-22 14:21 ` [bpf-next V4 PATCH 03/15] ixgbe: use xdp_return_frame API Jesper Dangaard Brouer
2018-03-22 14:21 ` [bpf-next V4 PATCH 04/15] xdp: move struct xdp_buff from filter.h to xdp.h Jesper Dangaard Brouer
2018-03-22 14:21 ` [bpf-next V4 PATCH 05/15] xdp: introduce a new xdp_frame type Jesper Dangaard Brouer
2018-03-22 14:21 ` [bpf-next V4 PATCH 06/15] tun: convert to use generic xdp_frame and xdp_return_frame API Jesper Dangaard Brouer
2018-03-22 14:21 ` [bpf-next V4 PATCH 07/15] virtio_net: " Jesper Dangaard Brouer
2018-03-22 14:21 ` Jesper Dangaard Brouer [this message]
2018-03-22 14:21 ` [bpf-next V4 PATCH 09/15] mlx5: register a memory model when XDP is enabled Jesper Dangaard Brouer
2018-03-22 16:37   ` Tariq Toukan
2018-03-22 14:21 ` [bpf-next V4 PATCH 10/15] xdp: rhashtable with allocator ID to pointer mapping Jesper Dangaard Brouer
2018-03-22 14:22 ` [bpf-next V4 PATCH 11/15] page_pool: refurbish version of page_pool code Jesper Dangaard Brouer
2018-03-22 17:21   ` Alexei Starovoitov
2018-03-23  9:16     ` Jesper Dangaard Brouer
2018-03-22 14:22 ` [bpf-next V4 PATCH 12/15] xdp: allow page_pool as an allocator type in xdp_return_frame Jesper Dangaard Brouer
2018-03-22 14:22 ` [bpf-next V4 PATCH 13/15] mlx5: use page_pool for xdp_return_frame call Jesper Dangaard Brouer
2018-03-22 16:40   ` Tariq Toukan
2018-03-22 14:22 ` [bpf-next V4 PATCH 14/15] xdp: transition into using xdp_frame for return API Jesper Dangaard Brouer
2018-03-22 14:22 ` [bpf-next V4 PATCH 15/15] xdp: transition into using xdp_frame for ndo_xdp_xmit Jesper Dangaard Brouer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=152172850876.20979.13944289942386359288.stgit@firesoul \
    --to=brouer@redhat.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=bjorn.topel@intel.com \
    --cc=borkmann@iogearbox.net \
    --cc=eranbe@mellanox.com \
    --cc=eugenia@mellanox.com \
    --cc=galp@mellanox.com \
    --cc=jasowang@redhat.com \
    --cc=john.fastabend@gmail.com \
    --cc=magnus.karlsson@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=saeedm@mellanox.com \
    --cc=tariqt@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox