netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 0/2] add page_pool support for page recycling in veth driver
@ 2023-04-20 11:16 Lorenzo Bianconi
  2023-04-20 11:16 ` [PATCH net-next 1/2] net: veth: add page_pool for page recycling Lorenzo Bianconi
  2023-04-20 11:16 ` [PATCH net-next 2/2] net: veth: add page_pool stats Lorenzo Bianconi
  0 siblings, 2 replies; 5+ messages in thread
From: Lorenzo Bianconi @ 2023-04-20 11:16 UTC (permalink / raw)
  To: netdev
  Cc: bpf, davem, edumazet, kuba, pabeni, ast, daniel, hawk,
	john.fastabend, toke, mtahhan, lorenzo.bianconi

Introduce page_pool support in veth driver in order to recycle pages
in veth_convert_skb_to_xdp_buff routine and avoid reallocating the skb
through the page allocator when we run a xdp program on the device and
we receive skbs from the stack.

Lorenzo Bianconi (2):
  net: veth: add page_pool for page recycling
  net: veth: add page_pool stats

 drivers/net/Kconfig |  2 ++
 drivers/net/veth.c  | 74 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 68 insertions(+), 8 deletions(-)

-- 
2.40.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-20 11:16 [PATCH net-next 0/2] add page_pool support for page recycling in veth driver Lorenzo Bianconi
@ 2023-04-20 11:16 ` Lorenzo Bianconi
  2023-04-22  3:22   ` Jakub Kicinski
  2023-04-20 11:16 ` [PATCH net-next 2/2] net: veth: add page_pool stats Lorenzo Bianconi
  1 sibling, 1 reply; 5+ messages in thread
From: Lorenzo Bianconi @ 2023-04-20 11:16 UTC (permalink / raw)
  To: netdev
  Cc: bpf, davem, edumazet, kuba, pabeni, ast, daniel, hawk,
	john.fastabend, toke, mtahhan, lorenzo.bianconi

Introduce page_pool support in veth driver in order to recycle pages
in veth_convert_skb_to_xdp_buff routine and avoid reallocating the skb
through the page allocator.
The patch has been sending tcp traffic to a veth pair where the remote
peer is running a simple xdp program just returing xdp_pass:

veth upstream codebase:
MTU 1500B: ~ 8Gbps
MTU 8000B: ~ 13.9Gbps

veth upstream codebase + pp support:
MTU 1500B: ~ 9.2Gbps
MTU 8000B: ~ 16.2Gbps

Tested-by: Maryam Tahhan <mtahhan@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/Kconfig |  1 +
 drivers/net/veth.c  | 54 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index c34bd432da27..368c6f5b327e 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -402,6 +402,7 @@ config TUN_VNET_CROSS_LE
 
 config VETH
 	tristate "Virtual ethernet pair device"
+	select PAGE_POOL
 	help
 	  This device is a local ethernet tunnel. Devices are created in pairs.
 	  When one end receives the packet it appears on its pair and vice
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index e1b38fbf1dd9..141b7745ba43 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -26,6 +26,7 @@
 #include <linux/ptr_ring.h>
 #include <linux/bpf_trace.h>
 #include <linux/net_tstamp.h>
+#include <net/page_pool.h>
 
 #define DRV_NAME	"veth"
 #define DRV_VERSION	"1.0"
@@ -65,6 +66,7 @@ struct veth_rq {
 	bool			rx_notify_masked;
 	struct ptr_ring		xdp_ring;
 	struct xdp_rxq_info	xdp_rxq;
+	struct page_pool	*page_pool;
 };
 
 struct veth_priv {
@@ -711,8 +713,8 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 	    skb_shinfo(skb)->nr_frags ||
 	    skb_headroom(skb) < XDP_PACKET_HEADROOM) {
 		u32 size, len, max_head_size, off;
+		struct page *page = NULL;
 		struct sk_buff *nskb;
-		struct page *page;
 		int i, head_off;
 
 		/* We need a private copy of the skb and data buffers since
@@ -727,17 +729,21 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 			goto drop;
 
 		/* Allocate skb head */
-		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+		if (rq->page_pool)
+			page = page_pool_dev_alloc_pages(rq->page_pool);
 		if (!page)
 			goto drop;
 
 		nskb = build_skb(page_address(page), PAGE_SIZE);
 		if (!nskb) {
-			put_page(page);
+			page_pool_put_full_page(rq->page_pool, page, false);
 			goto drop;
 		}
 
 		skb_reserve(nskb, VETH_XDP_HEADROOM);
+		skb_copy_header(nskb, skb);
+		skb_mark_for_recycle(nskb);
+
 		size = min_t(u32, skb->len, max_head_size);
 		if (skb_copy_bits(skb, 0, nskb->data, size)) {
 			consume_skb(nskb);
@@ -745,16 +751,17 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 		}
 		skb_put(nskb, size);
 
-		skb_copy_header(nskb, skb);
 		head_off = skb_headroom(nskb) - skb_headroom(skb);
 		skb_headers_offset_update(nskb, head_off);
 
 		/* Allocate paged area of new skb */
 		off = size;
 		len = skb->len - off;
+		page = NULL;
 
 		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
-			page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+			if (rq->page_pool)
+				page = page_pool_dev_alloc_pages(rq->page_pool);
 			if (!page) {
 				consume_skb(nskb);
 				goto drop;
@@ -770,6 +777,7 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 
 			len -= size;
 			off += size;
+			page = NULL;
 		}
 
 		consume_skb(skb);
@@ -1002,11 +1010,37 @@ static int veth_poll(struct napi_struct *napi, int budget)
 	return done;
 }
 
+static int veth_create_page_pool(struct veth_rq *rq)
+{
+	struct page_pool_params pp_params = {
+		.order = 0,
+		.pool_size = VETH_RING_SIZE,
+		.nid = NUMA_NO_NODE,
+		.dev = &rq->dev->dev,
+	};
+
+	rq->page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(rq->page_pool)) {
+		int err = PTR_ERR(rq->page_pool);
+
+		rq->page_pool = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
 static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	int err, i;
 
+	for (i = start; i < end; i++) {
+		err = veth_create_page_pool(&priv->rq[i]);
+		if (err)
+			goto err_page_pool;
+	}
+
 	for (i = start; i < end; i++) {
 		struct veth_rq *rq = &priv->rq[i];
 
@@ -1027,6 +1061,11 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
 err_xdp_ring:
 	for (i--; i >= start; i--)
 		ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
+err_page_pool:
+	for (i = start; i < end; i++) {
+		page_pool_destroy(priv->rq[i].page_pool);
+		priv->rq[i].page_pool = NULL;
+	}
 
 	return err;
 }
@@ -1056,6 +1095,11 @@ static void veth_napi_del_range(struct net_device *dev, int start, int end)
 		rq->rx_notify_masked = false;
 		ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
 	}
+
+	for (i = start; i < end; i++) {
+		page_pool_destroy(priv->rq[i].page_pool);
+		priv->rq[i].page_pool = NULL;
+	}
 }
 
 static void veth_napi_del(struct net_device *dev)
-- 
2.40.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH net-next 2/2] net: veth: add page_pool stats
  2023-04-20 11:16 [PATCH net-next 0/2] add page_pool support for page recycling in veth driver Lorenzo Bianconi
  2023-04-20 11:16 ` [PATCH net-next 1/2] net: veth: add page_pool for page recycling Lorenzo Bianconi
@ 2023-04-20 11:16 ` Lorenzo Bianconi
  1 sibling, 0 replies; 5+ messages in thread
From: Lorenzo Bianconi @ 2023-04-20 11:16 UTC (permalink / raw)
  To: netdev
  Cc: bpf, davem, edumazet, kuba, pabeni, ast, daniel, hawk,
	john.fastabend, toke, mtahhan, lorenzo.bianconi

Introduce page_pool stats support to report info about local page_pool
through ethtool

Tested-by: Maryam Tahhan <mtahhan@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/Kconfig |  1 +
 drivers/net/veth.c  | 20 +++++++++++++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 368c6f5b327e..d0a1ed216d15 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -403,6 +403,7 @@ config TUN_VNET_CROSS_LE
 config VETH
 	tristate "Virtual ethernet pair device"
 	select PAGE_POOL
+	select PAGE_POOL_STATS
 	help
 	  This device is a local ethernet tunnel. Devices are created in pairs.
 	  When one end receives the packet it appears on its pair and vice
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 141b7745ba43..4e08a4633d25 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -157,6 +157,8 @@ static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 			for (j = 0; j < VETH_TQ_STATS_LEN; j++)
 				ethtool_sprintf(&p, "tx_queue_%u_%.18s",
 						i, veth_tq_stats_desc[j].desc);
+
+		page_pool_ethtool_stats_get_strings(p);
 		break;
 	}
 }
@@ -167,7 +169,8 @@ static int veth_get_sset_count(struct net_device *dev, int sset)
 	case ETH_SS_STATS:
 		return ARRAY_SIZE(ethtool_stats_keys) +
 		       VETH_RQ_STATS_LEN * dev->real_num_rx_queues +
-		       VETH_TQ_STATS_LEN * dev->real_num_tx_queues;
+		       VETH_TQ_STATS_LEN * dev->real_num_tx_queues +
+		       page_pool_ethtool_stats_get_count();
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -178,7 +181,8 @@ static void veth_get_ethtool_stats(struct net_device *dev,
 {
 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
 	struct net_device *peer = rtnl_dereference(priv->peer);
-	int i, j, idx;
+	struct page_pool_stats pp_stats = {};
+	int i, j, idx, pp_idx;
 
 	data[0] = peer ? peer->ifindex : 0;
 	idx = 1;
@@ -197,9 +201,10 @@ static void veth_get_ethtool_stats(struct net_device *dev,
 		} while (u64_stats_fetch_retry(&rq_stats->syncp, start));
 		idx += VETH_RQ_STATS_LEN;
 	}
+	pp_idx = idx;
 
 	if (!peer)
-		return;
+		goto page_pool_stats;
 
 	rcv_priv = netdev_priv(peer);
 	for (i = 0; i < peer->real_num_rx_queues; i++) {
@@ -216,7 +221,16 @@ static void veth_get_ethtool_stats(struct net_device *dev,
 				data[tx_idx + j] += *(u64 *)(base + offset);
 			}
 		} while (u64_stats_fetch_retry(&rq_stats->syncp, start));
+		pp_idx = tx_idx + VETH_TQ_STATS_LEN;
+	}
+
+page_pool_stats:
+	for (i = 0; i < dev->real_num_rx_queues; i++) {
+		if (!priv->rq[i].page_pool)
+			continue;
+		page_pool_get_stats(priv->rq[i].page_pool, &pp_stats);
 	}
+	page_pool_ethtool_stats_get(&data[pp_idx], &pp_stats);
 }
 
 static void veth_get_channels(struct net_device *dev,
-- 
2.40.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-20 11:16 ` [PATCH net-next 1/2] net: veth: add page_pool for page recycling Lorenzo Bianconi
@ 2023-04-22  3:22   ` Jakub Kicinski
  2023-04-22 18:49     ` Lorenzo Bianconi
  0 siblings, 1 reply; 5+ messages in thread
From: Jakub Kicinski @ 2023-04-22  3:22 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: netdev, bpf, davem, edumazet, pabeni, ast, daniel, hawk,
	john.fastabend, toke, mtahhan, lorenzo.bianconi

On Thu, 20 Apr 2023 13:16:21 +0200 Lorenzo Bianconi wrote:
> @@ -727,17 +729,21 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
>  			goto drop;
>  
>  		/* Allocate skb head */
> -		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> +		if (rq->page_pool)

There's some condition under which we can get to XDP enabled but no
pool?

> +			page = page_pool_dev_alloc_pages(rq->page_pool);
>  		if (!page)
>  			goto drop;
>  
>  		nskb = build_skb(page_address(page), PAGE_SIZE);
>  		if (!nskb) {
> -			put_page(page);
> +			page_pool_put_full_page(rq->page_pool, page, false);

You can recycle direct, AFAIU the basic rule of thumb is that it's
always safe to recycle direct from the context which allocates from
the pool.

>  			goto drop;
>  		}
>  
>  		skb_reserve(nskb, VETH_XDP_HEADROOM);
> +		skb_copy_header(nskb, skb);
> +		skb_mark_for_recycle(nskb);
> +
>  		size = min_t(u32, skb->len, max_head_size);
>  		if (skb_copy_bits(skb, 0, nskb->data, size)) {
>  			consume_skb(nskb);
> @@ -745,16 +751,17 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
>  		}
>  		skb_put(nskb, size);
>  
> -		skb_copy_header(nskb, skb);
>  		head_off = skb_headroom(nskb) - skb_headroom(skb);
>  		skb_headers_offset_update(nskb, head_off);
>  
>  		/* Allocate paged area of new skb */
>  		off = size;
>  		len = skb->len - off;
> +		page = NULL;

Why do you clear the page pointer?

-- 
pw-bot: cr

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-22  3:22   ` Jakub Kicinski
@ 2023-04-22 18:49     ` Lorenzo Bianconi
  0 siblings, 0 replies; 5+ messages in thread
From: Lorenzo Bianconi @ 2023-04-22 18:49 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: netdev, bpf, davem, edumazet, pabeni, ast, daniel, hawk,
	john.fastabend, toke, mtahhan, lorenzo.bianconi

[-- Attachment #1: Type: text/plain, Size: 1826 bytes --]

> On Thu, 20 Apr 2023 13:16:21 +0200 Lorenzo Bianconi wrote:
> > @@ -727,17 +729,21 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
> >  			goto drop;
> >  
> >  		/* Allocate skb head */
> > -		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> > +		if (rq->page_pool)
> 
> There's some condition under which we can get to XDP enabled but no
> pool?

ack, since we destroy the page_pool after disabling the rq napi we can get rid
of 'if (rq->page_pool)' checks here.

> 
> > +			page = page_pool_dev_alloc_pages(rq->page_pool);
> >  		if (!page)
> >  			goto drop;
> >  
> >  		nskb = build_skb(page_address(page), PAGE_SIZE);
> >  		if (!nskb) {
> > -			put_page(page);
> > +			page_pool_put_full_page(rq->page_pool, page, false);
> 
> You can recycle direct, AFAIU the basic rule of thumb is that it's
> always safe to recycle direct from the context which allocates from
> the pool.

ack, I will fix it.

> 
> >  			goto drop;
> >  		}
> >  
> >  		skb_reserve(nskb, VETH_XDP_HEADROOM);
> > +		skb_copy_header(nskb, skb);
> > +		skb_mark_for_recycle(nskb);
> > +
> >  		size = min_t(u32, skb->len, max_head_size);
> >  		if (skb_copy_bits(skb, 0, nskb->data, size)) {
> >  			consume_skb(nskb);
> > @@ -745,16 +751,17 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
> >  		}
> >  		skb_put(nskb, size);
> >  
> > -		skb_copy_header(nskb, skb);
> >  		head_off = skb_headroom(nskb) - skb_headroom(skb);
> >  		skb_headers_offset_update(nskb, head_off);
> >  
> >  		/* Allocate paged area of new skb */
> >  		off = size;
> >  		len = skb->len - off;
> > +		page = NULL;
> 
> Why do you clear the page pointer?

since we do not need 'if (rq->page_pool)' checks anymore we can get rid of it.

Regards,
Lorenzo

> 
> -- 
> pw-bot: cr

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-04-22 18:49 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-04-20 11:16 [PATCH net-next 0/2] add page_pool support for page recycling in veth driver Lorenzo Bianconi
2023-04-20 11:16 ` [PATCH net-next 1/2] net: veth: add page_pool for page recycling Lorenzo Bianconi
2023-04-22  3:22   ` Jakub Kicinski
2023-04-22 18:49     ` Lorenzo Bianconi
2023-04-20 11:16 ` [PATCH net-next 2/2] net: veth: add page_pool stats Lorenzo Bianconi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).