* [PATCH v2 net-next 0/2] add page_pool support for page recycling in veth driver
@ 2023-04-22 18:54 Lorenzo Bianconi
  2023-04-22 18:54 ` [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling Lorenzo Bianconi
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Lorenzo Bianconi @ 2023-04-22 18:54 UTC (permalink / raw)
  To: netdev
  Cc: bpf, lorenzo.bianconi, davem, edumazet, kuba, pabeni, hawk,
	john.fastabend, ast, daniel

Introduce page_pool support in the veth driver in order to recycle pages in
the veth_convert_skb_to_xdp_buff routine and avoid reallocating the skb through
the page allocator when we run an XDP program on the device and receive
skbs from the stack.

Changes since v1:
- remove page_pool checks in veth_convert_skb_to_xdp_buff() before allocating
  the pages
- recycle pages in the hot cache if build_skb fails in
  veth_convert_skb_to_xdp_buff()

Lorenzo Bianconi (2):
  net: veth: add page_pool for page recycling
  net: veth: add page_pool stats

 drivers/net/Kconfig |  2 ++
 drivers/net/veth.c  | 68 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 63 insertions(+), 7 deletions(-)

-- 
2.40.0



* [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-22 18:54 [PATCH v2 net-next 0/2] add page_pool support for page recycling in veth driver Lorenzo Bianconi
@ 2023-04-22 18:54 ` Lorenzo Bianconi
  2023-04-23 12:17   ` Yunsheng Lin
  2023-04-22 18:54 ` [PATCH v2 net-next 2/2] net: veth: add page_pool stats Lorenzo Bianconi
  2023-04-25  1:10 ` [PATCH v2 net-next 0/2] add page_pool support for page recycling in veth driver patchwork-bot+netdevbpf
  2 siblings, 1 reply; 15+ messages in thread
From: Lorenzo Bianconi @ 2023-04-22 18:54 UTC (permalink / raw)
  To: netdev
  Cc: bpf, lorenzo.bianconi, davem, edumazet, kuba, pabeni, hawk,
	john.fastabend, ast, daniel

Introduce page_pool support in the veth driver in order to recycle pages
in the veth_convert_skb_to_xdp_buff routine and avoid reallocating the skb
through the page allocator.
The patch has been tested by sending TCP traffic to a veth pair where the
remote peer runs a simple XDP program that just returns XDP_PASS:

veth upstream codebase:
MTU 1500B: ~ 8Gbps
MTU 8000B: ~ 13.9Gbps

veth upstream codebase + pp support:
MTU 1500B: ~ 9.2Gbps
MTU 8000B: ~ 16.2Gbps
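
For reference, a minimal sketch of the kind of XDP program used for this
test (hypothetical source, assuming standard libbpf conventions; not
necessarily the exact program used):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("xdp")
    int xdp_pass_prog(struct xdp_md *ctx)
    {
            /* do nothing: let every frame continue up the stack */
            return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";

Such an object can be attached to the peer device with e.g.
"ip link set dev <peer> xdp obj xdp_pass.o sec xdp".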

Tested-by: Maryam Tahhan <mtahhan@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/Kconfig |  1 +
 drivers/net/veth.c  | 48 +++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index c34bd432da27..368c6f5b327e 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -402,6 +402,7 @@ config TUN_VNET_CROSS_LE
 
 config VETH
 	tristate "Virtual ethernet pair device"
+	select PAGE_POOL
 	help
 	  This device is a local ethernet tunnel. Devices are created in pairs.
 	  When one end receives the packet it appears on its pair and vice
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 4b3c6647edc6..35d2285dec25 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -26,6 +26,7 @@
 #include <linux/ptr_ring.h>
 #include <linux/bpf_trace.h>
 #include <linux/net_tstamp.h>
+#include <net/page_pool.h>
 
 #define DRV_NAME	"veth"
 #define DRV_VERSION	"1.0"
@@ -65,6 +66,7 @@ struct veth_rq {
 	bool			rx_notify_masked;
 	struct ptr_ring		xdp_ring;
 	struct xdp_rxq_info	xdp_rxq;
+	struct page_pool	*page_pool;
 };
 
 struct veth_priv {
@@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 			goto drop;
 
 		/* Allocate skb head */
-		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+		page = page_pool_dev_alloc_pages(rq->page_pool);
 		if (!page)
 			goto drop;
 
 		nskb = build_skb(page_address(page), PAGE_SIZE);
 		if (!nskb) {
-			put_page(page);
+			page_pool_put_full_page(rq->page_pool, page, true);
 			goto drop;
 		}
 
 		skb_reserve(nskb, VETH_XDP_HEADROOM);
+		skb_copy_header(nskb, skb);
+		skb_mark_for_recycle(nskb);
+
 		size = min_t(u32, skb->len, max_head_size);
 		if (skb_copy_bits(skb, 0, nskb->data, size)) {
 			consume_skb(nskb);
@@ -745,7 +750,6 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 		}
 		skb_put(nskb, size);
 
-		skb_copy_header(nskb, skb);
 		head_off = skb_headroom(nskb) - skb_headroom(skb);
 		skb_headers_offset_update(nskb, head_off);
 
@@ -754,7 +758,7 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 		len = skb->len - off;
 
 		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
-			page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+			page = page_pool_dev_alloc_pages(rq->page_pool);
 			if (!page) {
 				consume_skb(nskb);
 				goto drop;
@@ -1002,11 +1006,37 @@ static int veth_poll(struct napi_struct *napi, int budget)
 	return done;
 }
 
+static int veth_create_page_pool(struct veth_rq *rq)
+{
+	struct page_pool_params pp_params = {
+		.order = 0,
+		.pool_size = VETH_RING_SIZE,
+		.nid = NUMA_NO_NODE,
+		.dev = &rq->dev->dev,
+	};
+
+	rq->page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(rq->page_pool)) {
+		int err = PTR_ERR(rq->page_pool);
+
+		rq->page_pool = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
 static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	int err, i;
 
+	for (i = start; i < end; i++) {
+		err = veth_create_page_pool(&priv->rq[i]);
+		if (err)
+			goto err_page_pool;
+	}
+
 	for (i = start; i < end; i++) {
 		struct veth_rq *rq = &priv->rq[i];
 
@@ -1027,6 +1057,11 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
 err_xdp_ring:
 	for (i--; i >= start; i--)
 		ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
+err_page_pool:
+	for (i = start; i < end; i++) {
+		page_pool_destroy(priv->rq[i].page_pool);
+		priv->rq[i].page_pool = NULL;
+	}
 
 	return err;
 }
@@ -1056,6 +1091,11 @@ static void veth_napi_del_range(struct net_device *dev, int start, int end)
 		rq->rx_notify_masked = false;
 		ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
 	}
+
+	for (i = start; i < end; i++) {
+		page_pool_destroy(priv->rq[i].page_pool);
+		priv->rq[i].page_pool = NULL;
+	}
 }
 
 static void veth_napi_del(struct net_device *dev)
-- 
2.40.0



* [PATCH v2 net-next 2/2] net: veth: add page_pool stats
  2023-04-22 18:54 [PATCH v2 net-next 0/2] add page_pool support for page recycling in veth driver Lorenzo Bianconi
  2023-04-22 18:54 ` [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling Lorenzo Bianconi
@ 2023-04-22 18:54 ` Lorenzo Bianconi
  2023-04-25  1:10 ` [PATCH v2 net-next 0/2] add page_pool support for page recycling in veth driver patchwork-bot+netdevbpf
  2 siblings, 0 replies; 15+ messages in thread
From: Lorenzo Bianconi @ 2023-04-22 18:54 UTC (permalink / raw)
  To: netdev
  Cc: bpf, lorenzo.bianconi, davem, edumazet, kuba, pabeni, hawk,
	john.fastabend, ast, daniel

Introduce page_pool stats support to report info about the local page_pools
through ethtool.
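
The aggregated pool counters are appended after the existing per-queue
counters and can be dumped with the standard ethtool stats interface, e.g.
(veth0 being a placeholder interface name):

    ethtool -S veth0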

Tested-by: Maryam Tahhan <mtahhan@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/Kconfig |  1 +
 drivers/net/veth.c  | 20 +++++++++++++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 368c6f5b327e..d0a1ed216d15 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -403,6 +403,7 @@ config TUN_VNET_CROSS_LE
 config VETH
 	tristate "Virtual ethernet pair device"
 	select PAGE_POOL
+	select PAGE_POOL_STATS
 	help
 	  This device is a local ethernet tunnel. Devices are created in pairs.
 	  When one end receives the packet it appears on its pair and vice
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 35d2285dec25..dce9f9d63e04 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -157,6 +157,8 @@ static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 			for (j = 0; j < VETH_TQ_STATS_LEN; j++)
 				ethtool_sprintf(&p, "tx_queue_%u_%.18s",
 						i, veth_tq_stats_desc[j].desc);
+
+		page_pool_ethtool_stats_get_strings(p);
 		break;
 	}
 }
@@ -167,7 +169,8 @@ static int veth_get_sset_count(struct net_device *dev, int sset)
 	case ETH_SS_STATS:
 		return ARRAY_SIZE(ethtool_stats_keys) +
 		       VETH_RQ_STATS_LEN * dev->real_num_rx_queues +
-		       VETH_TQ_STATS_LEN * dev->real_num_tx_queues;
+		       VETH_TQ_STATS_LEN * dev->real_num_tx_queues +
+		       page_pool_ethtool_stats_get_count();
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -178,7 +181,8 @@ static void veth_get_ethtool_stats(struct net_device *dev,
 {
 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
 	struct net_device *peer = rtnl_dereference(priv->peer);
-	int i, j, idx;
+	struct page_pool_stats pp_stats = {};
+	int i, j, idx, pp_idx;
 
 	data[0] = peer ? peer->ifindex : 0;
 	idx = 1;
@@ -197,9 +201,10 @@ static void veth_get_ethtool_stats(struct net_device *dev,
 		} while (u64_stats_fetch_retry(&rq_stats->syncp, start));
 		idx += VETH_RQ_STATS_LEN;
 	}
+	pp_idx = idx;
 
 	if (!peer)
-		return;
+		goto page_pool_stats;
 
 	rcv_priv = netdev_priv(peer);
 	for (i = 0; i < peer->real_num_rx_queues; i++) {
@@ -216,7 +221,16 @@ static void veth_get_ethtool_stats(struct net_device *dev,
 				data[tx_idx + j] += *(u64 *)(base + offset);
 			}
 		} while (u64_stats_fetch_retry(&rq_stats->syncp, start));
+		pp_idx = tx_idx + VETH_TQ_STATS_LEN;
+	}
+
+page_pool_stats:
+	for (i = 0; i < dev->real_num_rx_queues; i++) {
+		if (!priv->rq[i].page_pool)
+			continue;
+		page_pool_get_stats(priv->rq[i].page_pool, &pp_stats);
 	}
+	page_pool_ethtool_stats_get(&data[pp_idx], &pp_stats);
 }
 
 static void veth_get_channels(struct net_device *dev,
-- 
2.40.0



* Re: [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-22 18:54 ` [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling Lorenzo Bianconi
@ 2023-04-23 12:17   ` Yunsheng Lin
  2023-04-23 14:20     ` Lorenzo Bianconi
  0 siblings, 1 reply; 15+ messages in thread
From: Yunsheng Lin @ 2023-04-23 12:17 UTC (permalink / raw)
  To: Lorenzo Bianconi, netdev
  Cc: bpf, lorenzo.bianconi, davem, edumazet, kuba, pabeni, hawk,
	john.fastabend, ast, daniel

On 2023/4/23 2:54, Lorenzo Bianconi wrote:
>  struct veth_priv {
> @@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
>  			goto drop;
>  
>  		/* Allocate skb head */
> -		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> +		page = page_pool_dev_alloc_pages(rq->page_pool);
>  		if (!page)
>  			goto drop;
>  
>  		nskb = build_skb(page_address(page), PAGE_SIZE);

If the page pool is used with PP_FLAG_PAGE_FRAG, maybe there is some additional
improvement for the MTU 1500B case; it seems a 4K page is able to hold two skbs.
And we can reduce the memory usage too, which is a significant saving if the
page size is 64K.
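
For illustration, a rough sketch of what the frag-based allocation could
look like (a hypothetical adaptation, not part of this series; VETH_BUF_SIZE
is an assumed half-page buffer size):

    struct page_pool_params pp_params = {
            .flags          = PP_FLAG_PAGE_FRAG,
            .order          = 0,
            .pool_size      = VETH_RING_SIZE,
            .nid            = NUMA_NO_NODE,
            .dev            = &rq->dev->dev,
    };

    /* in veth_convert_skb_to_xdp_buff(): carve a half-page fragment out
     * of a pool page instead of consuming a full page per skb head
     */
    unsigned int offset;

    page = page_pool_dev_alloc_frag(rq->page_pool, &offset, VETH_BUF_SIZE);
    if (!page)
            goto drop;

    nskb = build_skb(page_address(page) + offset, VETH_BUF_SIZE);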


>  		if (!nskb) {
> -			put_page(page);
> +			page_pool_put_full_page(rq->page_pool, page, true);
>  			goto drop;
>  		}
>  
>  		skb_reserve(nskb, VETH_XDP_HEADROOM);
> +		skb_copy_header(nskb, skb);
> +		skb_mark_for_recycle(nskb);
> +
>  		size = min_t(u32, skb->len, max_head_size);
>  		if (skb_copy_bits(skb, 0, nskb->data, size)) {
>  			consume_skb(nskb);
> @@ -745,7 +750,6 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
>  		}
>  		skb_put(nskb, size);
>  
> -		skb_copy_header(nskb, skb);
>  		head_off = skb_headroom(nskb) - skb_headroom(skb);
>  		skb_headers_offset_update(nskb, head_off);
>  
> @@ -754,7 +758,7 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
>  		len = skb->len - off;
>  
>  		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
> -			page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> +			page = page_pool_dev_alloc_pages(rq->page_pool);
>  			if (!page) {
>  				consume_skb(nskb);
>  				goto drop;
> @@ -1002,11 +1006,37 @@ static int veth_poll(struct napi_struct *napi, int budget)
>  	return done;
>  }
>  
> +static int veth_create_page_pool(struct veth_rq *rq)
> +{
> +	struct page_pool_params pp_params = {
> +		.order = 0,
> +		.pool_size = VETH_RING_SIZE,

It seems better to allocate a different pool_size according to
the MTU, so that the best performance is achieved using the
least memory?



* Re: [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-23 12:17   ` Yunsheng Lin
@ 2023-04-23 14:20     ` Lorenzo Bianconi
  2023-04-24  2:29       ` Yunsheng Lin
  0 siblings, 1 reply; 15+ messages in thread
From: Lorenzo Bianconi @ 2023-04-23 14:20 UTC (permalink / raw)
  To: Yunsheng Lin
  Cc: Lorenzo Bianconi, netdev, bpf, davem, edumazet, kuba, pabeni,
	hawk, john.fastabend, ast, daniel


> On 2023/4/23 2:54, Lorenzo Bianconi wrote:
> >  struct veth_priv {
> > @@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
> >  			goto drop;
> >  
> >  		/* Allocate skb head */
> > -		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> > +		page = page_pool_dev_alloc_pages(rq->page_pool);
> >  		if (!page)
> >  			goto drop;
> >  
> >  		nskb = build_skb(page_address(page), PAGE_SIZE);
> 
> If the page pool is used with PP_FLAG_PAGE_FRAG, maybe there is some additional
> improvement for the MTU 1500B case; it seems a 4K page is able to hold two skbs.
> And we can reduce the memory usage too, which is a significant saving if the
> page size is 64K.

please correct me if I am wrong, but I think the 1500B MTU case does not fit in
the half-page buffer size, since we need to take VETH_XDP_HEADROOM into account.
In particular:

- VETH_BUF_SIZE = 2048
- VETH_XDP_HEADROOM = 256 + 2 = 258
- max_headsize = SKB_WITH_OVERHEAD(VETH_BUF_SIZE - VETH_XDP_HEADROOM) = 1470
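
Spelling out the arithmetic (assuming a 64-bit arch where
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) is 320B):

    VETH_XDP_HEADROOM = XDP_PACKET_HEADROOM + NET_IP_ALIGN = 256 + 2 = 258
    max_headsize      = SKB_WITH_OVERHEAD(VETH_BUF_SIZE - VETH_XDP_HEADROOM)
                      = (2048 - 258) - 320
                      = 1470    /* < 1500: a full MTU-sized frame cannot fit */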

Even in this case we will need to consume a full page. In fact, performance
is a little bit worse:

MTU 1500: tcp throughput ~ 8.3Gbps

Do you agree or am I missing something?

Regards,
Lorenzo

> 
> 
> >  		if (!nskb) {
> > -			put_page(page);
> > +			page_pool_put_full_page(rq->page_pool, page, true);
> >  			goto drop;
> >  		}
> >  
> >  		skb_reserve(nskb, VETH_XDP_HEADROOM);
> > +		skb_copy_header(nskb, skb);
> > +		skb_mark_for_recycle(nskb);
> > +
> >  		size = min_t(u32, skb->len, max_head_size);
> >  		if (skb_copy_bits(skb, 0, nskb->data, size)) {
> >  			consume_skb(nskb);
> > @@ -745,7 +750,6 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
> >  		}
> >  		skb_put(nskb, size);
> >  
> > -		skb_copy_header(nskb, skb);
> >  		head_off = skb_headroom(nskb) - skb_headroom(skb);
> >  		skb_headers_offset_update(nskb, head_off);
> >  
> > @@ -754,7 +758,7 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
> >  		len = skb->len - off;
> >  
> >  		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
> > -			page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> > +			page = page_pool_dev_alloc_pages(rq->page_pool);
> >  			if (!page) {
> >  				consume_skb(nskb);
> >  				goto drop;
> > @@ -1002,11 +1006,37 @@ static int veth_poll(struct napi_struct *napi, int budget)
> >  	return done;
> >  }
> >  
> > +static int veth_create_page_pool(struct veth_rq *rq)
> > +{
> > +	struct page_pool_params pp_params = {
> > +		.order = 0,
> > +		.pool_size = VETH_RING_SIZE,
> 
> It seems better to allocate a different pool_size according to
> the MTU, so that the best performance is achieved using the
> least memory?
> 



* Re: [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-23 14:20     ` Lorenzo Bianconi
@ 2023-04-24  2:29       ` Yunsheng Lin
  2023-04-24  9:17         ` Lorenzo Bianconi
  0 siblings, 1 reply; 15+ messages in thread
From: Yunsheng Lin @ 2023-04-24  2:29 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: Lorenzo Bianconi, netdev, bpf, davem, edumazet, kuba, pabeni,
	hawk, john.fastabend, ast, daniel

On 2023/4/23 22:20, Lorenzo Bianconi wrote:
>> On 2023/4/23 2:54, Lorenzo Bianconi wrote:
>>>  struct veth_priv {
>>> @@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
>>>  			goto drop;
>>>  
>>>  		/* Allocate skb head */
>>> -		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
>>> +		page = page_pool_dev_alloc_pages(rq->page_pool);
>>>  		if (!page)
>>>  			goto drop;
>>>  
>>>  		nskb = build_skb(page_address(page), PAGE_SIZE);
>>
>> If the page pool is used with PP_FLAG_PAGE_FRAG, maybe there is some additional
>> improvement for the MTU 1500B case; it seems a 4K page is able to hold two skbs.
>> And we can reduce the memory usage too, which is a significant saving if the
>> page size is 64K.
> 
> please correct me if I am wrong, but I think the 1500B MTU case does not fit in
> the half-page buffer size, since we need to take VETH_XDP_HEADROOM into account.
> In particular:
> 
> - VETH_BUF_SIZE = 2048
> - VETH_XDP_HEADROOM = 256 + 2 = 258

On some arch the NET_IP_ALIGN is zero.

I suppose XDP_PACKET_HEADROOM are for xdp_frame and data_meta, it seems
xdp_frame is only 40 bytes for 64 bit arch and max size of metalen is 32
as xdp_metalen_invalid() suggest, is there any other reason why we need
256 bytes here?

> - max_headsize = SKB_WITH_OVERHEAD(VETH_BUF_SIZE - VETH_XDP_HEADROOM) = 1470
> 
> Even in this case we will need to consume a full page. In fact, performance
> is a little bit worse:
> 
> MTU 1500: tcp throughput ~ 8.3Gbps
> 
> Do you agree or am I missing something?
> 
> Regards,
> Lorenzo


* Re: [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-24  2:29       ` Yunsheng Lin
@ 2023-04-24  9:17         ` Lorenzo Bianconi
  2023-04-24 11:58           ` Yunsheng Lin
  0 siblings, 1 reply; 15+ messages in thread
From: Lorenzo Bianconi @ 2023-04-24  9:17 UTC (permalink / raw)
  To: Yunsheng Lin
  Cc: Lorenzo Bianconi, netdev, bpf, davem, edumazet, kuba, pabeni,
	hawk, john.fastabend, ast, daniel


> On 2023/4/23 22:20, Lorenzo Bianconi wrote:
> >> On 2023/4/23 2:54, Lorenzo Bianconi wrote:
> >>>  struct veth_priv {
> >>> @@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
> >>>  			goto drop;
> >>>  
> >>>  		/* Allocate skb head */
> >>> -		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> >>> +		page = page_pool_dev_alloc_pages(rq->page_pool);
> >>>  		if (!page)
> >>>  			goto drop;
> >>>  
> >>>  		nskb = build_skb(page_address(page), PAGE_SIZE);
> >>
> >> If the page pool is used with PP_FLAG_PAGE_FRAG, maybe there is some additional
> >> improvement for the MTU 1500B case; it seems a 4K page is able to hold two skbs.
> >> And we can reduce the memory usage too, which is a significant saving if the
> >> page size is 64K.
> > 
> > please correct me if I am wrong, but I think the 1500B MTU case does not fit in
> > the half-page buffer size, since we need to take VETH_XDP_HEADROOM into account.
> > In particular:
> > 
> > - VETH_BUF_SIZE = 2048
> > - VETH_XDP_HEADROOM = 256 + 2 = 258
> 
> On some arch the NET_IP_ALIGN is zero.
> 
> I suppose XDP_PACKET_HEADROOM are for xdp_frame and data_meta, it seems
> xdp_frame is only 40 bytes for 64 bit arch and max size of metalen is 32
> as xdp_metalen_invalid() suggest, is there any other reason why we need
> 256 bytes here?

XDP_PACKET_HEADROOM must be greater than (40 + 32)B because you may want to push
new data at the beginning of the xdp_buff/xdp_frame by running the
bpf_xdp_adjust_head() helper.
I think 256B has been selected for XDP_PACKET_HEADROOM since it is 4 cachelines
(but I may be wrong).
There was a discussion in the past about reducing XDP_PACKET_HEADROOM to 192B, but
that has not been merged yet and it is not related to this series. We can address
your comments in a follow-up patch when the XDP_PACKET_HEADROOM series is merged.
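
For illustration, a minimal sketch of a program that consumes that headroom
(hypothetical: struct my_hdr is a made-up encapsulation header):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct my_hdr { __u32 id; };    /* hypothetical 4B header */

    SEC("xdp")
    int xdp_push_hdr(struct xdp_md *ctx)
    {
            void *data, *data_end;
            struct my_hdr *hdr;

            /* grow the frame at the front, eating into the headroom */
            if (bpf_xdp_adjust_head(ctx, -(int)sizeof(*hdr)))
                    return XDP_DROP;

            data = (void *)(long)ctx->data;
            data_end = (void *)(long)ctx->data_end;
            hdr = data;
            if ((void *)(hdr + 1) > data_end)
                    return XDP_DROP;

            hdr->id = 0;
            return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";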

Regards,
Lorenzo

> 
> > - max_headsize = SKB_WITH_OVERHEAD(VETH_BUF_SIZE - VETH_XDP_HEADROOM) = 1470
> > 
> > Even in this case we will need to consume a full page. In fact, performance
> > is a little bit worse:
> > 
> > MTU 1500: tcp throughput ~ 8.3Gbps
> > 
> > Do you agree or am I missing something?
> > 
> > Regards,
> > Lorenzo
> 



* Re: [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-24  9:17         ` Lorenzo Bianconi
@ 2023-04-24 11:58           ` Yunsheng Lin
  2023-04-24 13:04             ` Jesper Dangaard Brouer
                               ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Yunsheng Lin @ 2023-04-24 11:58 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: Lorenzo Bianconi, netdev, bpf, davem, edumazet, kuba, pabeni,
	hawk, john.fastabend, ast, daniel

On 2023/4/24 17:17, Lorenzo Bianconi wrote:
>> On 2023/4/23 22:20, Lorenzo Bianconi wrote:
>>>> On 2023/4/23 2:54, Lorenzo Bianconi wrote:
>>>>>  struct veth_priv {
>>>>> @@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
>>>>>  			goto drop;
>>>>>  
>>>>>  		/* Allocate skb head */
>>>>> -		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
>>>>> +		page = page_pool_dev_alloc_pages(rq->page_pool);
>>>>>  		if (!page)
>>>>>  			goto drop;
>>>>>  
>>>>>  		nskb = build_skb(page_address(page), PAGE_SIZE);
>>>>
>>>> If the page pool is used with PP_FLAG_PAGE_FRAG, maybe there is some additional
>>>> improvement for the MTU 1500B case; it seems a 4K page is able to hold two skbs.
>>>> And we can reduce the memory usage too, which is a significant saving if the
>>>> page size is 64K.
>>>
>>> please correct me if I am wrong, but I think the 1500B MTU case does not fit in
>>> the half-page buffer size, since we need to take VETH_XDP_HEADROOM into account.
>>> In particular:
>>>
>>> - VETH_BUF_SIZE = 2048
>>> - VETH_XDP_HEADROOM = 256 + 2 = 258
>>
>> On some arch the NET_IP_ALIGN is zero.
>>
>> I suppose XDP_PACKET_HEADROOM are for xdp_frame and data_meta, it seems
>> xdp_frame is only 40 bytes for 64 bit arch and max size of metalen is 32
>> as xdp_metalen_invalid() suggest, is there any other reason why we need
>> 256 bytes here?
> 
> XDP_PACKET_HEADROOM must be greater than (40 + 32)B because you may want to push
> new data at the beginning of the xdp_buff/xdp_frame by running the
> bpf_xdp_adjust_head() helper.
> I think 256B has been selected for XDP_PACKET_HEADROOM since it is 4 cachelines
> (but I may be wrong).
> There was a discussion in the past about reducing XDP_PACKET_HEADROOM to 192B, but
> that has not been merged yet and it is not related to this series. We can address
> your comments in a follow-up patch when the XDP_PACKET_HEADROOM series is merged.

It is worth mentioning that the performance gain in this patch comes at the cost
of more memory usage: at most VETH_RING_SIZE (256) + PP_ALLOC_CACHE_SIZE (128)
pages are used.

IMHO, it seems better to limit the memory usage as much as possible, or provide a
way for the user to disable/enable the page pool.


* Re: [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-24 11:58           ` Yunsheng Lin
@ 2023-04-24 13:04             ` Jesper Dangaard Brouer
  2023-04-24 13:06             ` Lorenzo Bianconi
  2023-04-24 13:10             ` Maciej Fijalkowski
  2 siblings, 0 replies; 15+ messages in thread
From: Jesper Dangaard Brouer @ 2023-04-24 13:04 UTC (permalink / raw)
  To: Yunsheng Lin, Lorenzo Bianconi
  Cc: brouer, Lorenzo Bianconi, netdev, bpf, davem, edumazet, kuba,
	pabeni, hawk, john.fastabend, ast, daniel


On 24/04/2023 13.58, Yunsheng Lin wrote:
> On 2023/4/24 17:17, Lorenzo Bianconi wrote:
>>> On 2023/4/23 22:20, Lorenzo Bianconi wrote:
>>>>> On 2023/4/23 2:54, Lorenzo Bianconi wrote:
>>>>>>   struct veth_priv {
>>>>>> @@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
>>>>>>   			goto drop;
>>>>>>   
>>>>>>   		/* Allocate skb head */
>>>>>> -		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
>>>>>> +		page = page_pool_dev_alloc_pages(rq->page_pool);
>>>>>>   		if (!page)
>>>>>>   			goto drop;
>>>>>>   
>>>>>>   		nskb = build_skb(page_address(page), PAGE_SIZE);
>>>>>
>>>>> If the page pool is used with PP_FLAG_PAGE_FRAG, maybe there is some additional
>>>>> improvement for the MTU 1500B case; it seems a 4K page is able to hold two skbs.
>>>>> And we can reduce the memory usage too, which is a significant saving if the
>>>>> page size is 64K.
>>>>
>>>> please correct me if I am wrong, but I think the 1500B MTU case does not fit in
>>>> the half-page buffer size, since we need to take VETH_XDP_HEADROOM into account.
>>>> In particular:
>>>>
>>>> - VETH_BUF_SIZE = 2048
>>>> - VETH_XDP_HEADROOM = 256 + 2 = 258
>>>
>>> On some arch the NET_IP_ALIGN is zero.
>>>
>>> I suppose XDP_PACKET_HEADROOM are for xdp_frame and data_meta, it seems
>>> xdp_frame is only 40 bytes for 64 bit arch and max size of metalen is 32
>>> as xdp_metalen_invalid() suggest, is there any other reason why we need
>>> 256 bytes here?
>>
>> XDP_PACKET_HEADROOM must be greater than (40 + 32)B because you may want to push
>> new data at the beginning of the xdp_buff/xdp_frame by running the
>> bpf_xdp_adjust_head() helper.
>> I think 256B has been selected for XDP_PACKET_HEADROOM since it is 4 cachelines
>> (but I may be wrong).
>> There was a discussion in the past about reducing XDP_PACKET_HEADROOM to 192B, but
>> that has not been merged yet and it is not related to this series. We can address
>> your comments in a follow-up patch when the XDP_PACKET_HEADROOM series is merged.
> 
> It is worth mentioning that the performance gain in this patch comes at the cost
> of more memory usage: at most VETH_RING_SIZE (256) + PP_ALLOC_CACHE_SIZE (128)
> pages are used.
> 

The general scheme with XDP is trading memory for speed up.

> IMHO, it seems better to limit the memory usage as much as possible, or provide a
> way for the user to disable/enable the page pool.
> 

Well, that sort of exists already, right... If you disable XDP, or actually
NAPI (looking at the patches), it will also disable the page pool.

I want to highlight that Lorenzo is "just" replacing the allocation of a full
page via alloc_page() with a faster API that happens to cache some of
these pages.
In that sense, I think this patch makes sense ... seen in isolation.

My concern beyond this patch is that netif_receive_generic_xdp() and
veth_convert_skb_to_xdp_buff() are both dealing with SKB-to-XDP
conversion, but they are diverging in how they do this.
(Is the challenge that veth will also see "TX" SKBs?)

Kind of changing the direction, but I keep wondering why the beep we are
allocating+copying the entire contents of the SKB.
There must be a better way? (especially after XDP got frags support).

--Jesper



* Re: [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-24 11:58           ` Yunsheng Lin
  2023-04-24 13:04             ` Jesper Dangaard Brouer
@ 2023-04-24 13:06             ` Lorenzo Bianconi
  2023-04-24 13:10             ` Maciej Fijalkowski
  2 siblings, 0 replies; 15+ messages in thread
From: Lorenzo Bianconi @ 2023-04-24 13:06 UTC (permalink / raw)
  To: Yunsheng Lin
  Cc: Lorenzo Bianconi, netdev, bpf, davem, edumazet, kuba, pabeni,
	hawk, john.fastabend, ast, daniel


> On 2023/4/24 17:17, Lorenzo Bianconi wrote:
> >> On 2023/4/23 22:20, Lorenzo Bianconi wrote:
> >>>> On 2023/4/23 2:54, Lorenzo Bianconi wrote:
> >>>>>  struct veth_priv {
> >>>>> @@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
> >>>>>  			goto drop;
> >>>>>  
> >>>>>  		/* Allocate skb head */
> >>>>> -		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> >>>>> +		page = page_pool_dev_alloc_pages(rq->page_pool);
> >>>>>  		if (!page)
> >>>>>  			goto drop;
> >>>>>  
> >>>>>  		nskb = build_skb(page_address(page), PAGE_SIZE);
> >>>>
> >>>> If the page pool is used with PP_FLAG_PAGE_FRAG, maybe there is some additional
> >>>> improvement for the MTU 1500B case; it seems a 4K page is able to hold two skbs.
> >>>> And we can reduce the memory usage too, which is a significant saving if the
> >>>> page size is 64K.
> >>>
> >>> please correct me if I am wrong, but I think the 1500B MTU case does not fit in
> >>> the half-page buffer size, since we need to take VETH_XDP_HEADROOM into account.
> >>> In particular:
> >>>
> >>> - VETH_BUF_SIZE = 2048
> >>> - VETH_XDP_HEADROOM = 256 + 2 = 258
> >>
> >> On some arch the NET_IP_ALIGN is zero.
> >>
> >> I suppose XDP_PACKET_HEADROOM are for xdp_frame and data_meta, it seems
> >> xdp_frame is only 40 bytes for 64 bit arch and max size of metalen is 32
> >> as xdp_metalen_invalid() suggest, is there any other reason why we need
> >> 256 bytes here?
> > 
> > XDP_PACKET_HEADROOM must be greater than (40 + 32)B because you may want to push
> > new data at the beginning of the xdp_buff/xdp_frame by running the
> > bpf_xdp_adjust_head() helper.
> > I think 256B has been selected for XDP_PACKET_HEADROOM since it is 4 cachelines
> > (but I may be wrong).
> > There was a discussion in the past about reducing XDP_PACKET_HEADROOM to 192B, but
> > that has not been merged yet and it is not related to this series. We can address
> > your comments in a follow-up patch when the XDP_PACKET_HEADROOM series is merged.
> 
> It is worth mentioning that the performance gain in this patch comes at the cost
> of more memory usage: at most VETH_RING_SIZE (256) + PP_ALLOC_CACHE_SIZE (128)
> pages are used.

I would say the memory footprint is not so significant compared to the
performance improvement (>= 15%) in this particular case. In particular, I think
in most of the cases we will recycle into the ptr_ring:
- 4K pages: 256*4KB ~ 1MB
- 64K pages: 256*64KB ~ 16MB

Regards,
Lorenzo

> 
> IMHO, it seems better to limit the memory usage as much as possible, or provide a
> way for the user to disable/enable the page pool.
> 



* Re: [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-24 11:58           ` Yunsheng Lin
  2023-04-24 13:04             ` Jesper Dangaard Brouer
  2023-04-24 13:06             ` Lorenzo Bianconi
@ 2023-04-24 13:10             ` Maciej Fijalkowski
  2023-04-24 13:41               ` Lorenzo Bianconi
  2023-04-25 11:19               ` Yunsheng Lin
  2 siblings, 2 replies; 15+ messages in thread
From: Maciej Fijalkowski @ 2023-04-24 13:10 UTC (permalink / raw)
  To: Yunsheng Lin
  Cc: Lorenzo Bianconi, Lorenzo Bianconi, netdev, bpf, davem, edumazet,
	kuba, pabeni, hawk, john.fastabend, ast, daniel

On Mon, Apr 24, 2023 at 07:58:07PM +0800, Yunsheng Lin wrote:
> On 2023/4/24 17:17, Lorenzo Bianconi wrote:
> >> On 2023/4/23 22:20, Lorenzo Bianconi wrote:
> >>>> On 2023/4/23 2:54, Lorenzo Bianconi wrote:
> >>>>>  struct veth_priv {
> >>>>> @@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
> >>>>>  			goto drop;
> >>>>>  
> >>>>>  		/* Allocate skb head */
> >>>>> -		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> >>>>> +		page = page_pool_dev_alloc_pages(rq->page_pool);
> >>>>>  		if (!page)
> >>>>>  			goto drop;
> >>>>>  
> >>>>>  		nskb = build_skb(page_address(page), PAGE_SIZE);
> >>>>
> >>>> If the page pool is used with PP_FLAG_PAGE_FRAG, maybe there is some additional
> >>>> improvement for the MTU 1500B case; it seems a 4K page is able to hold two skbs.
> >>>> And we can reduce the memory usage too, which is a significant saving if the
> >>>> page size is 64K.
> >>>
> >>> please correct me if I am wrong, but I think the 1500B MTU case does not fit in
> >>> the half-page buffer size, since we need to take VETH_XDP_HEADROOM into account.
> >>> In particular:
> >>>
> >>> - VETH_BUF_SIZE = 2048
> >>> - VETH_XDP_HEADROOM = 256 + 2 = 258
> >>
> >> On some arch the NET_IP_ALIGN is zero.
> >>
> >> I suppose XDP_PACKET_HEADROOM are for xdp_frame and data_meta, it seems
> >> xdp_frame is only 40 bytes for 64 bit arch and max size of metalen is 32
> >> as xdp_metalen_invalid() suggest, is there any other reason why we need
> >> 256 bytes here?
> > 
> > XDP_PACKET_HEADROOM must be greater than (40 + 32)B because you may want to push
> > new data at the beginning of the xdp_buff/xdp_frame by running the
> > bpf_xdp_adjust_head() helper.
> > I think 256B has been selected for XDP_PACKET_HEADROOM since it is 4 cachelines
> > (but I may be wrong).
> > There was a discussion in the past about reducing XDP_PACKET_HEADROOM to 192B, but
> > that has not been merged yet and it is not related to this series. We can address
> > your comments in a follow-up patch when the XDP_PACKET_HEADROOM series is merged.

Intel drivers still work just fine at 192 headroom and split the page, but
it makes it problematic for BIG TCP, where MAX_SKB_FRAGS from shinfo needs
to be increased. So it's the tailroom that becomes the bottleneck, not the
headroom. I believe at some point we will convert our drivers to page_pool
with a full 4k page dedicated to a single frame.

> 
> It is worth mentioning that the performance gain in this patch comes at the cost
> of more memory usage: at most VETH_RING_SIZE (256) + PP_ALLOC_CACHE_SIZE (128)
> pages are used.
> 
> IMHO, it seems better to limit the memory usage as much as possible, or provide a
> way for the user to disable/enable the page pool.

I think this argument is valuable given the purpose that veth can
serve; IMHO it wouldn't be an argument for a standard PF driver. It would
be interesting to see how veth would work with PP_FLAG_PAGE_FRAG.


* Re: [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-24 13:10             ` Maciej Fijalkowski
@ 2023-04-24 13:41               ` Lorenzo Bianconi
  2023-04-25 11:19               ` Yunsheng Lin
  1 sibling, 0 replies; 15+ messages in thread
From: Lorenzo Bianconi @ 2023-04-24 13:41 UTC (permalink / raw)
  To: Maciej Fijalkowski
  Cc: Yunsheng Lin, Lorenzo Bianconi, netdev, bpf, davem, edumazet,
	kuba, pabeni, hawk, john.fastabend, ast, daniel


> On Mon, Apr 24, 2023 at 07:58:07PM +0800, Yunsheng Lin wrote:
> > On 2023/4/24 17:17, Lorenzo Bianconi wrote:
> > >> On 2023/4/23 22:20, Lorenzo Bianconi wrote:
> > >>>> On 2023/4/23 2:54, Lorenzo Bianconi wrote:
> > >>>>>  struct veth_priv {
> > >>>>> @@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
> > >>>>>  			goto drop;
> > >>>>>  
> > >>>>>  		/* Allocate skb head */
> > >>>>> -		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> > >>>>> +		page = page_pool_dev_alloc_pages(rq->page_pool);
> > >>>>>  		if (!page)
> > >>>>>  			goto drop;
> > >>>>>  
> > >>>>>  		nskb = build_skb(page_address(page), PAGE_SIZE);
> > >>>>
> > >>>> If the page pool is used with PP_FLAG_PAGE_FRAG, maybe there is some additional
> > >>>> improvement for the MTU 1500B case; it seems a 4K page is able to hold two skbs.
> > >>>> And we can reduce the memory usage too, which is a significant saving if the
> > >>>> page size is 64K.
> > >>>
> > >>> please correct me if I am wrong, but I think the 1500B MTU case does not fit in
> > >>> the half-page buffer size, since we need to take VETH_XDP_HEADROOM into account.
> > >>> In particular:
> > >>>
> > >>> - VETH_BUF_SIZE = 2048
> > >>> - VETH_XDP_HEADROOM = 256 + 2 = 258
> > >>
> > >> On some arch the NET_IP_ALIGN is zero.
> > >>
> > >> I suppose XDP_PACKET_HEADROOM are for xdp_frame and data_meta, it seems
> > >> xdp_frame is only 40 bytes for 64 bit arch and max size of metalen is 32
> > >> as xdp_metalen_invalid() suggest, is there any other reason why we need
> > >> 256 bytes here?
> > > 
> > > XDP_PACKET_HEADROOM must be greater than (40 + 32)B because you may want to push
> > > new data at the beginning of the xdp_buff/xdp_frame by running the
> > > bpf_xdp_adjust_head() helper.
> > > I think 256B has been selected for XDP_PACKET_HEADROOM since it is 4 cachelines
> > > (but I may be wrong).
> > > There was a discussion in the past about reducing XDP_PACKET_HEADROOM to 192B, but
> > > that has not been merged yet and it is not related to this series. We can address
> > > your comments in a follow-up patch when the XDP_PACKET_HEADROOM series is merged.
> 
> Intel drivers still work just fine at 192 headroom and split the page, but
> it makes it problematic for BIG TCP, where MAX_SKB_FRAGS from shinfo needs
> to be increased. So it's the tailroom that becomes the bottleneck, not the
> headroom. I believe at some point we will convert our drivers to page_pool
> with a full 4k page dedicated to a single frame.
> 
> > 
> > It is worth mentioning that the performance gain in this patch comes at the cost
> > of more memory usage: at most VETH_RING_SIZE (256) + PP_ALLOC_CACHE_SIZE (128)
> > pages are used.
> > 
> > IMHO, it seems better to limit the memory usage as much as possible, or provide a
> > way for the user to disable/enable the page pool.
> 
> I think this argument is valuable given the purpose that veth can
> serve; IMHO it wouldn't be an argument for a standard PF driver. It would
> be interesting to see how veth would work with PP_FLAG_PAGE_FRAG.

actually I already have a PoC for using page_pool with the PP_FLAG_PAGE_FRAG
flag in the veth driver, but if we do not reduce the XDP_PACKET_HEADROOM size
there will not be any difference in the memory footprint, since we will need
two fragments (so a full page) for a 1500B frame. Moreover, we will have lower
performance since we will need to spread the data across two skb buffers
(the linear area and frag0 in this case).

Regards,
Lorenzo

> 



* Re: [PATCH v2 net-next 0/2] add page_pool support for page recycling in veth driver
  2023-04-22 18:54 [PATCH v2 net-next 0/2] add page_pool support for page recycling in veth driver Lorenzo Bianconi
  2023-04-22 18:54 ` [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling Lorenzo Bianconi
  2023-04-22 18:54 ` [PATCH v2 net-next 2/2] net: veth: add page_pool stats Lorenzo Bianconi
@ 2023-04-25  1:10 ` patchwork-bot+netdevbpf
  2 siblings, 0 replies; 15+ messages in thread
From: patchwork-bot+netdevbpf @ 2023-04-25  1:10 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: netdev, bpf, lorenzo.bianconi, davem, edumazet, kuba, pabeni,
	hawk, john.fastabend, ast, daniel

Hello:

This series was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Sat, 22 Apr 2023 20:54:31 +0200 you wrote:
> Introduce page_pool support in the veth driver in order to recycle pages in
> the veth_convert_skb_to_xdp_buff routine and avoid reallocating the skb through
> the page allocator when we run an XDP program on the device and receive
> skbs from the stack.
> 
> Changes since v1:
> - remove page_pool checks in veth_convert_skb_to_xdp_buff() before allocating
>   the pages
> - recycle pages in the hot cache if build_skb fails in
>   veth_convert_skb_to_xdp_buff()
> 
> [...]

Here is the summary with links:
  - [v2,net-next,1/2] net: veth: add page_pool for page recycling
    https://git.kernel.org/netdev/net-next/c/0ebab78cbcbf
  - [v2,net-next,2/2] net: veth: add page_pool stats
    https://git.kernel.org/netdev/net-next/c/4fc418053ec7

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html




* Re: [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-24 13:10             ` Maciej Fijalkowski
  2023-04-24 13:41               ` Lorenzo Bianconi
@ 2023-04-25 11:19               ` Yunsheng Lin
  2023-04-25 14:13                 ` Lorenzo Bianconi
  1 sibling, 1 reply; 15+ messages in thread
From: Yunsheng Lin @ 2023-04-25 11:19 UTC (permalink / raw)
  To: Maciej Fijalkowski
  Cc: Lorenzo Bianconi, Lorenzo Bianconi, netdev, bpf, davem, edumazet,
	kuba, pabeni, hawk, john.fastabend, ast, daniel

On 2023/4/24 21:10, Maciej Fijalkowski wrote:
>>> There was a discussion in the past about reducing XDP_PACKET_HEADROOM to 192B, but
>>> that has not been merged yet and it is not related to this series. We can address
>>> your comments in a follow-up patch when the XDP_PACKET_HEADROOM series is merged.
> 
> Intel drivers still work just fine at 192 headroom and split the page, but
> it makes it problematic for BIG TCP, where MAX_SKB_FRAGS from shinfo needs

I am not sure why we are not enabling skb_shinfo(skb)->frag_list to support
BIG TCP instead of increasing MAX_SKB_FRAGS; perhaps there was some discussion
about this in the past that I am not aware of?

> to be increased. So it's the tailroom that becomes the bottleneck, not the
> headroom. I believe at some point we will convert our drivers to page_pool
> with a full 4k page dedicated to a single frame.

Can we use header splitting to ensure there is enough tailroom for
napi_build_skb() or xdp_frame with shinfo?



* Re: [PATCH v2 net-next 1/2] net: veth: add page_pool for page recycling
  2023-04-25 11:19               ` Yunsheng Lin
@ 2023-04-25 14:13                 ` Lorenzo Bianconi
  0 siblings, 0 replies; 15+ messages in thread
From: Lorenzo Bianconi @ 2023-04-25 14:13 UTC (permalink / raw)
  To: Yunsheng Lin
  Cc: Maciej Fijalkowski, Lorenzo Bianconi, netdev, bpf, davem,
	edumazet, kuba, pabeni, hawk, john.fastabend, ast, daniel


> On 2023/4/24 21:10, Maciej Fijalkowski wrote:
> >>> There was a discussion in the past about reducing XDP_PACKET_HEADROOM to 192B, but
> >>> that has not been merged yet and it is not related to this series. We can address
> >>> your comments in a follow-up patch when the XDP_PACKET_HEADROOM series is merged.
> > 
> > Intel drivers still work just fine at 192 headroom and split the page, but
> > it makes it problematic for BIG TCP, where MAX_SKB_FRAGS from shinfo needs
> 
> I am not sure why we are not enabling skb_shinfo(skb)->frag_list to support
> BIG TCP instead of increasing MAX_SKB_FRAGS; perhaps there was some discussion
> about this in the past that I am not aware of?
> 
> > to be increased. So it's the tailroom that becomes the bottleneck, not the
> > headroom. I believe at some point we will convert our drivers to page_pool
> > with a full 4k page dedicated to a single frame.
> 
> Can we use header splitting to ensure there is enough tailroom for
> napi_build_skb() or xdp_frame with shinfo?
> 

since veth_convert_skb_to_xdp_buff() runs in veth_poll(), I think we can use
napi_build_skb(). I tested it and we get an improvement (9.65Gbps vs 9.2Gbps
for 1500B frames).
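
A sketch of the follow-up change being discussed, against the code added by
this patch (napi_build_skb() is safe here since the conversion runs in NAPI
context):

    -		nskb = build_skb(page_address(page), PAGE_SIZE);
    +		nskb = napi_build_skb(page_address(page), PAGE_SIZE);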

Regards,
Lorenzo

