* [PATCH net-next 0/2] add page_pool support for page recycling in veth driver
@ 2023-04-20 11:16 Lorenzo Bianconi
2023-04-20 11:16 ` [PATCH net-next 1/2] net: veth: add page_pool for page recycling Lorenzo Bianconi
2023-04-20 11:16 ` [PATCH net-next 2/2] net: veth: add page_pool stats Lorenzo Bianconi
0 siblings, 2 replies; 5+ messages in thread
From: Lorenzo Bianconi @ 2023-04-20 11:16 UTC (permalink / raw)
To: netdev
Cc: bpf, davem, edumazet, kuba, pabeni, ast, daniel, hawk,
john.fastabend, toke, mtahhan, lorenzo.bianconi
Introduce page_pool support in veth driver in order to recycle pages
in veth_convert_skb_to_xdp_buff routine and avoid reallocating the skb
through the page allocator when we run an XDP program on the device and
we receive skbs from the stack.
Lorenzo Bianconi (2):
net: veth: add page_pool for page recycling
net: veth: add page_pool stats
drivers/net/Kconfig | 2 ++
drivers/net/veth.c | 74 ++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 68 insertions(+), 8 deletions(-)
--
2.40.0
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH net-next 1/2] net: veth: add page_pool for page recycling
2023-04-20 11:16 [PATCH net-next 0/2] add page_pool support for page recycling in veth driver Lorenzo Bianconi
@ 2023-04-20 11:16 ` Lorenzo Bianconi
2023-04-22 3:22 ` Jakub Kicinski
2023-04-20 11:16 ` [PATCH net-next 2/2] net: veth: add page_pool stats Lorenzo Bianconi
1 sibling, 1 reply; 5+ messages in thread
From: Lorenzo Bianconi @ 2023-04-20 11:16 UTC (permalink / raw)
To: netdev
Cc: bpf, davem, edumazet, kuba, pabeni, ast, daniel, hawk,
john.fastabend, toke, mtahhan, lorenzo.bianconi
Introduce page_pool support in veth driver in order to recycle pages
in veth_convert_skb_to_xdp_buff routine and avoid reallocating the skb
through the page allocator.
The patch has been tested by sending TCP traffic to a veth pair where the
remote peer is running a simple XDP program that just returns XDP_PASS:
veth upstream codebase:
MTU 1500B: ~ 8Gbps
MTU 8000B: ~ 13.9Gbps
veth upstream codebase + pp support:
MTU 1500B: ~ 9.2Gbps
MTU 8000B: ~ 16.2Gbps
Tested-by: Maryam Tahhan <mtahhan@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
drivers/net/Kconfig | 1 +
drivers/net/veth.c | 54 ++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 50 insertions(+), 5 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index c34bd432da27..368c6f5b327e 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -402,6 +402,7 @@ config TUN_VNET_CROSS_LE
config VETH
tristate "Virtual ethernet pair device"
+ select PAGE_POOL
help
This device is a local ethernet tunnel. Devices are created in pairs.
When one end receives the packet it appears on its pair and vice
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index e1b38fbf1dd9..141b7745ba43 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -26,6 +26,7 @@
#include <linux/ptr_ring.h>
#include <linux/bpf_trace.h>
#include <linux/net_tstamp.h>
+#include <net/page_pool.h>
#define DRV_NAME "veth"
#define DRV_VERSION "1.0"
@@ -65,6 +66,7 @@ struct veth_rq {
bool rx_notify_masked;
struct ptr_ring xdp_ring;
struct xdp_rxq_info xdp_rxq;
+ struct page_pool *page_pool;
};
struct veth_priv {
@@ -711,8 +713,8 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
skb_shinfo(skb)->nr_frags ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
u32 size, len, max_head_size, off;
+ struct page *page = NULL;
struct sk_buff *nskb;
- struct page *page;
int i, head_off;
/* We need a private copy of the skb and data buffers since
@@ -727,17 +729,21 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
goto drop;
/* Allocate skb head */
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (rq->page_pool)
+ page = page_pool_dev_alloc_pages(rq->page_pool);
if (!page)
goto drop;
nskb = build_skb(page_address(page), PAGE_SIZE);
if (!nskb) {
- put_page(page);
+ page_pool_put_full_page(rq->page_pool, page, false);
goto drop;
}
skb_reserve(nskb, VETH_XDP_HEADROOM);
+ skb_copy_header(nskb, skb);
+ skb_mark_for_recycle(nskb);
+
size = min_t(u32, skb->len, max_head_size);
if (skb_copy_bits(skb, 0, nskb->data, size)) {
consume_skb(nskb);
@@ -745,16 +751,17 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
}
skb_put(nskb, size);
- skb_copy_header(nskb, skb);
head_off = skb_headroom(nskb) - skb_headroom(skb);
skb_headers_offset_update(nskb, head_off);
/* Allocate paged area of new skb */
off = size;
len = skb->len - off;
+ page = NULL;
for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (rq->page_pool)
+ page = page_pool_dev_alloc_pages(rq->page_pool);
if (!page) {
consume_skb(nskb);
goto drop;
@@ -770,6 +777,7 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
len -= size;
off += size;
+ page = NULL;
}
consume_skb(skb);
@@ -1002,11 +1010,37 @@ static int veth_poll(struct napi_struct *napi, int budget)
return done;
}
+static int veth_create_page_pool(struct veth_rq *rq)
+{
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .pool_size = VETH_RING_SIZE,
+ .nid = NUMA_NO_NODE,
+ .dev = &rq->dev->dev,
+ };
+
+ rq->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(rq->page_pool)) {
+ int err = PTR_ERR(rq->page_pool);
+
+ rq->page_pool = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
{
struct veth_priv *priv = netdev_priv(dev);
int err, i;
+ for (i = start; i < end; i++) {
+ err = veth_create_page_pool(&priv->rq[i]);
+ if (err)
+ goto err_page_pool;
+ }
+
for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
@@ -1027,6 +1061,11 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
err_xdp_ring:
for (i--; i >= start; i--)
ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
+err_page_pool:
+ for (i = start; i < end; i++) {
+ page_pool_destroy(priv->rq[i].page_pool);
+ priv->rq[i].page_pool = NULL;
+ }
return err;
}
@@ -1056,6 +1095,11 @@ static void veth_napi_del_range(struct net_device *dev, int start, int end)
rq->rx_notify_masked = false;
ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
}
+
+ for (i = start; i < end; i++) {
+ page_pool_destroy(priv->rq[i].page_pool);
+ priv->rq[i].page_pool = NULL;
+ }
}
static void veth_napi_del(struct net_device *dev)
--
2.40.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH net-next 2/2] net: veth: add page_pool stats
2023-04-20 11:16 [PATCH net-next 0/2] add page_pool support for page recycling in veth driver Lorenzo Bianconi
2023-04-20 11:16 ` [PATCH net-next 1/2] net: veth: add page_pool for page recycling Lorenzo Bianconi
@ 2023-04-20 11:16 ` Lorenzo Bianconi
1 sibling, 0 replies; 5+ messages in thread
From: Lorenzo Bianconi @ 2023-04-20 11:16 UTC (permalink / raw)
To: netdev
Cc: bpf, davem, edumazet, kuba, pabeni, ast, daniel, hawk,
john.fastabend, toke, mtahhan, lorenzo.bianconi
Introduce page_pool stats support to report info about the local page_pool
through ethtool.
Tested-by: Maryam Tahhan <mtahhan@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
drivers/net/Kconfig | 1 +
drivers/net/veth.c | 20 +++++++++++++++++---
2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 368c6f5b327e..d0a1ed216d15 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -403,6 +403,7 @@ config TUN_VNET_CROSS_LE
config VETH
tristate "Virtual ethernet pair device"
select PAGE_POOL
+ select PAGE_POOL_STATS
help
This device is a local ethernet tunnel. Devices are created in pairs.
When one end receives the packet it appears on its pair and vice
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 141b7745ba43..4e08a4633d25 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -157,6 +157,8 @@ static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
for (j = 0; j < VETH_TQ_STATS_LEN; j++)
ethtool_sprintf(&p, "tx_queue_%u_%.18s",
i, veth_tq_stats_desc[j].desc);
+
+ page_pool_ethtool_stats_get_strings(p);
break;
}
}
@@ -167,7 +169,8 @@ static int veth_get_sset_count(struct net_device *dev, int sset)
case ETH_SS_STATS:
return ARRAY_SIZE(ethtool_stats_keys) +
VETH_RQ_STATS_LEN * dev->real_num_rx_queues +
- VETH_TQ_STATS_LEN * dev->real_num_tx_queues;
+ VETH_TQ_STATS_LEN * dev->real_num_tx_queues +
+ page_pool_ethtool_stats_get_count();
default:
return -EOPNOTSUPP;
}
@@ -178,7 +181,8 @@ static void veth_get_ethtool_stats(struct net_device *dev,
{
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
struct net_device *peer = rtnl_dereference(priv->peer);
- int i, j, idx;
+ struct page_pool_stats pp_stats = {};
+ int i, j, idx, pp_idx;
data[0] = peer ? peer->ifindex : 0;
idx = 1;
@@ -197,9 +201,10 @@ static void veth_get_ethtool_stats(struct net_device *dev,
} while (u64_stats_fetch_retry(&rq_stats->syncp, start));
idx += VETH_RQ_STATS_LEN;
}
+ pp_idx = idx;
if (!peer)
- return;
+ goto page_pool_stats;
rcv_priv = netdev_priv(peer);
for (i = 0; i < peer->real_num_rx_queues; i++) {
@@ -216,7 +221,16 @@ static void veth_get_ethtool_stats(struct net_device *dev,
data[tx_idx + j] += *(u64 *)(base + offset);
}
} while (u64_stats_fetch_retry(&rq_stats->syncp, start));
+ pp_idx = tx_idx + VETH_TQ_STATS_LEN;
+ }
+
+page_pool_stats:
+ for (i = 0; i < dev->real_num_rx_queues; i++) {
+ if (!priv->rq[i].page_pool)
+ continue;
+ page_pool_get_stats(priv->rq[i].page_pool, &pp_stats);
}
+ page_pool_ethtool_stats_get(&data[pp_idx], &pp_stats);
}
static void veth_get_channels(struct net_device *dev,
--
2.40.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH net-next 1/2] net: veth: add page_pool for page recycling
2023-04-20 11:16 ` [PATCH net-next 1/2] net: veth: add page_pool for page recycling Lorenzo Bianconi
@ 2023-04-22 3:22 ` Jakub Kicinski
2023-04-22 18:49 ` Lorenzo Bianconi
0 siblings, 1 reply; 5+ messages in thread
From: Jakub Kicinski @ 2023-04-22 3:22 UTC (permalink / raw)
To: Lorenzo Bianconi
Cc: netdev, bpf, davem, edumazet, pabeni, ast, daniel, hawk,
john.fastabend, toke, mtahhan, lorenzo.bianconi
On Thu, 20 Apr 2023 13:16:21 +0200 Lorenzo Bianconi wrote:
> @@ -727,17 +729,21 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
> goto drop;
>
> /* Allocate skb head */
> - page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> + if (rq->page_pool)
There's some condition under which we can get to XDP enabled but no
pool?
> + page = page_pool_dev_alloc_pages(rq->page_pool);
> if (!page)
> goto drop;
>
> nskb = build_skb(page_address(page), PAGE_SIZE);
> if (!nskb) {
> - put_page(page);
> + page_pool_put_full_page(rq->page_pool, page, false);
You can recycle direct, AFAIU the basic rule of thumb is that it's
always safe to recycle direct from the context which allocates from
the pool.
> goto drop;
> }
>
> skb_reserve(nskb, VETH_XDP_HEADROOM);
> + skb_copy_header(nskb, skb);
> + skb_mark_for_recycle(nskb);
> +
> size = min_t(u32, skb->len, max_head_size);
> if (skb_copy_bits(skb, 0, nskb->data, size)) {
> consume_skb(nskb);
> @@ -745,16 +751,17 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
> }
> skb_put(nskb, size);
>
> - skb_copy_header(nskb, skb);
> head_off = skb_headroom(nskb) - skb_headroom(skb);
> skb_headers_offset_update(nskb, head_off);
>
> /* Allocate paged area of new skb */
> off = size;
> len = skb->len - off;
> + page = NULL;
Why do you clear the page pointer?
--
pw-bot: cr
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH net-next 1/2] net: veth: add page_pool for page recycling
2023-04-22 3:22 ` Jakub Kicinski
@ 2023-04-22 18:49 ` Lorenzo Bianconi
0 siblings, 0 replies; 5+ messages in thread
From: Lorenzo Bianconi @ 2023-04-22 18:49 UTC (permalink / raw)
To: Jakub Kicinski
Cc: netdev, bpf, davem, edumazet, pabeni, ast, daniel, hawk,
john.fastabend, toke, mtahhan, lorenzo.bianconi
[-- Attachment #1: Type: text/plain, Size: 1826 bytes --]
> On Thu, 20 Apr 2023 13:16:21 +0200 Lorenzo Bianconi wrote:
> > @@ -727,17 +729,21 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
> > goto drop;
> >
> > /* Allocate skb head */
> > - page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> > + if (rq->page_pool)
>
> There's some condition under which we can get to XDP enabled but no
> pool?
ack, since we destroy the page_pool after disabling the rq napi we can get rid
of 'if (rq->page_pool)' checks here.
>
> > + page = page_pool_dev_alloc_pages(rq->page_pool);
> > if (!page)
> > goto drop;
> >
> > nskb = build_skb(page_address(page), PAGE_SIZE);
> > if (!nskb) {
> > - put_page(page);
> > + page_pool_put_full_page(rq->page_pool, page, false);
>
> You can recycle direct, AFAIU the basic rule of thumb is that it's
> always safe to recycle direct from the context which allocates from
> the pool.
ack, I will fix it.
>
> > goto drop;
> > }
> >
> > skb_reserve(nskb, VETH_XDP_HEADROOM);
> > + skb_copy_header(nskb, skb);
> > + skb_mark_for_recycle(nskb);
> > +
> > size = min_t(u32, skb->len, max_head_size);
> > if (skb_copy_bits(skb, 0, nskb->data, size)) {
> > consume_skb(nskb);
> > @@ -745,16 +751,17 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
> > }
> > skb_put(nskb, size);
> >
> > - skb_copy_header(nskb, skb);
> > head_off = skb_headroom(nskb) - skb_headroom(skb);
> > skb_headers_offset_update(nskb, head_off);
> >
> > /* Allocate paged area of new skb */
> > off = size;
> > len = skb->len - off;
> > + page = NULL;
>
> Why do you clear the page pointer?
since we do not need 'if (rq->page_pool)' checks anymore we can get rid of it.
Regards,
Lorenzo
>
> --
> pw-bot: cr
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-04-22 18:49 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-20 11:16 [PATCH net-next 0/2] add page_pool support for page recycling in veth driver Lorenzo Bianconi
2023-04-20 11:16 ` [PATCH net-next 1/2] net: veth: add page_pool for page recycling Lorenzo Bianconi
2023-04-22 3:22 ` Jakub Kicinski
2023-04-22 18:49 ` Lorenzo Bianconi
2023-04-20 11:16 ` [PATCH net-next 2/2] net: veth: add page_pool stats Lorenzo Bianconi
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.