* [PATCH V5,net-next] net: mana: Add page pool for RX buffers
From: Haiyang Zhang @ 2023-08-02 18:07 UTC
To: linux-hyperv, netdev
Cc: haiyangz, decui, kys, paulros, olaf, vkuznets, davem, wei.liu,
edumazet, kuba, pabeni, leon, longli, ssengar, linux-rdma, daniel,
john.fastabend, bpf, ast, sharmaajay, hawk, tglx, shradhagupta,
linux-kernel
Add a page pool for RX buffers, for faster buffer recycling and
reduced CPU usage.
The standard page pool API is used.
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
V5:
In err path, set page_pool_put_full_page(..., false) as suggested by
Jakub Kicinski
V4:
Add nid setting, remove page_pool_nid_changed(), as suggested by
Jesper Dangaard Brouer
V3:
Update xdp mem model, pool param, alloc as suggested by Jakub Kicinski
V2:
Use the standard page pool API as suggested by Jesper Dangaard Brouer
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 90 +++++++++++++++----
include/net/mana/mana.h | 3 +
2 files changed, 77 insertions(+), 16 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index ac2acc9aca9d..1a4ac1c8736e 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1414,8 +1414,8 @@ static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
return skb;
}
-static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
- struct mana_rxq *rxq)
+static void mana_rx_skb(void *buf_va, bool from_pool,
+ struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq)
{
struct mana_stats_rx *rx_stats = &rxq->stats;
struct net_device *ndev = rxq->ndev;
@@ -1448,6 +1448,9 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
if (!skb)
goto drop;
+ if (from_pool)
+ skb_mark_for_recycle(skb);
+
skb->dev = napi->dev;
skb->protocol = eth_type_trans(skb, ndev);
@@ -1498,9 +1501,14 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
u64_stats_update_end(&rx_stats->syncp);
drop:
- WARN_ON_ONCE(rxq->xdp_save_va);
- /* Save for reuse */
- rxq->xdp_save_va = buf_va;
+ if (from_pool) {
+ page_pool_recycle_direct(rxq->page_pool,
+ virt_to_head_page(buf_va));
+ } else {
+ WARN_ON_ONCE(rxq->xdp_save_va);
+ /* Save for reuse */
+ rxq->xdp_save_va = buf_va;
+ }
++ndev->stats.rx_dropped;
@@ -1508,11 +1516,13 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
}
static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
- dma_addr_t *da, bool is_napi)
+ dma_addr_t *da, bool *from_pool, bool is_napi)
{
struct page *page;
void *va;
+ *from_pool = false;
+
/* Reuse XDP dropped page if available */
if (rxq->xdp_save_va) {
va = rxq->xdp_save_va;
@@ -1533,17 +1543,22 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
return NULL;
}
} else {
- page = dev_alloc_page();
+ page = page_pool_dev_alloc_pages(rxq->page_pool);
if (!page)
return NULL;
+ *from_pool = true;
va = page_to_virt(page);
}
*da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
DMA_FROM_DEVICE);
if (dma_mapping_error(dev, *da)) {
- put_page(virt_to_head_page(va));
+ if (*from_pool)
+ page_pool_put_full_page(rxq->page_pool, page, false);
+ else
+ put_page(virt_to_head_page(va));
+
return NULL;
}
@@ -1552,21 +1567,25 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
/* Allocate frag for rx buffer, and save the old buf */
static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq,
- struct mana_recv_buf_oob *rxoob, void **old_buf)
+ struct mana_recv_buf_oob *rxoob, void **old_buf,
+ bool *old_fp)
{
+ bool from_pool;
dma_addr_t da;
void *va;
- va = mana_get_rxfrag(rxq, dev, &da, true);
+ va = mana_get_rxfrag(rxq, dev, &da, &from_pool, true);
if (!va)
return;
dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
DMA_FROM_DEVICE);
*old_buf = rxoob->buf_va;
+ *old_fp = rxoob->from_pool;
rxoob->buf_va = va;
rxoob->sgl[0].address = da;
+ rxoob->from_pool = from_pool;
}
static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
@@ -1580,6 +1599,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
struct device *dev = gc->dev;
void *old_buf = NULL;
u32 curr, pktlen;
+ bool old_fp;
apc = netdev_priv(ndev);
@@ -1622,12 +1642,12 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
rxbuf_oob = &rxq->rx_oobs[curr];
WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
- mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf);
+ mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf, &old_fp);
/* Unsuccessful refill will have old_buf == NULL.
* In this case, mana_rx_skb() will drop the packet.
*/
- mana_rx_skb(old_buf, oob, rxq);
+ mana_rx_skb(old_buf, old_fp, oob, rxq);
drop:
mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
@@ -1887,6 +1907,7 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
struct mana_recv_buf_oob *rx_oob;
struct device *dev = gc->dev;
struct napi_struct *napi;
+ struct page *page;
int i;
if (!rxq)
@@ -1919,10 +1940,18 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
dma_unmap_single(dev, rx_oob->sgl[0].address,
rx_oob->sgl[0].size, DMA_FROM_DEVICE);
- put_page(virt_to_head_page(rx_oob->buf_va));
+ page = virt_to_head_page(rx_oob->buf_va);
+
+ if (rx_oob->from_pool)
+ page_pool_put_full_page(rxq->page_pool, page, false);
+ else
+ put_page(page);
+
rx_oob->buf_va = NULL;
}
+ page_pool_destroy(rxq->page_pool);
+
if (rxq->gdma_rq)
mana_gd_destroy_queue(gc, rxq->gdma_rq);
@@ -1933,18 +1962,20 @@ static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key,
struct mana_rxq *rxq, struct device *dev)
{
struct mana_port_context *mpc = netdev_priv(rxq->ndev);
+ bool from_pool = false;
dma_addr_t da;
void *va;
if (mpc->rxbufs_pre)
va = mana_get_rxbuf_pre(rxq, &da);
else
- va = mana_get_rxfrag(rxq, dev, &da, false);
+ va = mana_get_rxfrag(rxq, dev, &da, &from_pool, false);
if (!va)
return -ENOMEM;
rx_oob->buf_va = va;
+ rx_oob->from_pool = from_pool;
rx_oob->sgl[0].address = da;
rx_oob->sgl[0].size = rxq->datasize;
@@ -2014,6 +2045,26 @@ static int mana_push_wqe(struct mana_rxq *rxq)
return 0;
}
+static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc)
+{
+ struct page_pool_params pprm = {};
+ int ret;
+
+ pprm.pool_size = RX_BUFFERS_PER_QUEUE;
+ pprm.nid = gc->numa_node;
+ pprm.napi = &rxq->rx_cq.napi;
+
+ rxq->page_pool = page_pool_create(&pprm);
+
+ if (IS_ERR(rxq->page_pool)) {
+ ret = PTR_ERR(rxq->page_pool);
+ rxq->page_pool = NULL;
+ return ret;
+ }
+
+ return 0;
+}
+
static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
u32 rxq_idx, struct mana_eq *eq,
struct net_device *ndev)
@@ -2043,6 +2094,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
&rxq->headroom);
+ /* Create page pool for RX queue */
+ err = mana_create_page_pool(rxq, gc);
+ if (err) {
+ netdev_err(ndev, "Create page pool err:%d\n", err);
+ goto out;
+ }
+
err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
if (err)
goto out;
@@ -2114,8 +2172,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx,
cq->napi.napi_id));
- WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
- MEM_TYPE_PAGE_SHARED, NULL));
+ WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
+ rxq->page_pool));
napi_enable(&cq->napi);
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 024ad8ddb27e..b12859511839 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -280,6 +280,7 @@ struct mana_recv_buf_oob {
struct gdma_wqe_request wqe_req;
void *buf_va;
+ bool from_pool; /* allocated from a page pool */
/* SGL of the buffer going to be sent has part of the work request. */
u32 num_sge;
@@ -330,6 +331,8 @@ struct mana_rxq {
bool xdp_flush;
int xdp_rc; /* XDP redirect return code */
+ struct page_pool *page_pool;
+
/* MUST BE THE LAST MEMBER:
* Each receive buffer has an associated mana_recv_buf_oob.
*/
--
2.25.1
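
For readers new to the page pool API, the lifecycle the patch wires up is
roughly as follows. This is a minimal sketch rather than the driver's exact
code: error handling is elided, the pool size and helper names are
illustrative, and the header location varies by kernel version.

	#include <net/page_pool.h>	/* <net/page_pool/helpers.h> on newer kernels */

	static struct page_pool *rx_create_pool(int nid, struct napi_struct *napi)
	{
		struct page_pool_params pprm = {
			.pool_size = 512,	/* illustrative; the patch uses RX_BUFFERS_PER_QUEUE */
			.nid = nid,		/* allocate near the device's NUMA node */
			.napi = napi,		/* enables lockless direct recycling */
		};

		return page_pool_create(&pprm);	/* returns ERR_PTR() on failure */
	}

	static void rx_buffer_example(struct page_pool *pool)
	{
		/* Fast path: allocation hits the pool's per-CPU cache first. */
		struct page *page = page_pool_dev_alloc_pages(pool);

		if (!page)
			return;

		/* On the good path the page backs an skb, and
		 * skb_mark_for_recycle(skb) returns it to the pool when the
		 * skb is freed.  On drop/error paths, recycle explicitly:
		 */
		page_pool_recycle_direct(pool, page);	/* from NAPI context */
		/* or, outside NAPI: page_pool_put_full_page(pool, page, false); */
	}

At teardown, page_pool_destroy() may only be called once outstanding pages
have been returned, which is why mana_destroy_rxq() above releases each
buffer back to the pool before destroying it.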
* Re: [PATCH V5,net-next] net: mana: Add page pool for RX buffers
From: Jesse Brandeburg @ 2023-08-03 1:44 UTC
To: Haiyang Zhang, linux-hyperv, netdev
Cc: decui, kys, paulros, olaf, vkuznets, davem, wei.liu, edumazet,
kuba, pabeni, leon, longli, ssengar, linux-rdma, daniel,
john.fastabend, bpf, ast, sharmaajay, hawk, tglx, shradhagupta,
linux-kernel
On 8/2/2023 11:07 AM, Haiyang Zhang wrote:
> Add a page pool for RX buffers, for faster buffer recycling and
> reduced CPU usage.
>
> The standard page pool API is used.
>
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> ---
> V5:
> In err path, set page_pool_put_full_page(..., false) as suggested by
> Jakub Kicinski
> V4:
> Add nid setting, remove page_pool_nid_changed(), as suggested by
> Jesper Dangaard Brouer
> V3:
> Update xdp mem model, pool param, alloc as suggested by Jakub Kicinski
> V2:
> Use the standard page pool API as suggested by Jesper Dangaard Brouer
> ---
> diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
> index 024ad8ddb27e..b12859511839 100644
> --- a/include/net/mana/mana.h
> +++ b/include/net/mana/mana.h
> @@ -280,6 +280,7 @@ struct mana_recv_buf_oob {
> struct gdma_wqe_request wqe_req;
>
> void *buf_va;
> + bool from_pool; /* allocated from a page pool */
I suggest you use flags rather than bools; each bool wastes 7 bits, and
the packing of this struct will be full of holes, made worse by this
patch (see the pahole tool).
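For illustration, a flags-based layout could look like the sketch below;
the struct and bit names are hypothetical, not from the patch:

	#include <linux/bits.h>
	#include <linux/types.h>

	#define MANA_RXBUF_FROM_POOL	BIT(0)	/* hypothetical flag bit */

	struct mana_recv_buf_oob_ex {	/* illustrative layout, not the real struct */
		void *buf_va;
		u32 flags;		/* replaces: bool from_pool; 31 bits to spare */
		u32 num_sge;		/* packs against flags with no padding hole */
	};

	/* set:  rxoob->flags |= MANA_RXBUF_FROM_POOL;
	 * test: if (rxoob->flags & MANA_RXBUF_FROM_POOL) ...
	 */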
>
> /* SGL of the buffer going to be sent has part of the work request. */
> u32 num_sge;
> @@ -330,6 +331,8 @@ struct mana_rxq {
> bool xdp_flush;
> int xdp_rc; /* XDP redirect return code */
>
> + struct page_pool *page_pool;
> +
> /* MUST BE THE LAST MEMBER:
> * Each receive buffer has an associated mana_recv_buf_oob.
> */
The rest of the patch looks ok and is remarkably compact for a
conversion to page pool. I'd prefer someone with more page pool exposure
review this for correctness, but FWIW
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
* Re: [PATCH V5,net-next] net: mana: Add page pool for RX buffers
From: Jesper Dangaard Brouer @ 2023-08-04 10:49 UTC
To: Jesse Brandeburg, Haiyang Zhang, linux-hyperv, netdev
Cc: decui, kys, paulros, olaf, vkuznets, davem, wei.liu, edumazet,
kuba, pabeni, leon, longli, ssengar, linux-rdma, daniel,
john.fastabend, bpf, ast, sharmaajay, hawk, tglx, shradhagupta,
linux-kernel, Ilias Apalodimas
On 03/08/2023 03.44, Jesse Brandeburg wrote:
> On 8/2/2023 11:07 AM, Haiyang Zhang wrote:
>> Add a page pool for RX buffers, for faster buffer recycling and
>> reduced CPU usage.
>>
Can you add some info on the performance improvement this patch gives?
Your previous post mentioned:

> With iperf and 128 threads test, this patch improved the throughput
> by 12-15%, and decreased the IRQ-associated CPU's usage from 99-100%
> to 10-50%.
>> The standard page pool API is used.
>>
>> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
>> ---
>> V5:
>> In err path, set page_pool_put_full_page(..., false) as suggested by
>> Jakub Kicinski
>> V4:
>> Add nid setting, remove page_pool_nid_changed(), as suggested by
>> Jesper Dangaard Brouer
>> V3:
>> Update xdp mem model, pool param, alloc as suggested by Jakub Kicinski
>> V2:
>> Use the standard page pool API as suggested by Jesper Dangaard Brouer
>> ---
>
>> diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
>> index 024ad8ddb27e..b12859511839 100644
>> --- a/include/net/mana/mana.h
>> +++ b/include/net/mana/mana.h
>> @@ -280,6 +280,7 @@ struct mana_recv_buf_oob {
>> struct gdma_wqe_request wqe_req;
>>
>> void *buf_va;
>> + bool from_pool; /* allocated from a page pool */
>
> I suggest you use flags rather than bools; each bool wastes 7 bits, and
> the packing of this struct will be full of holes, made worse by this
> patch (see the pahole tool).
>
Agreed.
>
>>
>> /* SGL of the buffer going to be sent has part of the work request. */
>> u32 num_sge;
>> @@ -330,6 +331,8 @@ struct mana_rxq {
>> bool xdp_flush;
>> int xdp_rc; /* XDP redirect return code */
>>
>> + struct page_pool *page_pool;
>> +
>> /* MUST BE THE LAST MEMBER:
>> * Each receive buffer has an associated mana_recv_buf_oob.
>> */
>
>
> The rest of the patch looks ok and is remarkably compact for a
> conversion to page pool. I'd prefer someone with more page pool exposure
> review this for correctness, but FWIW
>
Both Jakub and I have reviewed the page_pool parts, and I think we are
in a good place.
Looking at the driver, I wonder why you are keeping the driver-local
memory cache (when the PP also contains a memory cache)?
(I assume there is a good reason, so this is not blocking the patch.)
>
> Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Thanks for taking the time to review.
I'm ready to ACK once the description is improved a bit :-)
--Jesper
pw-bot: cr
* RE: [PATCH V5,net-next] net: mana: Add page pool for RX buffers
From: Haiyang Zhang @ 2023-08-04 20:20 UTC
To: Jesper Dangaard Brouer, Jesse Brandeburg,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Dexuan Cui, KY Srinivasan, Paul Rosswurm, olaf@aepfle.de,
vkuznets, davem@davemloft.net, wei.liu@kernel.org,
edumazet@google.com, kuba@kernel.org, pabeni@redhat.com,
leon@kernel.org, Long Li, ssengar@linux.microsoft.com,
linux-rdma@vger.kernel.org, daniel@iogearbox.net,
john.fastabend@gmail.com, bpf@vger.kernel.org, ast@kernel.org,
Ajay Sharma, tglx@linutronix.de, shradhagupta@linux.microsoft.com,
linux-kernel@vger.kernel.org, Ilias Apalodimas
> -----Original Message-----
> From: Jesper Dangaard Brouer <hawk@kernel.org>
> Sent: Friday, August 4, 2023 6:50 AM
> Subject: Re: [PATCH V5,net-next] net: mana: Add page pool for RX buffers
>
>
>
> On 03/08/2023 03.44, Jesse Brandeburg wrote:
> > On 8/2/2023 11:07 AM, Haiyang Zhang wrote:
> >> Add a page pool for RX buffers, for faster buffer recycling and
> >> reduced CPU usage.
> >>
>
> Can you add some info on the performance improvement this patch gives?
I will add this to the patch description.
>
> Your previous post mentioned:
>
> > With iperf and 128 threads test, this patch improved the throughput
> > by 12-15%, and decreased the IRQ-associated CPU's usage from 99-100%
> > to 10-50%.
>
>
> >> The standard page pool API is used.
> >>
> >> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> >> ---
> >> V5:
> >> In err path, set page_pool_put_full_page(..., false) as suggested by
> >> Jakub Kicinski
> >> V4:
> >> Add nid setting, remove page_pool_nid_changed(), as suggested by
> >> Jesper Dangaard Brouer
> >> V3:
> >> Update xdp mem model, pool param, alloc as suggested by Jakub Kicinski
> >> V2:
> >> Use the standard page pool API as suggested by Jesper Dangaard Brouer
> >> ---
> >
> >> diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
> >> index 024ad8ddb27e..b12859511839 100644
> >> --- a/include/net/mana/mana.h
> >> +++ b/include/net/mana/mana.h
> >> @@ -280,6 +280,7 @@ struct mana_recv_buf_oob {
> >> struct gdma_wqe_request wqe_req;
> >>
> >> void *buf_va;
> >> + bool from_pool; /* allocated from a page pool */
> >
> > I suggest you use flags rather than bools; each bool wastes 7 bits, and
> > the packing of this struct will be full of holes, made worse by this
> > patch (see the pahole tool).
> >
>
> Agreed.
Thanks, I will do this when adding more flags in future patches.
>
> >
> >>
> >> /* SGL of the buffer going to be sent has part of the work request. */
> >> u32 num_sge;
> >> @@ -330,6 +331,8 @@ struct mana_rxq {
> >> bool xdp_flush;
> >> int xdp_rc; /* XDP redirect return code */
> >>
> >> + struct page_pool *page_pool;
> >> +
> >> /* MUST BE THE LAST MEMBER:
> >> * Each receive buffer has an associated mana_recv_buf_oob.
> >> */
> >
> >
> > The rest of the patch looks ok and is remarkably compact for a
> > conversion to page pool. I'd prefer someone with more page pool exposure
> > review this for correctness, but FWIW
> >
>
> Both Jakub and I have reviewed the page_pool parts, and I think we are
> in a good place.
>
> Looking at the driver, I wonder why you are keeping the driver-local
> memory cache (when the PP also contains a memory cache)?
> (I assume there is a good reason, so this is not blocking the patch.)
You mean the "else" part of the code below? It's for a compound page from
napi_alloc_frag(), not from the pool. If any error happens, we save it
locally for re-use.
drop:
if (from_pool) {
page_pool_recycle_direct(rxq->page_pool,
virt_to_head_page(buf_va));
} else {
WARN_ON_ONCE(rxq->xdp_save_va);
/* Save for reuse */
rxq->xdp_save_va = buf_va;
}
>
> >
> > Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
>
> Thanks for taking the time to review.
>
> I'm ready to ACK once the description is improved a bit :-)
>
> --Jesper
> pw-bot: cr