From: Zhu Yanjun <yanjun.zhu@linux.dev>
To: Haiyang Zhang <haiyangz@microsoft.com>,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: decui@microsoft.com, kys@microsoft.com, paulros@microsoft.com,
	olaf@aepfle.de, vkuznets@redhat.com, davem@davemloft.net,
	wei.liu@kernel.org, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, leon@kernel.org, longli@microsoft.com,
	ssengar@linux.microsoft.com, linux-rdma@vger.kernel.org,
	daniel@iogearbox.net, john.fastabend@gmail.com,
	bpf@vger.kernel.org, ast@kernel.org, sharmaajay@microsoft.com,
	hawk@kernel.org, tglx@linutronix.de,
	shradhagupta@linux.microsoft.com, linux-kernel@vger.kernel.org
Subject: Re: [PATCH V6,net-next] net: mana: Add page pool for RX buffers
Date: Sat, 5 Aug 2023 18:02:32 +0800	[thread overview]
Message-ID: <d5cca789-c25e-86ad-2579-0dba00e079b3@linux.dev> (raw)
In-Reply-To: <1691181233-25286-1-git-send-email-haiyangz@microsoft.com>

On 2023/8/5 4:33, Haiyang Zhang wrote:
> Add page pool for RX buffers for faster buffer cycle and reduce CPU
> usage.
> 
> The standard page pool API is used.
> 
> With iperf and 128 threads test, this patch improved the throughput
> by 12-15%, and decreased the IRQ associated CPU's usage from 99-100% to
> 10-50%.

https://www.spinics.net/lists/netdev/msg584734.html

The throughput and CPU utilization numbers are very good. I posted a 
similar patch series at the link above, and David Miller had the 
following comments on it:
"
The system is supposed to hold onto enough atomic memory to absorb all
reasonable situations like this.

If anything a solution to this problem belongs generically somewhere,
not in a driver.  And furthermore looping over an allocation attempt
with a delay is strongly discouraged.
"

I hope your patch can be merged upstream.

Zhu Yanjun

> 
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
> ---
> V6:
> Added perf info as suggested by Jesper Dangaard Brouer
> V5:
> In err path, set page_pool_put_full_page(..., false) as suggested by
> Jakub Kicinski
> V4:
> Add nid setting, remove page_pool_nid_changed(), as suggested by
> Jesper Dangaard Brouer
> V3:
> Update xdp mem model, pool param, alloc as suggested by Jakub Kicinski
> V2:
> Use the standard page pool API as suggested by Jesper Dangaard Brouer
> ---
>   drivers/net/ethernet/microsoft/mana/mana_en.c | 90 +++++++++++++++----
>   include/net/mana/mana.h                       |  3 +
>   2 files changed, 77 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index ac2acc9aca9d..1a4ac1c8736e 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1414,8 +1414,8 @@ static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
>   	return skb;
>   }
>   
> -static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
> -			struct mana_rxq *rxq)
> +static void mana_rx_skb(void *buf_va, bool from_pool,
> +			struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq)
>   {
>   	struct mana_stats_rx *rx_stats = &rxq->stats;
>   	struct net_device *ndev = rxq->ndev;
> @@ -1448,6 +1448,9 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
>   	if (!skb)
>   		goto drop;
>   
> +	if (from_pool)
> +		skb_mark_for_recycle(skb);
> +
>   	skb->dev = napi->dev;
>   
>   	skb->protocol = eth_type_trans(skb, ndev);
> @@ -1498,9 +1501,14 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
>   	u64_stats_update_end(&rx_stats->syncp);
>   
>   drop:
> -	WARN_ON_ONCE(rxq->xdp_save_va);
> -	/* Save for reuse */
> -	rxq->xdp_save_va = buf_va;
> +	if (from_pool) {
> +		page_pool_recycle_direct(rxq->page_pool,
> +					 virt_to_head_page(buf_va));
> +	} else {
> +		WARN_ON_ONCE(rxq->xdp_save_va);
> +		/* Save for reuse */
> +		rxq->xdp_save_va = buf_va;
> +	}
>   
>   	++ndev->stats.rx_dropped;
>   
> @@ -1508,11 +1516,13 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
>   }
>   
>   static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
> -			     dma_addr_t *da, bool is_napi)
> +			     dma_addr_t *da, bool *from_pool, bool is_napi)
>   {
>   	struct page *page;
>   	void *va;
>   
> +	*from_pool = false;
> +
>   	/* Reuse XDP dropped page if available */
>   	if (rxq->xdp_save_va) {
>   		va = rxq->xdp_save_va;
> @@ -1533,17 +1543,22 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
>   			return NULL;
>   		}
>   	} else {
> -		page = dev_alloc_page();
> +		page = page_pool_dev_alloc_pages(rxq->page_pool);
>   		if (!page)
>   			return NULL;
>   
> +		*from_pool = true;
>   		va = page_to_virt(page);
>   	}
>   
>   	*da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
>   			     DMA_FROM_DEVICE);
>   	if (dma_mapping_error(dev, *da)) {
> -		put_page(virt_to_head_page(va));
> +		if (*from_pool)
> +			page_pool_put_full_page(rxq->page_pool, page, false);
> +		else
> +			put_page(virt_to_head_page(va));
> +
>   		return NULL;
>   	}
>   
> @@ -1552,21 +1567,25 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
>   
>   /* Allocate frag for rx buffer, and save the old buf */
>   static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq,
> -			       struct mana_recv_buf_oob *rxoob, void **old_buf)
> +			       struct mana_recv_buf_oob *rxoob, void **old_buf,
> +			       bool *old_fp)
>   {
> +	bool from_pool;
>   	dma_addr_t da;
>   	void *va;
>   
> -	va = mana_get_rxfrag(rxq, dev, &da, true);
> +	va = mana_get_rxfrag(rxq, dev, &da, &from_pool, true);
>   	if (!va)
>   		return;
>   
>   	dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
>   			 DMA_FROM_DEVICE);
>   	*old_buf = rxoob->buf_va;
> +	*old_fp = rxoob->from_pool;
>   
>   	rxoob->buf_va = va;
>   	rxoob->sgl[0].address = da;
> +	rxoob->from_pool = from_pool;
>   }
>   
>   static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
> @@ -1580,6 +1599,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
>   	struct device *dev = gc->dev;
>   	void *old_buf = NULL;
>   	u32 curr, pktlen;
> +	bool old_fp;
>   
>   	apc = netdev_priv(ndev);
>   
> @@ -1622,12 +1642,12 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
>   	rxbuf_oob = &rxq->rx_oobs[curr];
>   	WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
>   
> -	mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf);
> +	mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf, &old_fp);
>   
>   	/* Unsuccessful refill will have old_buf == NULL.
>   	 * In this case, mana_rx_skb() will drop the packet.
>   	 */
> -	mana_rx_skb(old_buf, oob, rxq);
> +	mana_rx_skb(old_buf, old_fp, oob, rxq);
>   
>   drop:
>   	mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
> @@ -1887,6 +1907,7 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
>   	struct mana_recv_buf_oob *rx_oob;
>   	struct device *dev = gc->dev;
>   	struct napi_struct *napi;
> +	struct page *page;
>   	int i;
>   
>   	if (!rxq)
> @@ -1919,10 +1940,18 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
>   		dma_unmap_single(dev, rx_oob->sgl[0].address,
>   				 rx_oob->sgl[0].size, DMA_FROM_DEVICE);
>   
> -		put_page(virt_to_head_page(rx_oob->buf_va));
> +		page = virt_to_head_page(rx_oob->buf_va);
> +
> +		if (rx_oob->from_pool)
> +			page_pool_put_full_page(rxq->page_pool, page, false);
> +		else
> +			put_page(page);
> +
>   		rx_oob->buf_va = NULL;
>   	}
>   
> +	page_pool_destroy(rxq->page_pool);
> +
>   	if (rxq->gdma_rq)
>   		mana_gd_destroy_queue(gc, rxq->gdma_rq);
>   
> @@ -1933,18 +1962,20 @@ static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key,
>   			    struct mana_rxq *rxq, struct device *dev)
>   {
>   	struct mana_port_context *mpc = netdev_priv(rxq->ndev);
> +	bool from_pool = false;
>   	dma_addr_t da;
>   	void *va;
>   
>   	if (mpc->rxbufs_pre)
>   		va = mana_get_rxbuf_pre(rxq, &da);
>   	else
> -		va = mana_get_rxfrag(rxq, dev, &da, false);
> +		va = mana_get_rxfrag(rxq, dev, &da, &from_pool, false);
>   
>   	if (!va)
>   		return -ENOMEM;
>   
>   	rx_oob->buf_va = va;
> +	rx_oob->from_pool = from_pool;
>   
>   	rx_oob->sgl[0].address = da;
>   	rx_oob->sgl[0].size = rxq->datasize;
> @@ -2014,6 +2045,26 @@ static int mana_push_wqe(struct mana_rxq *rxq)
>   	return 0;
>   }
>   
> +static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc)
> +{
> +	struct page_pool_params pprm = {};
> +	int ret;
> +
> +	pprm.pool_size = RX_BUFFERS_PER_QUEUE;
> +	pprm.nid = gc->numa_node;
> +	pprm.napi = &rxq->rx_cq.napi;
> +
> +	rxq->page_pool = page_pool_create(&pprm);
> +
> +	if (IS_ERR(rxq->page_pool)) {
> +		ret = PTR_ERR(rxq->page_pool);
> +		rxq->page_pool = NULL;
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
>   static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
>   					u32 rxq_idx, struct mana_eq *eq,
>   					struct net_device *ndev)
> @@ -2043,6 +2094,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
>   	mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
>   			   &rxq->headroom);
>   
> +	/* Create page pool for RX queue */
> +	err = mana_create_page_pool(rxq, gc);
> +	if (err) {
> +		netdev_err(ndev, "Create page pool err:%d\n", err);
> +		goto out;
> +	}
> +
>   	err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
>   	if (err)
>   		goto out;
> @@ -2114,8 +2172,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
>   
>   	WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx,
>   				 cq->napi.napi_id));
> -	WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
> -					   MEM_TYPE_PAGE_SHARED, NULL));
> +	WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
> +					   rxq->page_pool));
>   
>   	napi_enable(&cq->napi);
>   
> diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
> index 024ad8ddb27e..b12859511839 100644
> --- a/include/net/mana/mana.h
> +++ b/include/net/mana/mana.h
> @@ -280,6 +280,7 @@ struct mana_recv_buf_oob {
>   	struct gdma_wqe_request wqe_req;
>   
>   	void *buf_va;
> +	bool from_pool; /* allocated from a page pool */
>   
>   	/* SGL of the buffer going to be sent has part of the work request. */
>   	u32 num_sge;
> @@ -330,6 +331,8 @@ struct mana_rxq {
>   	bool xdp_flush;
>   	int xdp_rc; /* XDP redirect return code */
>   
> +	struct page_pool *page_pool;
> +
>   	/* MUST BE THE LAST MEMBER:
>   	 * Each receive buffer has an associated mana_recv_buf_oob.
>   	 */
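
For readers who have not used the API before, the lifecycle this patch
adopts looks roughly like the minimal sketch below. Only the
page_pool_* calls are the real interface; the pool size and the
surrounding function are illustrative.

	#include <net/page_pool.h>

	/* One pool per RX queue: sized to the ring, allocated on the
	 * NUMA node of the consuming CPU, and tied to the queue's NAPI
	 * context so pages can be recycled locklessly from softirq.
	 */
	static struct page_pool *example_create_pool(struct napi_struct *napi,
						     int numa_node)
	{
		struct page_pool_params pprm = {
			.pool_size = 512, /* e.g. RX ring depth */
			.nid = numa_node,
			.napi = napi,
		};

		return page_pool_create(&pprm); /* IS_ERR() on failure */
	}

In the fast path the driver then allocates with
page_pool_dev_alloc_pages(), marks completed skbs with
skb_mark_for_recycle() so the stack returns their pages to the pool,
recycles dropped buffers with page_pool_recycle_direct(), and tears the
pool down with page_pool_destroy() when the queue is destroyed. These
are exactly the call sites visible in the diff above.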



Thread overview: 4+ messages
2023-08-04 20:33 [PATCH V6,net-next] net: mana: Add page pool for RX buffers Haiyang Zhang
2023-08-05 10:02 ` Zhu Yanjun [this message]
2023-08-06  7:40 ` patchwork-bot+netdevbpf
2023-08-07  8:35 ` Jesper Dangaard Brouer
