netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Taehee Yoo <ap420073@gmail.com>
To: davem@davemloft.net, kuba@kernel.org, pabeni@redhat.com,
	edumazet@google.com, almasrymina@google.com,
	netdev@vger.kernel.org, linux-doc@vger.kernel.org,
	donald.hunter@gmail.com, corbet@lwn.net,
	michael.chan@broadcom.com
Cc: kory.maincent@bootlin.com, andrew@lunn.ch,
	maxime.chevallier@bootlin.com, danieller@nvidia.com,
	hengqi@linux.alibaba.com, ecree.xilinx@gmail.com,
	przemyslaw.kitszel@intel.com, hkallweit1@gmail.com,
	ahmed.zaki@intel.com, paul.greenwalt@intel.com,
	rrameshbabu@nvidia.com, idosch@nvidia.com,
	asml.silence@gmail.com, kaiyuanz@google.com, willemb@google.com,
	aleksander.lobakin@intel.com, dw@davidwei.uk,
	sridhar.samudrala@intel.com, bcreeley@amd.com,
	ap420073@gmail.com
Subject: [PATCH net-next v3 7/7] bnxt_en: add support for device memory tcp
Date: Thu,  3 Oct 2024 16:06:20 +0000	[thread overview]
Message-ID: <20241003160620.1521626-8-ap420073@gmail.com> (raw)
In-Reply-To: <20241003160620.1521626-1-ap420073@gmail.com>

The bnxt_en driver already satisfies the requirement for Device memory
TCP, which is tcp-data-split support.
So, implement Device memory TCP for the bnxt_en driver.

From now on, the aggregation ring handles netmem_ref instead of page,
regardless of whether devmem is enabled.
So, for the aggregation ring, memory is managed with the netmem
page_pool API instead of the generic page_pool API.

If devmem is enabled, the netmem_ref is used as-is; if devmem is not
enabled, the netmem_ref is converted to a page and the page is used.

The driver detects whether devmem is set for a queue by checking
whether its mp_params.mp_priv is non-NULL.
PP_FLAG_ALLOW_UNREADABLE_NETMEM is passed only if devmem is set.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
---

v3:
 - Patch added

 drivers/net/ethernet/broadcom/Kconfig     |  1 +
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 98 +++++++++++++++--------
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  2 +-
 3 files changed, 66 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 75ca3ddda1f5..f37ff12d4746 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -211,6 +211,7 @@ config BNXT
 	select FW_LOADER
 	select LIBCRC32C
 	select NET_DEVLINK
+	select NET_DEVMEM
 	select PAGE_POOL
 	select DIMLIB
 	select AUXILIARY_BUS
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 872b15842b11..64e07d247f97 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -55,6 +55,7 @@
 #include <net/page_pool/helpers.h>
 #include <linux/align.h>
 #include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
@@ -863,6 +864,22 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 		bnapi->events &= ~BNXT_TX_CMP_EVENT;
 }
 
+/* Allocate one netmem from rxr->page_pool, honouring the caller's @gfp.
+ * Return 0 on failure; else set *offset = 0 and *mapping to its DMA address.
+ */
+static netmem_ref __bnxt_alloc_rx_netmem(struct bnxt *bp, dma_addr_t *mapping,
+					 struct bnxt_rx_ring_info *rxr,
+					 unsigned int *offset, gfp_t gfp)
+{
+	netmem_ref netmem = page_pool_alloc_netmem(rxr->page_pool, gfp);
+
+	if (!netmem)
+		return 0;
+	*offset = 0;
+	*mapping = page_pool_get_dma_addr_netmem(netmem) + *offset;
+	return netmem;
+}
+
 static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
 					 struct bnxt_rx_ring_info *rxr,
 					 unsigned int *offset,
@@ -972,21 +989,21 @@ static inline u16 bnxt_find_next_agg_idx(struct bnxt_rx_ring_info *rxr, u16 idx)
 	return next;
 }
 
-static inline int bnxt_alloc_rx_page(struct bnxt *bp,
-				     struct bnxt_rx_ring_info *rxr,
-				     u16 prod, gfp_t gfp)
+static inline int bnxt_alloc_rx_netmem(struct bnxt *bp,
+				       struct bnxt_rx_ring_info *rxr,
+				       u16 prod, gfp_t gfp)
 {
 	struct rx_bd *rxbd =
 		&rxr->rx_agg_desc_ring[RX_AGG_RING(bp, prod)][RX_IDX(prod)];
 	struct bnxt_sw_rx_agg_bd *rx_agg_buf;
-	struct page *page;
+	netmem_ref netmem;
 	dma_addr_t mapping;
 	u16 sw_prod = rxr->rx_sw_agg_prod;
 	unsigned int offset = 0;
 
-	page = __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp);
+	netmem = __bnxt_alloc_rx_netmem(bp, &mapping, rxr, &offset, gfp);
 
-	if (!page)
+	if (!netmem)
 		return -ENOMEM;
 
 	if (unlikely(test_bit(sw_prod, rxr->rx_agg_bmap)))
@@ -996,7 +1013,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp,
 	rx_agg_buf = &rxr->rx_agg_ring[sw_prod];
 	rxr->rx_sw_agg_prod = RING_RX_AGG(bp, NEXT_RX_AGG(sw_prod));
 
-	rx_agg_buf->page = page;
+	rx_agg_buf->netmem = netmem;
 	rx_agg_buf->offset = offset;
 	rx_agg_buf->mapping = mapping;
 	rxbd->rx_bd_haddr = cpu_to_le64(mapping);
@@ -1044,7 +1061,7 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx,
 		struct rx_agg_cmp *agg;
 		struct bnxt_sw_rx_agg_bd *cons_rx_buf, *prod_rx_buf;
 		struct rx_bd *prod_bd;
-		struct page *page;
+		netmem_ref netmem;
 
 		if (p5_tpa)
 			agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, start + i);
@@ -1061,11 +1078,11 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx,
 		cons_rx_buf = &rxr->rx_agg_ring[cons];
 
 		/* It is possible for sw_prod to be equal to cons, so
-		 * set cons_rx_buf->page to NULL first.
+		 * set cons_rx_buf->netmem to 0 first.
 		 */
-		page = cons_rx_buf->page;
-		cons_rx_buf->page = NULL;
-		prod_rx_buf->page = page;
+		netmem = cons_rx_buf->netmem;
+		cons_rx_buf->netmem = 0;
+		prod_rx_buf->netmem = netmem;
 		prod_rx_buf->offset = cons_rx_buf->offset;
 
 		prod_rx_buf->mapping = cons_rx_buf->mapping;
@@ -1192,6 +1209,7 @@ static struct sk_buff *bnxt_rx_skb(struct bnxt *bp,
 
 static u32 __bnxt_rx_agg_pages(struct bnxt *bp,
 			       struct bnxt_cp_ring_info *cpr,
+			       struct sk_buff *skb,
 			       struct skb_shared_info *shinfo,
 			       u16 idx, u32 agg_bufs, bool tpa,
 			       struct xdp_buff *xdp)
@@ -1211,7 +1229,7 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp,
 		u16 cons, frag_len;
 		struct rx_agg_cmp *agg;
 		struct bnxt_sw_rx_agg_bd *cons_rx_buf;
-		struct page *page;
+		netmem_ref netmem;
 		dma_addr_t mapping;
 
 		if (p5_tpa)
@@ -1223,9 +1241,15 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp,
 			    RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT;
 
 		cons_rx_buf = &rxr->rx_agg_ring[cons];
-		skb_frag_fill_page_desc(frag, cons_rx_buf->page,
-					cons_rx_buf->offset, frag_len);
-		shinfo->nr_frags = i + 1;
+		if (skb) {
+			skb_add_rx_frag_netmem(skb, i, cons_rx_buf->netmem,
+					       cons_rx_buf->offset, frag_len,
+					       BNXT_RX_PAGE_SIZE);
+		} else {
+			skb_frag_fill_page_desc(frag, netmem_to_page(cons_rx_buf->netmem),
+						cons_rx_buf->offset, frag_len);
+			shinfo->nr_frags = i + 1;
+		}
 		__clear_bit(cons, rxr->rx_agg_bmap);
 
 		/* It is possible for bnxt_alloc_rx_page() to allocate
@@ -1233,15 +1257,15 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp,
 		 * need to clear the cons entry now.
 		 */
 		mapping = cons_rx_buf->mapping;
-		page = cons_rx_buf->page;
-		cons_rx_buf->page = NULL;
+		netmem = cons_rx_buf->netmem;
+		cons_rx_buf->netmem = 0;
 
-		if (xdp && page_is_pfmemalloc(page))
+		if (xdp && page_is_pfmemalloc(netmem_to_page(netmem)))
 			xdp_buff_set_frag_pfmemalloc(xdp);
 
-		if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_ATOMIC) != 0) {
+		if (bnxt_alloc_rx_netmem(bp, rxr, prod, GFP_ATOMIC) != 0) {
 			--shinfo->nr_frags;
-			cons_rx_buf->page = page;
+			cons_rx_buf->netmem = netmem;
 
 			/* Update prod since possibly some pages have been
 			 * allocated already.
@@ -1269,7 +1293,7 @@ static struct sk_buff *bnxt_rx_agg_pages_skb(struct bnxt *bp,
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	u32 total_frag_len = 0;
 
-	total_frag_len = __bnxt_rx_agg_pages(bp, cpr, shinfo, idx,
+	total_frag_len = __bnxt_rx_agg_pages(bp, cpr, skb, shinfo, idx,
 					     agg_bufs, tpa, NULL);
 	if (!total_frag_len) {
 		skb_mark_for_recycle(skb);
@@ -1277,9 +1301,6 @@ static struct sk_buff *bnxt_rx_agg_pages_skb(struct bnxt *bp,
 		return NULL;
 	}
 
-	skb->data_len += total_frag_len;
-	skb->len += total_frag_len;
-	skb->truesize += BNXT_RX_PAGE_SIZE * agg_bufs;
 	return skb;
 }
 
@@ -1294,7 +1315,7 @@ static u32 bnxt_rx_agg_pages_xdp(struct bnxt *bp,
 	if (!xdp_buff_has_frags(xdp))
 		shinfo->nr_frags = 0;
 
-	total_frag_len = __bnxt_rx_agg_pages(bp, cpr, shinfo,
+	total_frag_len = __bnxt_rx_agg_pages(bp, cpr, NULL, shinfo,
 					     idx, agg_bufs, tpa, xdp);
 	if (total_frag_len) {
 		xdp_buff_set_frags_flag(xdp);
@@ -3342,15 +3363,15 @@ static void bnxt_free_one_rx_agg_ring(struct bnxt *bp, struct bnxt_rx_ring_info
 
 	for (i = 0; i < max_idx; i++) {
 		struct bnxt_sw_rx_agg_bd *rx_agg_buf = &rxr->rx_agg_ring[i];
-		struct page *page = rx_agg_buf->page;
+		netmem_ref netmem = rx_agg_buf->netmem;
 
-		if (!page)
+		if (!netmem)
 			continue;
 
-		rx_agg_buf->page = NULL;
+		rx_agg_buf->netmem = 0;
 		__clear_bit(i, rxr->rx_agg_bmap);
 
-		page_pool_recycle_direct(rxr->page_pool, page);
+		page_pool_put_full_netmem(rxr->page_pool, netmem, true);
 	}
 }
 
@@ -3608,9 +3629,11 @@ static void bnxt_free_rx_rings(struct bnxt *bp)
 
 static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
 				   struct bnxt_rx_ring_info *rxr,
+				   int queue_idx,
 				   int numa_node)
 {
 	struct page_pool_params pp = { 0 };
+	struct netdev_rx_queue *rxq;
 
 	pp.pool_size = bp->rx_agg_ring_size;
 	if (BNXT_RX_PAGE_MODE(bp))
@@ -3621,8 +3644,15 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
 	pp.dev = &bp->pdev->dev;
 	pp.dma_dir = bp->rx_dir;
 	pp.max_len = PAGE_SIZE;
-	pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+	pp.order = 0;
+
+	rxq = __netif_get_rx_queue(bp->dev, queue_idx);
+	if (rxq->mp_params.mp_priv)
+		pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_ALLOW_UNREADABLE_NETMEM;
+	else
+		pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
 
+	pp.queue_idx = queue_idx;
 	rxr->page_pool = page_pool_create(&pp);
 	if (IS_ERR(rxr->page_pool)) {
 		int err = PTR_ERR(rxr->page_pool);
@@ -3655,7 +3685,7 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp)
 		cpu_node = cpu_to_node(cpu);
 		netdev_dbg(bp->dev, "Allocating page pool for rx_ring[%d] on numa_node: %d\n",
 			   i, cpu_node);
-		rc = bnxt_alloc_rx_page_pool(bp, rxr, cpu_node);
+		rc = bnxt_alloc_rx_page_pool(bp, rxr, i, cpu_node);
 		if (rc)
 			return rc;
 
@@ -4154,7 +4184,7 @@ static void bnxt_alloc_one_rx_ring_page(struct bnxt *bp,
 
 	prod = rxr->rx_agg_prod;
 	for (i = 0; i < bp->rx_agg_ring_size; i++) {
-		if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_KERNEL)) {
+		if (bnxt_alloc_rx_netmem(bp, rxr, prod, GFP_KERNEL)) {
 			netdev_warn(bp->dev, "init'ed rx ring %d with %d/%d pages only\n",
 				    ring_nr, i, bp->rx_ring_size);
 			break;
@@ -15063,7 +15093,7 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
 	clone->rx_sw_agg_prod = 0;
 	clone->rx_next_cons = 0;
 
-	rc = bnxt_alloc_rx_page_pool(bp, clone, rxr->page_pool->p.nid);
+	rc = bnxt_alloc_rx_page_pool(bp, clone, idx, rxr->page_pool->p.nid);
 	if (rc)
 		return rc;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 48f390519c35..3cf57a3c7664 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -895,7 +895,7 @@ struct bnxt_sw_rx_bd {
 };
 
 struct bnxt_sw_rx_agg_bd {
-	struct page		*page;
+	netmem_ref		netmem;
 	unsigned int		offset;
 	dma_addr_t		mapping;
 };
-- 
2.34.1


  parent reply	other threads:[~2024-10-03 16:07 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-03 16:06 [PATCH net-next v3 0/7] bnxt_en: implement device memory TCP for bnxt Taehee Yoo
2024-10-03 16:06 ` [PATCH net-next v3 1/7] bnxt_en: add support for rx-copybreak ethtool command Taehee Yoo
2024-10-03 16:57   ` Brett Creeley
2024-10-03 17:15     ` Taehee Yoo
2024-10-03 17:13   ` Michael Chan
2024-10-03 17:22     ` Taehee Yoo
2024-10-03 17:43       ` Michael Chan
2024-10-03 18:28         ` Taehee Yoo
2024-10-03 18:34         ` Andrew Lunn
2024-10-05  6:29           ` Taehee Yoo
2024-10-08 18:10             ` Jakub Kicinski
2024-10-08 19:38               ` Michael Chan
2024-10-08 19:53                 ` Jakub Kicinski
2024-10-08 20:35                   ` Michael Chan
2024-10-03 16:06 ` [PATCH net-next v3 2/7] bnxt_en: add support for tcp-data-split " Taehee Yoo
2024-10-08 18:19   ` Jakub Kicinski
2024-10-09 13:54     ` Taehee Yoo
2024-10-09 15:28       ` Jakub Kicinski
2024-10-09 17:47         ` Taehee Yoo
2024-10-31 17:34         ` Taehee Yoo
2024-10-31 23:56           ` Jakub Kicinski
2024-11-01 17:11             ` Taehee Yoo
2024-10-03 16:06 ` [PATCH net-next v3 3/7] net: ethtool: add support for configuring tcp-data-split-thresh Taehee Yoo
2024-10-03 18:25   ` Mina Almasry
2024-10-03 19:33     ` Taehee Yoo
2024-10-04  1:47       ` Mina Almasry
2024-10-05  6:11         ` Taehee Yoo
2024-10-08 18:33   ` Jakub Kicinski
2024-10-09 14:25     ` Taehee Yoo
2024-10-09 15:46       ` Jakub Kicinski
2024-10-09 17:49         ` Taehee Yoo
2024-10-03 16:06 ` [PATCH net-next v3 4/7] bnxt_en: add support for tcp-data-split-thresh ethtool command Taehee Yoo
2024-10-03 18:13   ` Brett Creeley
2024-10-03 19:13     ` Taehee Yoo
2024-10-08 18:35   ` Jakub Kicinski
2024-10-09 14:31     ` Taehee Yoo
2024-10-03 16:06 ` [PATCH net-next v3 5/7] net: devmem: add ring parameter filtering Taehee Yoo
2024-10-03 18:29   ` Mina Almasry
2024-10-04  3:57     ` Taehee Yoo
2024-10-03 18:35   ` Brett Creeley
2024-10-03 18:49     ` Mina Almasry
2024-10-08 19:28       ` Jakub Kicinski
2024-10-09 14:35         ` Taehee Yoo
2024-10-04  4:01     ` Taehee Yoo
2024-10-03 16:06 ` [PATCH net-next v3 6/7] net: ethtool: " Taehee Yoo
2024-10-03 18:32   ` Mina Almasry
2024-10-03 19:35     ` Taehee Yoo
2024-10-03 16:06 ` Taehee Yoo [this message]
2024-10-03 18:43   ` [PATCH net-next v3 7/7] bnxt_en: add support for device memory tcp Mina Almasry
2024-10-04 10:34     ` Taehee Yoo
2024-10-08  2:57       ` David Wei
2024-10-09 15:02         ` Taehee Yoo
2024-10-08 19:50       ` Jakub Kicinski
2024-10-09 15:37         ` Taehee Yoo
2024-10-10  0:01           ` Jakub Kicinski
2024-10-10 17:44             ` Mina Almasry
2024-10-11  1:34               ` Jakub Kicinski
2024-10-11 17:33                 ` Mina Almasry
2024-10-11 23:42                   ` Jason Gunthorpe
2024-10-14 22:38                     ` Mina Almasry
2024-10-15  0:16                       ` Jakub Kicinski
2024-10-15  1:10                         ` Mina Almasry
2024-10-15 12:44                           ` Jason Gunthorpe
2024-10-18  8:25                             ` Mina Almasry
2024-10-19 13:55                               ` Taehee Yoo
2024-10-15 14:29                       ` Pavel Begunkov
2024-10-15 17:38                         ` David Wei
2024-10-05  3:48   ` kernel test robot
2024-10-08  2:45   ` David Wei
2024-10-08  3:54     ` Taehee Yoo
2024-10-08  3:58       ` Taehee Yoo
2024-10-16 20:17 ` [PATCH net-next v3 0/7] bnxt_en: implement device memory TCP for bnxt Stanislav Fomichev
2024-10-17  8:58   ` Taehee Yoo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241003160620.1521626-8-ap420073@gmail.com \
    --to=ap420073@gmail.com \
    --cc=ahmed.zaki@intel.com \
    --cc=aleksander.lobakin@intel.com \
    --cc=almasrymina@google.com \
    --cc=andrew@lunn.ch \
    --cc=asml.silence@gmail.com \
    --cc=bcreeley@amd.com \
    --cc=corbet@lwn.net \
    --cc=danieller@nvidia.com \
    --cc=davem@davemloft.net \
    --cc=donald.hunter@gmail.com \
    --cc=dw@davidwei.uk \
    --cc=ecree.xilinx@gmail.com \
    --cc=edumazet@google.com \
    --cc=hengqi@linux.alibaba.com \
    --cc=hkallweit1@gmail.com \
    --cc=idosch@nvidia.com \
    --cc=kaiyuanz@google.com \
    --cc=kory.maincent@bootlin.com \
    --cc=kuba@kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=maxime.chevallier@bootlin.com \
    --cc=michael.chan@broadcom.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=paul.greenwalt@intel.com \
    --cc=przemyslaw.kitszel@intel.com \
    --cc=rrameshbabu@nvidia.com \
    --cc=sridhar.samudrala@intel.com \
    --cc=willemb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).