* [PATCH net-next] net: bcmasp: Switch to page pool for RX path
@ 2026-04-07 16:26 Florian Fainelli
2026-04-07 21:04 ` Nicolai Buchwitz
0 siblings, 1 reply; 2+ messages in thread
From: Florian Fainelli @ 2026-04-07 16:26 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Justin Chen, Vikas Gupta,
Bhargava Marreddy, Rajashekar Hudumula, Arnd Bergmann,
Eric Biggers, Markus Blöchl, Heiner Kallweit,
Fernando Fernandez Mancera, open list,
open list:BROADCOM ASP 2.0 ETHERNET DRIVER
This shows an improvement of 1.9%, reducing both CPU cycles and data
cache misses.
Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
---
drivers/net/ethernet/broadcom/Kconfig | 1 +
drivers/net/ethernet/broadcom/asp2/bcmasp.h | 8 +-
.../net/ethernet/broadcom/asp2/bcmasp_intf.c | 125 +++++++++++++++---
.../ethernet/broadcom/asp2/bcmasp_intf_defs.h | 4 +
4 files changed, 115 insertions(+), 23 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index dd164acafd01..4287edc7ddd6 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -272,6 +272,7 @@ config BCMASP
depends on OF
select PHYLIB
select MDIO_BCM_UNIMAC
+ select PAGE_POOL
help
This configuration enables the Broadcom ASP 2.0 Ethernet controller
driver which is present in Broadcom STB SoCs such as 72165.
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.h b/drivers/net/ethernet/broadcom/asp2/bcmasp.h
index 29cd87335ec8..8c8ffaeadc79 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.h
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.h
@@ -6,6 +6,7 @@
#include <linux/phy.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <uapi/linux/ethtool.h>
+#include <net/page_pool/helpers.h>
#define ASP_INTR2_OFFSET 0x1000
#define ASP_INTR2_STATUS 0x0
@@ -298,16 +299,19 @@ struct bcmasp_intf {
void __iomem *rx_edpkt_cfg;
void __iomem *rx_edpkt_dma;
int rx_edpkt_index;
- int rx_buf_order;
struct bcmasp_desc *rx_edpkt_cpu;
dma_addr_t rx_edpkt_dma_addr;
dma_addr_t rx_edpkt_dma_read;
dma_addr_t rx_edpkt_dma_valid;
- /* RX buffer prefetcher ring*/
+ /* Streaming RX data ring (RBUF_4K mode) */
void *rx_ring_cpu;
dma_addr_t rx_ring_dma;
dma_addr_t rx_ring_dma_valid;
+ int rx_buf_order;
+
+ /* Page pool for recycling RX SKB data pages */
+ struct page_pool *rx_page_pool;
struct napi_struct rx_napi;
struct bcmasp_res res;
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
index b368ec2fea43..84db0f5c2646 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
@@ -15,6 +15,7 @@
#include <linux/platform_device.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/page_pool/helpers.h>
#include "bcmasp.h"
#include "bcmasp_intf_defs.h"
@@ -482,10 +483,14 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
struct bcmasp_desc *desc;
struct sk_buff *skb;
dma_addr_t valid;
+ struct page *page;
void *data;
u64 flags;
u32 len;
+ /* Hardware advances DMA_VALID as it writes each descriptor
+ * (RBUF_4K streaming mode); software chases with rx_edpkt_dma_read.
+ */
valid = rx_edpkt_dma_rq(intf, RX_EDPKT_DMA_VALID) + 1;
if (valid == intf->rx_edpkt_dma_addr + DESC_RING_SIZE)
valid = intf->rx_edpkt_dma_addr;
@@ -493,12 +498,12 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
while ((processed < budget) && (valid != intf->rx_edpkt_dma_read)) {
desc = &intf->rx_edpkt_cpu[intf->rx_edpkt_index];
- /* Ensure that descriptor has been fully written to DRAM by
- * hardware before reading by the CPU
+ /* Ensure the descriptor has been fully written to DRAM by
+ * the hardware before the CPU reads it.
*/
rmb();
- /* Calculate virt addr by offsetting from physical addr */
+ /* Locate the packet data inside the streaming ring buffer. */
data = intf->rx_ring_cpu +
(DESC_ADDR(desc->buf) - intf->rx_ring_dma);
@@ -524,19 +529,38 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
len = desc->size;
- skb = napi_alloc_skb(napi, len);
- if (!skb) {
+ /* Allocate a page pool page as the SKB data area so the
+ * kernel can recycle it efficiently after the packet is
+ * consumed, avoiding repeated slab allocations.
+ */
+ page = page_pool_dev_alloc_pages(intf->rx_page_pool);
+ if (!page) {
u64_stats_update_begin(&stats->syncp);
u64_stats_inc(&stats->rx_dropped);
u64_stats_update_end(&stats->syncp);
intf->mib.alloc_rx_skb_failed++;
+ goto next;
+ }
+ skb = napi_build_skb(page_address(page), PAGE_SIZE);
+ if (!skb) {
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->rx_dropped);
+ u64_stats_update_end(&stats->syncp);
+ intf->mib.alloc_rx_skb_failed++;
+ page_pool_recycle_direct(intf->rx_page_pool, page);
goto next;
}
+ /* Reserve headroom then copy the full descriptor payload
+ * (hardware prepends a 2-byte alignment pad at the start).
+ */
+ skb_reserve(skb, NET_SKB_PAD);
skb_put(skb, len);
memcpy(skb->data, data, len);
+ skb_mark_for_recycle(skb);
+ /* Skip the 2-byte hardware alignment pad. */
skb_pull(skb, 2);
len -= 2;
if (likely(intf->crc_fwd)) {
@@ -558,6 +582,7 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
u64_stats_update_end(&stats->syncp);
next:
+ /* Return this portion of the streaming ring buffer to HW. */
rx_edpkt_cfg_wq(intf, (DESC_ADDR(desc->buf) + desc->size),
RX_EDPKT_RING_BUFFER_READ);
@@ -661,12 +686,31 @@ static void bcmasp_adj_link(struct net_device *dev)
phy_print_status(phydev);
}
-static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
+static struct page_pool *
+bcmasp_rx_page_pool_create(struct bcmasp_intf *intf)
+{
+ struct page_pool_params pp_params = {
+ .order = 0,
+ /* Pages are CPU-side copy targets; no DMA mapping needed. */
+ .flags = 0,
+ .pool_size = NUM_4K_BUFFERS,
+ .nid = NUMA_NO_NODE,
+ .dev = &intf->parent->pdev->dev,
+ .dma_dir = DMA_FROM_DEVICE,
+ .offset = 0,
+ .max_len = PAGE_SIZE,
+ };
+
+ return page_pool_create(&pp_params);
+}
+
+static int bcmasp_alloc_rx_buffers(struct bcmasp_intf *intf)
{
struct device *kdev = &intf->parent->pdev->dev;
struct page *buffer_pg;
+ int ret;
- /* Alloc RX */
+ /* Contiguous streaming ring that hardware writes packet data into. */
intf->rx_buf_order = get_order(RING_BUFFER_SIZE);
buffer_pg = alloc_pages(GFP_KERNEL, intf->rx_buf_order);
if (!buffer_pg)
@@ -675,13 +719,55 @@ static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
intf->rx_ring_cpu = page_to_virt(buffer_pg);
intf->rx_ring_dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE,
DMA_FROM_DEVICE);
- if (dma_mapping_error(kdev, intf->rx_ring_dma))
- goto free_rx_buffer;
+ if (dma_mapping_error(kdev, intf->rx_ring_dma)) {
+ ret = -ENOMEM;
+ goto free_ring_pages;
+ }
+
+ /* Page pool for SKB data areas (copy targets, not DMA buffers). */
+ intf->rx_page_pool = bcmasp_rx_page_pool_create(intf);
+ if (IS_ERR(intf->rx_page_pool)) {
+ ret = PTR_ERR(intf->rx_page_pool);
+ intf->rx_page_pool = NULL;
+ goto free_ring_dma;
+ }
+
+ return 0;
+
+free_ring_dma:
+ dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
+ DMA_FROM_DEVICE);
+free_ring_pages:
+ __free_pages(buffer_pg, intf->rx_buf_order);
+ return ret;
+}
+
+static void bcmasp_reclaim_rx_buffers(struct bcmasp_intf *intf)
+{
+ struct device *kdev = &intf->parent->pdev->dev;
+
+ page_pool_destroy(intf->rx_page_pool);
+ intf->rx_page_pool = NULL;
+ dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
+ DMA_FROM_DEVICE);
+ __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
+}
+
+static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
+{
+ struct device *kdev = &intf->parent->pdev->dev;
+ int ret;
+
+ /* Alloc RX */
+ ret = bcmasp_alloc_rx_buffers(intf);
+ if (ret)
+ return ret;
intf->rx_edpkt_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE,
- &intf->rx_edpkt_dma_addr, GFP_KERNEL);
+ &intf->rx_edpkt_dma_addr,
+ GFP_KERNEL);
if (!intf->rx_edpkt_cpu)
- goto free_rx_buffer_dma;
+ goto free_rx_buffers;
/* Alloc TX */
intf->tx_spb_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE,
@@ -701,11 +787,8 @@ static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
free_rx_edpkt_dma:
dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
intf->rx_edpkt_dma_addr);
-free_rx_buffer_dma:
- dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
- DMA_FROM_DEVICE);
-free_rx_buffer:
- __free_pages(buffer_pg, intf->rx_buf_order);
+free_rx_buffers:
+ bcmasp_reclaim_rx_buffers(intf);
return -ENOMEM;
}
@@ -717,9 +800,7 @@ static void bcmasp_reclaim_free_buffers(struct bcmasp_intf *intf)
/* RX buffers */
dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
intf->rx_edpkt_dma_addr);
- dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
- DMA_FROM_DEVICE);
- __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
+ bcmasp_reclaim_rx_buffers(intf);
/* TX buffers */
dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu,
@@ -738,7 +819,7 @@ static void bcmasp_init_rx(struct bcmasp_intf *intf)
/* Make sure channels are disabled */
rx_edpkt_cfg_wl(intf, 0x0, RX_EDPKT_CFG_ENABLE);
- /* Rx SPB */
+ /* Streaming data ring: hardware writes raw packet bytes here. */
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_READ);
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_WRITE);
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_BASE);
@@ -747,7 +828,9 @@ static void bcmasp_init_rx(struct bcmasp_intf *intf)
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma_valid,
RX_EDPKT_RING_BUFFER_VALID);
- /* EDPKT */
+ /* EDPKT descriptor ring: hardware fills descriptors pointing into
+ * the streaming ring buffer above (RBUF_4K mode).
+ */
rx_edpkt_cfg_wl(intf, (RX_EDPKT_CFG_CFG0_RBUF_4K <<
RX_EDPKT_CFG_CFG0_DBUF_SHIFT) |
(RX_EDPKT_CFG_CFG0_64_ALN <<
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h
index af7418348e81..0318f257452a 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h
@@ -246,6 +246,10 @@
((((intf)->channel - 6) * 0x14) + 0xa2000)
#define RX_SPB_TOP_BLKOUT 0x00
+/*
+ * Number of 4 KB pages that make up the contiguous RBUF_4K streaming ring
+ * and the page pool used as copy-target SKB data areas.
+ */
#define NUM_4K_BUFFERS 32
#define RING_BUFFER_SIZE (PAGE_SIZE * NUM_4K_BUFFERS)
--
2.34.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH net-next] net: bcmasp: Switch to page pool for RX path
2026-04-07 16:26 [PATCH net-next] net: bcmasp: Switch to page pool for RX path Florian Fainelli
@ 2026-04-07 21:04 ` Nicolai Buchwitz
0 siblings, 0 replies; 2+ messages in thread
From: Nicolai Buchwitz @ 2026-04-07 21:04 UTC (permalink / raw)
To: Florian Fainelli
Cc: netdev, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Justin Chen, Vikas Gupta,
Bhargava Marreddy, Rajashekar Hudumula, Arnd Bergmann,
Eric Biggers, Markus Blöchl, Heiner Kallweit,
Fernando Fernandez Mancera, linux-kernel,
bcm-kernel-feedback-list
On 7.4.2026 18:26, Florian Fainelli wrote:
> This shows an improvement of 1.9%, reducing both CPU cycles and data
> cache misses.
>
> Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
> ---
> [...]
>
> -static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
> +static struct page_pool *
> +bcmasp_rx_page_pool_create(struct bcmasp_intf *intf)
> +{
> + struct page_pool_params pp_params = {
> + .order = 0,
> + /* Pages are CPU-side copy targets; no DMA mapping needed. */
> + .flags = 0,
> + .pool_size = NUM_4K_BUFFERS,
> + .nid = NUMA_NO_NODE,
> + .dev = &intf->parent->pdev->dev,
> + .dma_dir = DMA_FROM_DEVICE,
Nit: .dma_dir has AFAIK no effect without PP_FLAG_DMA_MAP, so
it's a bit misleading next to the "no DMA mapping needed" comment.
> + .offset = 0,
> + .max_len = PAGE_SIZE,
> + };
Other Broadcom page pool drivers set .napi and .netdev. Adding those
here would enable direct recycling and expose pool stats in ethtool -S.
> [...]
Thanks
Nicolai
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-04-07 21:04 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-07 16:26 [PATCH net-next] net: bcmasp: Switch to page pool for RX path Florian Fainelli
2026-04-07 21:04 ` Nicolai Buchwitz
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox