* [PATCH net-next] r8169: migrate Rx path to page_pool
@ 2026-06-14 5:41 atharva-potdar
2026-06-14 20:26 ` Heiner Kallweit
2026-06-14 22:09 ` Francois Romieu
0 siblings, 2 replies; 6+ messages in thread
From: atharva-potdar @ 2026-06-14 5:41 UTC (permalink / raw)
To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
pabeni
Cc: netdev, atharva-potdar
Replace the driver-managed skb+copy Rx model with page_pool
zero-copy in preparation for XDP support.
Key changes:
- Allocate order-0 pages via page_pool instead of alloc_pages + dma_map
- Build skbs directly from pages with napi_build_skb (zero-copy)
- Add rtl8169_rx_refill() to replenish descriptors after processing
- Track dirty_rx boundary for efficient refill scheduling
- Cap max_mtu to R8169_RX_BUF_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN
(order-0 pages can't support arbitrary jumbo frames)
Tested on RTL8168h with iperf3 (~470 Mbps, 0 retransmits) and
1000 pings (0 drops).
Signed-off-by: atharva-potdar <atharvapotdar07@gmail.com>
---
drivers/net/ethernet/realtek/r8169_main.c | 128 ++++++++++++++--------
1 file changed, 85 insertions(+), 43 deletions(-)
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index ec4fc21fa..9d8d678ac 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -31,6 +31,7 @@
#include <linux/unaligned.h>
#include <net/ip6_checksum.h>
#include <net/netdev_queues.h>
+#include <net/page_pool/helpers.h>
#include <net/phy/realtek_phy.h>
#include "r8169.h"
@@ -70,7 +71,9 @@
#define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */
#define R8169_REGS_SIZE 256
-#define R8169_RX_BUF_SIZE (SZ_16K - 1)
+#define R8169_RX_HEADROOM ALIGN(XDP_PACKET_HEADROOM, 8)
+#define R8169_RX_BUF_SIZE (PAGE_SIZE - R8169_RX_HEADROOM - \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
#define NUM_TX_DESC 256 /* Number of Tx descriptor registers */
#define NUM_RX_DESC 256 /* Number of Rx descriptor registers */
#define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc))
@@ -737,6 +740,7 @@ struct rtl8169_private {
enum mac_version mac_version;
enum rtl_dash_type dash_type;
u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
+ u32 dirty_rx; /* Index of first Rx descriptor needing a new buffer */
u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
u32 dirty_tx;
struct TxDesc *TxDescArray; /* 256-aligned Tx descriptor ring */
@@ -745,6 +749,8 @@ struct rtl8169_private {
dma_addr_t RxPhyAddr;
struct page *Rx_databuff[NUM_RX_DESC]; /* Rx data buffers */
struct ring_info tx_skb[NUM_TX_DESC]; /* Tx data buffers */
+ struct page_pool *page_pool;
+ u32 rx_buf_sz;
u16 cp_cmd;
u16 tx_lpi_timer;
u32 irq_mask;
@@ -4148,37 +4154,27 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
-static void rtl8169_mark_to_asic(struct RxDesc *desc)
+static void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz)
{
u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
desc->opts2 = 0;
/* Force memory writes to complete before releasing descriptor */
dma_wmb();
- WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
+ WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | rx_buf_sz));
}
static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
struct RxDesc *desc)
{
- struct device *d = tp_to_dev(tp);
- int node = dev_to_node(d);
- dma_addr_t mapping;
struct page *data;
- data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
+ data = page_pool_dev_alloc_pages(tp->page_pool);
if (!data)
return NULL;
- mapping = dma_map_page(d, data, 0, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(d, mapping))) {
- netdev_err(tp->dev, "Failed to map RX DMA!\n");
- __free_pages(data, get_order(R8169_RX_BUF_SIZE));
- return NULL;
- }
-
- desc->addr = cpu_to_le64(mapping);
- rtl8169_mark_to_asic(desc);
+ desc->addr = cpu_to_le64(page_pool_get_dma_addr(data) + R8169_RX_HEADROOM);
+ rtl8169_mark_to_asic(desc, tp->rx_buf_sz);
return data;
}
@@ -4187,15 +4183,17 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp)
{
int i;
- for (i = 0; i < NUM_RX_DESC && tp->Rx_databuff[i]; i++) {
- dma_unmap_page(tp_to_dev(tp),
- le64_to_cpu(tp->RxDescArray[i].addr),
- R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
- __free_pages(tp->Rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
+ for (i = 0; i < NUM_RX_DESC; i++) {
+ if (!tp->Rx_databuff[i])
+ continue;
+ page_pool_put_full_page(tp->page_pool, tp->Rx_databuff[i], true);
tp->Rx_databuff[i] = NULL;
tp->RxDescArray[i].addr = 0;
tp->RxDescArray[i].opts1 = 0;
}
+
+ page_pool_destroy(tp->page_pool);
+ tp->page_pool = NULL;
}
static int rtl8169_rx_fill(struct rtl8169_private *tp)
@@ -4221,11 +4219,28 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp)
static int rtl8169_init_ring(struct rtl8169_private *tp)
{
+ struct page_pool_params pp_params = { 0 };
+
rtl8169_init_ring_indexes(tp);
+ tp->dirty_rx = 0;
+ tp->rx_buf_sz = R8169_RX_BUF_SIZE;
memset(tp->tx_skb, 0, sizeof(tp->tx_skb));
memset(tp->Rx_databuff, 0, sizeof(tp->Rx_databuff));
+ pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+ pp_params.order = 0;
+ pp_params.pool_size = NUM_RX_DESC;
+ pp_params.nid = dev_to_node(tp_to_dev(tp));
+ pp_params.dev = tp_to_dev(tp);
+ pp_params.dma_dir = DMA_FROM_DEVICE;
+ pp_params.offset = R8169_RX_HEADROOM;
+ pp_params.max_len = tp->rx_buf_sz;
+
+ tp->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(tp->page_pool))
+ return PTR_ERR(tp->page_pool);
+
return rtl8169_rx_fill(tp);
}
@@ -4312,7 +4327,7 @@ static void rtl_reset_work(struct rtl8169_private *tp)
rtl8169_cleanup(tp);
for (i = 0; i < NUM_RX_DESC; i++)
- rtl8169_mark_to_asic(tp->RxDescArray + i);
+ rtl8169_mark_to_asic(tp->RxDescArray + i, tp->rx_buf_sz);
napi_enable(&tp->napi);
rtl_hw_start(tp);
@@ -4776,9 +4791,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
for (count = 0; count < budget; count++, tp->cur_rx++) {
unsigned int pkt_size, entry = tp->cur_rx % NUM_RX_DESC;
struct RxDesc *desc = tp->RxDescArray + entry;
+ struct page *page;
struct sk_buff *skb;
- const void *rx_buf;
- dma_addr_t addr;
u32 status;
status = le32_to_cpu(READ_ONCE(desc->opts1));
@@ -4791,6 +4805,9 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
*/
dma_rmb();
+ page = tp->Rx_databuff[entry];
+ tp->Rx_databuff[entry] = NULL;
+
if (unlikely(status & RxRES)) {
if (net_ratelimit())
netdev_warn(dev, "Rx ERROR. status = %08x\n",
@@ -4802,9 +4819,9 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
dev->stats.rx_crc_errors++;
if (!(dev->features & NETIF_F_RXALL))
- goto release_descriptor;
+ goto recycle;
else if (status & RxRWT || !(status & (RxRUNT | RxCRC)))
- goto release_descriptor;
+ goto recycle;
}
pkt_size = status & GENMASK(13, 0);
@@ -4817,24 +4834,23 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
if (unlikely(rtl8169_fragmented_frame(status))) {
dev->stats.rx_dropped++;
dev->stats.rx_length_errors++;
- goto release_descriptor;
+ goto recycle;
}
- skb = napi_alloc_skb(&tp->napi, pkt_size);
+ dma_sync_single_for_cpu(d,
+ page_pool_get_dma_addr(page) +
+ R8169_RX_HEADROOM,
+ pkt_size, DMA_FROM_DEVICE);
+
+ skb = napi_build_skb(page_address(page), PAGE_SIZE);
if (unlikely(!skb)) {
dev->stats.rx_dropped++;
- goto release_descriptor;
+ goto recycle;
}
- addr = le64_to_cpu(desc->addr);
- rx_buf = page_address(tp->Rx_databuff[entry]);
-
- dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
- prefetch(rx_buf);
- skb_copy_to_linear_data(skb, rx_buf, pkt_size);
- skb->tail += pkt_size;
- skb->len = pkt_size;
- dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
+ skb_reserve(skb, R8169_RX_HEADROOM);
+ skb_put(skb, pkt_size);
+ skb_mark_for_recycle(skb);
rtl8169_rx_csum(skb, status);
skb->protocol = eth_type_trans(skb, dev);
@@ -4847,13 +4863,34 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
napi_gro_receive(&tp->napi, skb);
dev_sw_netstats_rx_add(dev, pkt_size);
-release_descriptor:
- rtl8169_mark_to_asic(desc);
+
+ continue;
+
+recycle:
+ page_pool_put_full_page(tp->page_pool, page, true);
}
return count;
}
+static void rtl8169_rx_refill(struct rtl8169_private *tp)
+{
+ u32 dirty_rx = tp->dirty_rx;
+
+ while (dirty_rx != tp->cur_rx) {
+ u32 entry = dirty_rx % NUM_RX_DESC;
+
+ if (!tp->Rx_databuff[entry]) {
+ tp->Rx_databuff[entry] = rtl8169_alloc_rx_data(tp,
+ tp->RxDescArray + entry);
+ if (!tp->Rx_databuff[entry])
+ break;
+ }
+ dirty_rx++;
+ }
+ tp->dirty_rx = dirty_rx;
+}
+
static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
{
struct rtl8169_private *tp = dev_instance;
@@ -4921,6 +4958,7 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
rtl_tx(dev, tp, budget);
work_done = rtl_rx(dev, tp, budget);
+ rtl8169_rx_refill(tp);
if (work_done < budget && napi_complete_done(napi, work_done))
rtl_irq_enable(tp);
@@ -5775,8 +5813,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
}
jumbo_max = rtl_jumbo_max(tp);
- if (jumbo_max)
- dev->max_mtu = jumbo_max;
+ if (jumbo_max) {
+ unsigned int page_pool_mtu;
+
+ page_pool_mtu = R8169_RX_BUF_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN;
+ dev->max_mtu = min_t(int, jumbo_max, page_pool_mtu);
+ }
rtl_set_irq_mask(tp);
@@ -5808,7 +5850,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (jumbo_max)
netdev_info(dev, "jumbo features [frames: %d bytes, tx checksumming: %s]\n",
- jumbo_max, tp->mac_version <= RTL_GIGA_MAC_VER_06 ?
+ dev->max_mtu, tp->mac_version <= RTL_GIGA_MAC_VER_06 ?
"ok" : "ko");
if (tp->dash_type != RTL_DASH_NONE) {
--
2.54.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH net-next] r8169: migrate Rx path to page_pool
2026-06-14 5:41 [PATCH net-next] r8169: migrate Rx path to page_pool atharva-potdar
@ 2026-06-14 20:26 ` Heiner Kallweit
2026-06-14 22:09 ` Francois Romieu
1 sibling, 0 replies; 6+ messages in thread
From: Heiner Kallweit @ 2026-06-14 20:26 UTC (permalink / raw)
To: atharva-potdar, nic_swsd, andrew+netdev, davem, edumazet, kuba,
pabeni
Cc: netdev
On 14.06.2026 07:41, atharva-potdar wrote:
> Replace the driver-managed skb+copy Rx model with page_pool
> zero-copy in preparation for XDP support.
>
> Key changes:
> - Allocate order-0 pages via page_pool instead of alloc_pages + dma_map
> - Build skbs directly from pages with napi_build_skb (zero-copy)
> - Add rtl8169_rx_refill() to replenish descriptors after processing
> - Track dirty_rx boundary for efficient refill scheduling
> - Cap max_mtu to R8169_RX_BUF_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN
> (order-0 pages can't support arbitrary jumbo frames)
>
If I read this correctly, max_mtu may be lower with this patch.
This may cause a regression for existing users.
> Tested on RTL8168h with iperf3 (~470 Mbps, 0 retransmits) and
> 1000 pings (0 drops).
>
Assuming your link speed is 1Gbps, 470Mbps is quite low.
Did you test also on non-x86 architectures? We had DMA-related regressions
in the past which showed up on certain non-x86 architectures only.
> Signed-off-by: atharva-potdar <atharvapotdar07@gmail.com>
> ---
> drivers/net/ethernet/realtek/r8169_main.c | 128 ++++++++++++++--------
> 1 file changed, 85 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index ec4fc21fa..9d8d678ac 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -31,6 +31,7 @@
> #include <linux/unaligned.h>
> #include <net/ip6_checksum.h>
> #include <net/netdev_queues.h>
> +#include <net/page_pool/helpers.h>
> #include <net/phy/realtek_phy.h>
>
> #include "r8169.h"
> @@ -70,7 +71,9 @@
> #define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */
>
> #define R8169_REGS_SIZE 256
> -#define R8169_RX_BUF_SIZE (SZ_16K - 1)
> +#define R8169_RX_HEADROOM ALIGN(XDP_PACKET_HEADROOM, 8)
> +#define R8169_RX_BUF_SIZE (PAGE_SIZE - R8169_RX_HEADROOM - \
> + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
> #define NUM_TX_DESC 256 /* Number of Tx descriptor registers */
> #define NUM_RX_DESC 256 /* Number of Rx descriptor registers */
> #define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc))
> @@ -737,6 +740,7 @@ struct rtl8169_private {
> enum mac_version mac_version;
> enum rtl_dash_type dash_type;
> u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
> + u32 dirty_rx; /* Index of first Rx descriptor needing a new buffer */
> u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
> u32 dirty_tx;
> struct TxDesc *TxDescArray; /* 256-aligned Tx descriptor ring */
> @@ -745,6 +749,8 @@ struct rtl8169_private {
> dma_addr_t RxPhyAddr;
> struct page *Rx_databuff[NUM_RX_DESC]; /* Rx data buffers */
> struct ring_info tx_skb[NUM_TX_DESC]; /* Tx data buffers */
> + struct page_pool *page_pool;
> + u32 rx_buf_sz;
> u16 cp_cmd;
> u16 tx_lpi_timer;
> u32 irq_mask;
> @@ -4148,37 +4154,27 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
> return 0;
> }
>
> -static void rtl8169_mark_to_asic(struct RxDesc *desc)
> +static void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz)
> {
> u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
>
> desc->opts2 = 0;
> /* Force memory writes to complete before releasing descriptor */
> dma_wmb();
> - WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
> + WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | rx_buf_sz));
> }
>
> static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
> struct RxDesc *desc)
> {
> - struct device *d = tp_to_dev(tp);
> - int node = dev_to_node(d);
> - dma_addr_t mapping;
> struct page *data;
>
> - data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
> + data = page_pool_dev_alloc_pages(tp->page_pool);
> if (!data)
> return NULL;
>
> - mapping = dma_map_page(d, data, 0, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
> - if (unlikely(dma_mapping_error(d, mapping))) {
> - netdev_err(tp->dev, "Failed to map RX DMA!\n");
> - __free_pages(data, get_order(R8169_RX_BUF_SIZE));
> - return NULL;
> - }
> -
> - desc->addr = cpu_to_le64(mapping);
> - rtl8169_mark_to_asic(desc);
> + desc->addr = cpu_to_le64(page_pool_get_dma_addr(data) + R8169_RX_HEADROOM);
> + rtl8169_mark_to_asic(desc, tp->rx_buf_sz);
>
> return data;
> }
> @@ -4187,15 +4183,17 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp)
> {
> int i;
>
> - for (i = 0; i < NUM_RX_DESC && tp->Rx_databuff[i]; i++) {
> - dma_unmap_page(tp_to_dev(tp),
> - le64_to_cpu(tp->RxDescArray[i].addr),
> - R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
> - __free_pages(tp->Rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
> + for (i = 0; i < NUM_RX_DESC; i++) {
> + if (!tp->Rx_databuff[i])
> + continue;
> + page_pool_put_full_page(tp->page_pool, tp->Rx_databuff[i], true);
> tp->Rx_databuff[i] = NULL;
> tp->RxDescArray[i].addr = 0;
> tp->RxDescArray[i].opts1 = 0;
> }
> +
> + page_pool_destroy(tp->page_pool);
> + tp->page_pool = NULL;
> }
>
> static int rtl8169_rx_fill(struct rtl8169_private *tp)
> @@ -4221,11 +4219,28 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp)
>
> static int rtl8169_init_ring(struct rtl8169_private *tp)
> {
> + struct page_pool_params pp_params = { 0 };
> +
> rtl8169_init_ring_indexes(tp);
> + tp->dirty_rx = 0;
> + tp->rx_buf_sz = R8169_RX_BUF_SIZE;
>
> memset(tp->tx_skb, 0, sizeof(tp->tx_skb));
> memset(tp->Rx_databuff, 0, sizeof(tp->Rx_databuff));
>
> + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
> + pp_params.order = 0;
> + pp_params.pool_size = NUM_RX_DESC;
> + pp_params.nid = dev_to_node(tp_to_dev(tp));
> + pp_params.dev = tp_to_dev(tp);
> + pp_params.dma_dir = DMA_FROM_DEVICE;
> + pp_params.offset = R8169_RX_HEADROOM;
> + pp_params.max_len = tp->rx_buf_sz;
> +
> + tp->page_pool = page_pool_create(&pp_params);
> + if (IS_ERR(tp->page_pool))
> + return PTR_ERR(tp->page_pool);
> +
> return rtl8169_rx_fill(tp);
> }
>
> @@ -4312,7 +4327,7 @@ static void rtl_reset_work(struct rtl8169_private *tp)
> rtl8169_cleanup(tp);
>
> for (i = 0; i < NUM_RX_DESC; i++)
> - rtl8169_mark_to_asic(tp->RxDescArray + i);
> + rtl8169_mark_to_asic(tp->RxDescArray + i, tp->rx_buf_sz);
>
> napi_enable(&tp->napi);
> rtl_hw_start(tp);
> @@ -4776,9 +4791,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
> for (count = 0; count < budget; count++, tp->cur_rx++) {
> unsigned int pkt_size, entry = tp->cur_rx % NUM_RX_DESC;
> struct RxDesc *desc = tp->RxDescArray + entry;
> + struct page *page;
> struct sk_buff *skb;
> - const void *rx_buf;
> - dma_addr_t addr;
> u32 status;
>
> status = le32_to_cpu(READ_ONCE(desc->opts1));
> @@ -4791,6 +4805,9 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
> */
> dma_rmb();
>
> + page = tp->Rx_databuff[entry];
> + tp->Rx_databuff[entry] = NULL;
> +
> if (unlikely(status & RxRES)) {
> if (net_ratelimit())
> netdev_warn(dev, "Rx ERROR. status = %08x\n",
> @@ -4802,9 +4819,9 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
> dev->stats.rx_crc_errors++;
>
> if (!(dev->features & NETIF_F_RXALL))
> - goto release_descriptor;
> + goto recycle;
> else if (status & RxRWT || !(status & (RxRUNT | RxCRC)))
> - goto release_descriptor;
> + goto recycle;
> }
>
> pkt_size = status & GENMASK(13, 0);
> @@ -4817,24 +4834,23 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
> if (unlikely(rtl8169_fragmented_frame(status))) {
> dev->stats.rx_dropped++;
> dev->stats.rx_length_errors++;
> - goto release_descriptor;
> + goto recycle;
> }
>
> - skb = napi_alloc_skb(&tp->napi, pkt_size);
> + dma_sync_single_for_cpu(d,
> + page_pool_get_dma_addr(page) +
> + R8169_RX_HEADROOM,
> + pkt_size, DMA_FROM_DEVICE);
> +
> + skb = napi_build_skb(page_address(page), PAGE_SIZE);
> if (unlikely(!skb)) {
> dev->stats.rx_dropped++;
> - goto release_descriptor;
> + goto recycle;
> }
>
> - addr = le64_to_cpu(desc->addr);
> - rx_buf = page_address(tp->Rx_databuff[entry]);
> -
> - dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
> - prefetch(rx_buf);
> - skb_copy_to_linear_data(skb, rx_buf, pkt_size);
> - skb->tail += pkt_size;
> - skb->len = pkt_size;
> - dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
> + skb_reserve(skb, R8169_RX_HEADROOM);
> + skb_put(skb, pkt_size);
> + skb_mark_for_recycle(skb);
>
> rtl8169_rx_csum(skb, status);
> skb->protocol = eth_type_trans(skb, dev);
> @@ -4847,13 +4863,34 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
> napi_gro_receive(&tp->napi, skb);
>
> dev_sw_netstats_rx_add(dev, pkt_size);
> -release_descriptor:
> - rtl8169_mark_to_asic(desc);
> +
> + continue;
> +
> +recycle:
> + page_pool_put_full_page(tp->page_pool, page, true);
> }
>
> return count;
> }
>
> +static void rtl8169_rx_refill(struct rtl8169_private *tp)
> +{
> + u32 dirty_rx = tp->dirty_rx;
> +
> + while (dirty_rx != tp->cur_rx) {
> + u32 entry = dirty_rx % NUM_RX_DESC;
> +
> + if (!tp->Rx_databuff[entry]) {
> + tp->Rx_databuff[entry] = rtl8169_alloc_rx_data(tp,
> + tp->RxDescArray + entry);
> + if (!tp->Rx_databuff[entry])
> + break;
> + }
> + dirty_rx++;
> + }
> + tp->dirty_rx = dirty_rx;
> +}
> +
> static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
> {
> struct rtl8169_private *tp = dev_instance;
> @@ -4921,6 +4958,7 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
> rtl_tx(dev, tp, budget);
>
> work_done = rtl_rx(dev, tp, budget);
> + rtl8169_rx_refill(tp);
>
> if (work_done < budget && napi_complete_done(napi, work_done))
> rtl_irq_enable(tp);
> @@ -5775,8 +5813,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> }
>
> jumbo_max = rtl_jumbo_max(tp);
> - if (jumbo_max)
> - dev->max_mtu = jumbo_max;
> + if (jumbo_max) {
> + unsigned int page_pool_mtu;
> +
> + page_pool_mtu = R8169_RX_BUF_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN;
> + dev->max_mtu = min_t(int, jumbo_max, page_pool_mtu);
> + }
>
> rtl_set_irq_mask(tp);
>
> @@ -5808,7 +5850,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>
> if (jumbo_max)
> netdev_info(dev, "jumbo features [frames: %d bytes, tx checksumming: %s]\n",
> - jumbo_max, tp->mac_version <= RTL_GIGA_MAC_VER_06 ?
> + dev->max_mtu, tp->mac_version <= RTL_GIGA_MAC_VER_06 ?
> "ok" : "ko");
>
> if (tp->dash_type != RTL_DASH_NONE) {
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH net-next] r8169: migrate Rx path to page_pool
2026-06-14 5:41 [PATCH net-next] r8169: migrate Rx path to page_pool atharva-potdar
2026-06-14 20:26 ` Heiner Kallweit
@ 2026-06-14 22:09 ` Francois Romieu
2026-06-17 3:28 ` Atharva Potdar
1 sibling, 1 reply; 6+ messages in thread
From: Francois Romieu @ 2026-06-14 22:09 UTC (permalink / raw)
To: atharva-potdar
Cc: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
pabeni, netdev
atharva-potdar <atharvapotdar07@gmail.com> :
> Replace the driver-managed skb+copy Rx model with page_pool
> zero-copy in preparation for XDP support.
>
> Key changes:
> - Allocate order-0 pages via page_pool instead of alloc_pages + dma_map
> - Build skbs directly from pages with napi_build_skb (zero-copy)
> - Add rtl8169_rx_refill() to replenish descriptors after processing
> - Track dirty_rx boundary for efficient refill scheduling
> - Cap max_mtu to R8169_RX_BUF_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN
> (order-0 pages can't support arbitrary jumbo frames)
>
> Tested on RTL8168h with iperf3 (~470 Mbps, 0 retransmits) and
> 1000 pings (0 drops).
You may consider fdd7b4c3302c93f6833e338903ea77245eb510b4 and some related
changes around that time.
--
Ueimor
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH net-next] r8169: migrate Rx path to page_pool
2026-06-14 22:09 ` Francois Romieu
@ 2026-06-17 3:28 ` Atharva Potdar
2026-06-17 16:50 ` Heiner Kallweit
2026-06-17 16:52 ` Heiner Kallweit
0 siblings, 2 replies; 6+ messages in thread
From: Atharva Potdar @ 2026-06-17 3:28 UTC (permalink / raw)
To: Francois Romieu
Cc: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
pabeni, netdev
Hi Heiner, Francois,
Thank you for reviewing this patch.
Francois:
> You may consider fdd7b4c3302c93f6833e338903ea77245eb510b4 and some related
> changes around that time.
I am sorry but I don't fully understand the context of this commit or
the behaviour it addresses. Could you please help me regarding what I
need to watch out for this change?
Heiner:
> Assuming your link speed is 1Gbps, 470Mbps is quite low.
I apologize, that was my benchmark figure when I passed my NIC via
VFIO to a VM for testing. When I tested it bare metal again with
iperf3, I hit line rates of 941 Mbps.
> If I read this correctly, max_mtu may be lower with this patch.
> This may cause a regression for existing users.
My main intention for restricting to order-0 pages is to prepare the
driver for XDP support in the subsequent patches. I understand this
causes a regression but I am not sure of another way to tackle it. How
do you prefer I handle this to avoid breaking current setups while
still having the driver be ready for XDP?
> Did you test also on non-x86 architectures? We had DMA-related regressions
> in the past which showed up on certain non-x86 architectures only.
Unfortunately, I currently only have access to x86 hardware. I cannot
test this on a bare-metal ARM machine, only an ARM VM - which may not
show those hardware issues. How is the testing typically handled for
other architectures in a situation like this?
Thanks,
Atharva.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH net-next] r8169: migrate Rx path to page_pool
2026-06-17 3:28 ` Atharva Potdar
@ 2026-06-17 16:50 ` Heiner Kallweit
2026-06-17 16:52 ` Heiner Kallweit
1 sibling, 0 replies; 6+ messages in thread
From: Heiner Kallweit @ 2026-06-17 16:50 UTC (permalink / raw)
To: Atharva Potdar, Francois Romieu
Cc: nic_swsd, andrew+netdev, davem, edumazet, kuba, pabeni, netdev
On 17.06.2026 05:28, Atharva Potdar wrote:
> Hi Heiner, Francois,
> Thank you for reviewing this patch.
>
> Francois:
>> You may consider fdd7b4c3302c93f6833e338903ea77245eb510b4 and some related
>> changes around that time.
>
> I am sorry but I don't fully understand the context of this commit or
> the behaviour it addresses. Could you please help me regarding what I
> need to watch out for this change?
>
> Heiner:
>> Assuming your link speed is 1Gbps, 470Mbps is quite low.
>
> I apologize, that was my benchmark figure when I passed my NIC via
> VFIO to a VM for testing. When I tested it bare metal again with
> iperf3, I hit line rates of 941 Mbps.
>
OK, I see. 1Gbps isn't really a challenge; the same test at 10Gbps with
an RTL8127 may be more telling.
>> If I read this correctly, max_mtu may be lower with this patch.
>> This may cause a regression for existing users.
>
> My main intention for restricting to order-0 pages is to prepare the
> driver for XDP support in the subsequent patches. I understand this
> causes a regression but I am not sure of another way to tackle it. How
> do you prefer I handle this to avoid breaking current setups while
> still having the driver be ready for XDP?
>
Is XDP in general not supported with bigger jumbo packets?
You should find a way to avoid the regression. I don't think that
intentionally introducing a regression is acceptable.
>> Did you test also on non-x86 architectures? We had DMA-related regressions
>> in the past which showed up on certain non-x86 architectures only.
>
> Unfortunately, I currently only have access to x86 hardware. I cannot
> test this on a bare-metal ARM machine, only an ARM VM - which may not
> show those hardware issues. How is the testing typically handled for
> other architectures in a situation like this?
>
> Thanks,
> Atharva.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH net-next] r8169: migrate Rx path to page_pool
2026-06-17 3:28 ` Atharva Potdar
2026-06-17 16:50 ` Heiner Kallweit
@ 2026-06-17 16:52 ` Heiner Kallweit
1 sibling, 0 replies; 6+ messages in thread
From: Heiner Kallweit @ 2026-06-17 16:52 UTC (permalink / raw)
To: Atharva Potdar, Francois Romieu
Cc: nic_swsd, andrew+netdev, davem, edumazet, kuba, pabeni, netdev
On 17.06.2026 05:28, Atharva Potdar wrote:
> Hi Heiner, Francois,
> Thank you for reviewing this patch.
>
> Francois:
>> You may consider fdd7b4c3302c93f6833e338903ea77245eb510b4 and some related
>> changes around that time.
>
> I am sorry but I don't fully understand the context of this commit or
> the behaviour it addresses. Could you please help me regarding what I
> need to watch out for this change?
>
> Heiner:
>> Assuming your link speed is 1Gbps, 470Mbps is quite low.
>
> I apologize, that was my benchmark figure when I passed my NIC via
> VFIO to a VM for testing. When I tested it bare metal again with
> iperf3, I hit line rates of 941 Mbps.
>
>> If I read this correctly, max_mtu may be lower with this patch.
>> This may cause a regression for existing users.
>
> My main intention for restricting to order-0 pages is to prepare the
> driver for XDP support in the subsequent patches. I understand this
> causes a regression but I am not sure of another way to tackle it. How
> do you prefer I handle this to avoid breaking current setups while
> still having the driver be ready for XDP?
>
>> Did you test also on non-x86 architectures? We had DMA-related regressions
>> in the past which showed up on certain non-x86 architectures only.
>
> Unfortunately, I currently only have access to x86 hardware. I cannot
> test this on a bare-metal ARM machine, only an ARM VM - which may not
> show those hardware issues. How is the testing typically handled for
> other architectures in a situation like this?
>
It's not only about ARM; I'm aware of at least LoongArch systems with
such Realtek NICs. If you can't test it, then you should at least
ensure that, in theory, the DMA-related flags are OK for basically
any architecture.
> Thanks,
> Atharva.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2026-06-17 16:52 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2026-06-14 5:41 [PATCH net-next] r8169: migrate Rx path to page_pool atharva-potdar
2026-06-14 20:26 ` Heiner Kallweit
2026-06-14 22:09 ` Francois Romieu
2026-06-17 3:28 ` Atharva Potdar
2026-06-17 16:50 ` Heiner Kallweit
2026-06-17 16:52 ` Heiner Kallweit
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox