Netdev List
 help / color / mirror / Atom feed
From: atharva-potdar <atharvapotdar07@gmail.com>
To: hkallweit1@gmail.com, nic_swsd@realtek.com,
	andrew+netdev@lunn.ch, davem@davemloft.net, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com
Cc: netdev@vger.kernel.org, atharva-potdar <atharvapotdar07@gmail.com>
Subject: [PATCH net-next] r8169: migrate Rx path to page_pool
Date: Sun, 14 Jun 2026 11:11:37 +0530	[thread overview]
Message-ID: <20260614054137.32181-1-atharvapotdar07@gmail.com> (raw)

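Replace the copy-based Rx path, in which every received frame is copied
from a driver-managed page into a freshly allocated skb, with page_pool
buffers on which the skb is built directly (no copy). This is done in
preparation for XDP support.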

Key changes:
- Allocate order-0 pages via page_pool instead of alloc_pages + dma_map
- Build skbs directly from pages with napi_build_skb (zero-copy)
- Add rtl8169_rx_refill() to replenish descriptors after processing
- Track dirty_rx, the index of the first Rx descriptor that still needs
  a new buffer, so a refill resumes where the previous one stopped
- Cap max_mtu to R8169_RX_BUF_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN
  (order-0 pages can't support arbitrary jumbo frames)

Tested on RTL8168h with iperf3 (~470 Mbps, 0 retransmits) and
1000 pings (0 drops).

Signed-off-by: atharva-potdar <atharvapotdar07@gmail.com>
---
 drivers/net/ethernet/realtek/r8169_main.c | 128 ++++++++++++++--------
 1 file changed, 85 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index ec4fc21fa..9d8d678ac 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -31,6 +31,7 @@
 #include <linux/unaligned.h>
 #include <net/ip6_checksum.h>
 #include <net/netdev_queues.h>
+#include <net/page_pool/helpers.h>
 #include <net/phy/realtek_phy.h>
 
 #include "r8169.h"
@@ -70,7 +71,9 @@
 #define InterFrameGap	0x03	/* 3 means InterFrameGap = the shortest one */
 
 #define R8169_REGS_SIZE		256
-#define R8169_RX_BUF_SIZE	(SZ_16K - 1)
+#define R8169_RX_HEADROOM	ALIGN(XDP_PACKET_HEADROOM, 8)
+#define R8169_RX_BUF_SIZE	(PAGE_SIZE - R8169_RX_HEADROOM - \
+				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 #define NUM_TX_DESC	256	/* Number of Tx descriptor registers */
 #define NUM_RX_DESC	256	/* Number of Rx descriptor registers */
 #define R8169_TX_RING_BYTES	(NUM_TX_DESC * sizeof(struct TxDesc))
@@ -737,6 +740,7 @@ struct rtl8169_private {
 	enum mac_version mac_version;
 	enum rtl_dash_type dash_type;
 	u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
+	u32 dirty_rx; /* Index of first Rx descriptor needing a new buffer */
 	u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
 	u32 dirty_tx;
 	struct TxDesc *TxDescArray;	/* 256-aligned Tx descriptor ring */
@@ -745,6 +749,8 @@ struct rtl8169_private {
 	dma_addr_t RxPhyAddr;
 	struct page *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
 	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
+	struct page_pool *page_pool;
+	u32 rx_buf_sz;
 	u16 cp_cmd;
 	u16 tx_lpi_timer;
 	u32 irq_mask;
@@ -4148,37 +4154,27 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-static void rtl8169_mark_to_asic(struct RxDesc *desc)
+static void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz)
 {
 	u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
 
 	desc->opts2 = 0;
 	/* Force memory writes to complete before releasing descriptor */
 	dma_wmb();
-	WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
+	WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | rx_buf_sz));
 }
 
 static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
 					  struct RxDesc *desc)
 {
-	struct device *d = tp_to_dev(tp);
-	int node = dev_to_node(d);
-	dma_addr_t mapping;
 	struct page *data;
 
-	data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
+	data = page_pool_dev_alloc_pages(tp->page_pool);
 	if (!data)
 		return NULL;
 
-	mapping = dma_map_page(d, data, 0, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(d, mapping))) {
-		netdev_err(tp->dev, "Failed to map RX DMA!\n");
-		__free_pages(data, get_order(R8169_RX_BUF_SIZE));
-		return NULL;
-	}
-
-	desc->addr = cpu_to_le64(mapping);
-	rtl8169_mark_to_asic(desc);
+	desc->addr = cpu_to_le64(page_pool_get_dma_addr(data) + R8169_RX_HEADROOM);
+	rtl8169_mark_to_asic(desc, tp->rx_buf_sz);
 
 	return data;
 }
@@ -4187,15 +4183,17 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp)
 {
 	int i;
 
-	for (i = 0; i < NUM_RX_DESC && tp->Rx_databuff[i]; i++) {
-		dma_unmap_page(tp_to_dev(tp),
-			       le64_to_cpu(tp->RxDescArray[i].addr),
-			       R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
-		__free_pages(tp->Rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
+	for (i = 0; i < NUM_RX_DESC; i++) {
+		if (!tp->Rx_databuff[i])
+			continue;
+		page_pool_put_full_page(tp->page_pool, tp->Rx_databuff[i], true);
 		tp->Rx_databuff[i] = NULL;
 		tp->RxDescArray[i].addr = 0;
 		tp->RxDescArray[i].opts1 = 0;
 	}
+
+	page_pool_destroy(tp->page_pool);
+	tp->page_pool = NULL;
 }
 
 static int rtl8169_rx_fill(struct rtl8169_private *tp)
@@ -4221,11 +4219,28 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp)
 
 static int rtl8169_init_ring(struct rtl8169_private *tp)
 {
+	struct page_pool_params pp_params = { 0 };
+
 	rtl8169_init_ring_indexes(tp);
+	tp->dirty_rx = 0;
+	tp->rx_buf_sz = R8169_RX_BUF_SIZE;
 
 	memset(tp->tx_skb, 0, sizeof(tp->tx_skb));
 	memset(tp->Rx_databuff, 0, sizeof(tp->Rx_databuff));
 
+	pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+	pp_params.order = 0;
+	pp_params.pool_size = NUM_RX_DESC;
+	pp_params.nid = dev_to_node(tp_to_dev(tp));
+	pp_params.dev = tp_to_dev(tp);
+	pp_params.dma_dir = DMA_FROM_DEVICE;
+	pp_params.offset = R8169_RX_HEADROOM;
+	pp_params.max_len = tp->rx_buf_sz;
+
+	tp->page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(tp->page_pool))
+		return PTR_ERR(tp->page_pool);
+
 	return rtl8169_rx_fill(tp);
 }
 
@@ -4312,7 +4327,7 @@ static void rtl_reset_work(struct rtl8169_private *tp)
 	rtl8169_cleanup(tp);
 
 	for (i = 0; i < NUM_RX_DESC; i++)
-		rtl8169_mark_to_asic(tp->RxDescArray + i);
+		rtl8169_mark_to_asic(tp->RxDescArray + i, tp->rx_buf_sz);
 
 	napi_enable(&tp->napi);
 	rtl_hw_start(tp);
@@ -4776,9 +4791,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 	for (count = 0; count < budget; count++, tp->cur_rx++) {
 		unsigned int pkt_size, entry = tp->cur_rx % NUM_RX_DESC;
 		struct RxDesc *desc = tp->RxDescArray + entry;
+		struct page *page;
 		struct sk_buff *skb;
-		const void *rx_buf;
-		dma_addr_t addr;
 		u32 status;
 
 		status = le32_to_cpu(READ_ONCE(desc->opts1));
@@ -4791,6 +4805,9 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		 */
 		dma_rmb();
 
+		page = tp->Rx_databuff[entry];
+		tp->Rx_databuff[entry] = NULL;
+
 		if (unlikely(status & RxRES)) {
 			if (net_ratelimit())
 				netdev_warn(dev, "Rx ERROR. status = %08x\n",
@@ -4802,9 +4819,9 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 				dev->stats.rx_crc_errors++;
 
 			if (!(dev->features & NETIF_F_RXALL))
-				goto release_descriptor;
+				goto recycle;
 			else if (status & RxRWT || !(status & (RxRUNT | RxCRC)))
-				goto release_descriptor;
+				goto recycle;
 		}
 
 		pkt_size = status & GENMASK(13, 0);
@@ -4817,24 +4834,23 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		if (unlikely(rtl8169_fragmented_frame(status))) {
 			dev->stats.rx_dropped++;
 			dev->stats.rx_length_errors++;
-			goto release_descriptor;
+			goto recycle;
 		}
 
-		skb = napi_alloc_skb(&tp->napi, pkt_size);
+		dma_sync_single_for_cpu(d,
+					page_pool_get_dma_addr(page) +
+					R8169_RX_HEADROOM,
+					pkt_size, DMA_FROM_DEVICE);
+
+		skb = napi_build_skb(page_address(page), PAGE_SIZE);
 		if (unlikely(!skb)) {
 			dev->stats.rx_dropped++;
-			goto release_descriptor;
+			goto recycle;
 		}
 
-		addr = le64_to_cpu(desc->addr);
-		rx_buf = page_address(tp->Rx_databuff[entry]);
-
-		dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
-		prefetch(rx_buf);
-		skb_copy_to_linear_data(skb, rx_buf, pkt_size);
-		skb->tail += pkt_size;
-		skb->len = pkt_size;
-		dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
+		skb_reserve(skb, R8169_RX_HEADROOM);
+		skb_put(skb, pkt_size);
+		skb_mark_for_recycle(skb);
 
 		rtl8169_rx_csum(skb, status);
 		skb->protocol = eth_type_trans(skb, dev);
@@ -4847,13 +4863,34 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		napi_gro_receive(&tp->napi, skb);
 
 		dev_sw_netstats_rx_add(dev, pkt_size);
-release_descriptor:
-		rtl8169_mark_to_asic(desc);
+
+		continue;
+
+recycle:
+		page_pool_put_full_page(tp->page_pool, page, true);
 	}
 
 	return count;
 }
 
+static void rtl8169_rx_refill(struct rtl8169_private *tp)
+{
+	u32 dirty_rx = tp->dirty_rx;
+
+	while (dirty_rx != tp->cur_rx) {
+		u32 entry = dirty_rx % NUM_RX_DESC;
+
+		if (!tp->Rx_databuff[entry]) {
+			tp->Rx_databuff[entry] = rtl8169_alloc_rx_data(tp,
+								       tp->RxDescArray + entry);
+			if (!tp->Rx_databuff[entry])
+				break;
+		}
+		dirty_rx++;
+	}
+	tp->dirty_rx = dirty_rx;
+}
+
 static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 {
 	struct rtl8169_private *tp = dev_instance;
@@ -4921,6 +4958,7 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
 	rtl_tx(dev, tp, budget);
 
 	work_done = rtl_rx(dev, tp, budget);
+	rtl8169_rx_refill(tp);
 
 	if (work_done < budget && napi_complete_done(napi, work_done))
 		rtl_irq_enable(tp);
@@ -5775,8 +5813,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	jumbo_max = rtl_jumbo_max(tp);
-	if (jumbo_max)
-		dev->max_mtu = jumbo_max;
+	if (jumbo_max) {
+		unsigned int page_pool_mtu;
+
+		page_pool_mtu = R8169_RX_BUF_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN;
+		dev->max_mtu = min_t(int, jumbo_max, page_pool_mtu);
+	}
 
 	rtl_set_irq_mask(tp);
 
@@ -5808,7 +5850,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	if (jumbo_max)
 		netdev_info(dev, "jumbo features [frames: %d bytes, tx checksumming: %s]\n",
-			    jumbo_max, tp->mac_version <= RTL_GIGA_MAC_VER_06 ?
+			    dev->max_mtu, tp->mac_version <= RTL_GIGA_MAC_VER_06 ?
 			    "ok" : "ko");
 
 	if (tp->dash_type != RTL_DASH_NONE) {
-- 
2.54.0


                 reply	other threads:[~2026-06-14  5:41 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260614054137.32181-1-atharvapotdar07@gmail.com \
    --to=atharvapotdar07@gmail.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=hkallweit1@gmail.com \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=nic_swsd@realtek.com \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox