Netdev List
 help / color / mirror / Atom feed
From: Heiner Kallweit <hkallweit1@gmail.com>
To: atharva-potdar <atharvapotdar07@gmail.com>,
	nic_swsd@realtek.com, andrew+netdev@lunn.ch, davem@davemloft.net,
	edumazet@google.com, kuba@kernel.org, pabeni@redhat.com
Cc: netdev@vger.kernel.org
Subject: Re: [PATCH net-next] r8169: migrate Rx path to page_pool
Date: Sun, 14 Jun 2026 22:26:57 +0200	[thread overview]
Message-ID: <04959cda-8631-4346-bbbf-edc444ce242f@gmail.com> (raw)
In-Reply-To: <20260614054137.32181-1-atharvapotdar07@gmail.com>

On 14.06.2026 07:41, atharva-potdar wrote:
> Replace the driver-managed skb+copy Rx model with page_pool
> zero-copy in preparation for XDP support.
> 
> Key changes:
> - Allocate order-0 pages via page_pool instead of alloc_pages + dma_map
> - Build skbs directly from pages with napi_build_skb (zero-copy)
> - Add rtl8169_rx_refill() to replenish descriptors after processing
> - Track dirty_rx boundary for efficient refill scheduling
> - Cap max_mtu to R8169_RX_BUF_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN
>   (order-0 pages can't support arbitrary jumbo frames)
> 
If I read this correctly, max_mtu may be lower with this patch.
This may cause a regression for existing users.

> Tested on RTL8168h with iperf3 (~470 Mbps, 0 retransmits) and
> 1000 pings (0 drops).
> 
Assuming your link speed is 1Gbps, 470Mbps is quite low.

Did you also test on non-x86 architectures? In the past we had DMA-related
regressions that showed up only on certain non-x86 architectures.

> Signed-off-by: atharva-potdar <atharvapotdar07@gmail.com>
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 128 ++++++++++++++--------
>  1 file changed, 85 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index ec4fc21fa..9d8d678ac 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -31,6 +31,7 @@
>  #include <linux/unaligned.h>
>  #include <net/ip6_checksum.h>
>  #include <net/netdev_queues.h>
> +#include <net/page_pool/helpers.h>
>  #include <net/phy/realtek_phy.h>
>  
>  #include "r8169.h"
> @@ -70,7 +71,9 @@
>  #define InterFrameGap	0x03	/* 3 means InterFrameGap = the shortest one */
>  
>  #define R8169_REGS_SIZE		256
> -#define R8169_RX_BUF_SIZE	(SZ_16K - 1)
> +#define R8169_RX_HEADROOM	ALIGN(XDP_PACKET_HEADROOM, 8)
> +#define R8169_RX_BUF_SIZE	(PAGE_SIZE - R8169_RX_HEADROOM - \
> +				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
>  #define NUM_TX_DESC	256	/* Number of Tx descriptor registers */
>  #define NUM_RX_DESC	256	/* Number of Rx descriptor registers */
>  #define R8169_TX_RING_BYTES	(NUM_TX_DESC * sizeof(struct TxDesc))
> @@ -737,6 +740,7 @@ struct rtl8169_private {
>  	enum mac_version mac_version;
>  	enum rtl_dash_type dash_type;
>  	u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
> +	u32 dirty_rx; /* Index of first Rx descriptor needing a new buffer */
>  	u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
>  	u32 dirty_tx;
>  	struct TxDesc *TxDescArray;	/* 256-aligned Tx descriptor ring */
> @@ -745,6 +749,8 @@ struct rtl8169_private {
>  	dma_addr_t RxPhyAddr;
>  	struct page *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
>  	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
> +	struct page_pool *page_pool;
> +	u32 rx_buf_sz;
>  	u16 cp_cmd;
>  	u16 tx_lpi_timer;
>  	u32 irq_mask;
> @@ -4148,37 +4154,27 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
>  	return 0;
>  }
>  
> -static void rtl8169_mark_to_asic(struct RxDesc *desc)
> +static void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz)
>  {
>  	u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
>  
>  	desc->opts2 = 0;
>  	/* Force memory writes to complete before releasing descriptor */
>  	dma_wmb();
> -	WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
> +	WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | rx_buf_sz));
>  }
>  
>  static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
>  					  struct RxDesc *desc)
>  {
> -	struct device *d = tp_to_dev(tp);
> -	int node = dev_to_node(d);
> -	dma_addr_t mapping;
>  	struct page *data;
>  
> -	data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
> +	data = page_pool_dev_alloc_pages(tp->page_pool);
>  	if (!data)
>  		return NULL;
>  
> -	mapping = dma_map_page(d, data, 0, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
> -	if (unlikely(dma_mapping_error(d, mapping))) {
> -		netdev_err(tp->dev, "Failed to map RX DMA!\n");
> -		__free_pages(data, get_order(R8169_RX_BUF_SIZE));
> -		return NULL;
> -	}
> -
> -	desc->addr = cpu_to_le64(mapping);
> -	rtl8169_mark_to_asic(desc);
> +	desc->addr = cpu_to_le64(page_pool_get_dma_addr(data) + R8169_RX_HEADROOM);
> +	rtl8169_mark_to_asic(desc, tp->rx_buf_sz);
>  
>  	return data;
>  }
> @@ -4187,15 +4183,17 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp)
>  {
>  	int i;
>  
> -	for (i = 0; i < NUM_RX_DESC && tp->Rx_databuff[i]; i++) {
> -		dma_unmap_page(tp_to_dev(tp),
> -			       le64_to_cpu(tp->RxDescArray[i].addr),
> -			       R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
> -		__free_pages(tp->Rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
> +	for (i = 0; i < NUM_RX_DESC; i++) {
> +		if (!tp->Rx_databuff[i])
> +			continue;
> +		page_pool_put_full_page(tp->page_pool, tp->Rx_databuff[i], true);
>  		tp->Rx_databuff[i] = NULL;
>  		tp->RxDescArray[i].addr = 0;
>  		tp->RxDescArray[i].opts1 = 0;
>  	}
> +
> +	page_pool_destroy(tp->page_pool);
> +	tp->page_pool = NULL;
>  }
>  
>  static int rtl8169_rx_fill(struct rtl8169_private *tp)
> @@ -4221,11 +4219,28 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp)
>  
>  static int rtl8169_init_ring(struct rtl8169_private *tp)
>  {
> +	struct page_pool_params pp_params = { 0 };
> +
>  	rtl8169_init_ring_indexes(tp);
> +	tp->dirty_rx = 0;
> +	tp->rx_buf_sz = R8169_RX_BUF_SIZE;
>  
>  	memset(tp->tx_skb, 0, sizeof(tp->tx_skb));
>  	memset(tp->Rx_databuff, 0, sizeof(tp->Rx_databuff));
>  
> +	pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
> +	pp_params.order = 0;
> +	pp_params.pool_size = NUM_RX_DESC;
> +	pp_params.nid = dev_to_node(tp_to_dev(tp));
> +	pp_params.dev = tp_to_dev(tp);
> +	pp_params.dma_dir = DMA_FROM_DEVICE;
> +	pp_params.offset = R8169_RX_HEADROOM;
> +	pp_params.max_len = tp->rx_buf_sz;
> +
> +	tp->page_pool = page_pool_create(&pp_params);
> +	if (IS_ERR(tp->page_pool))
> +		return PTR_ERR(tp->page_pool);
> +
>  	return rtl8169_rx_fill(tp);
>  }
>  
> @@ -4312,7 +4327,7 @@ static void rtl_reset_work(struct rtl8169_private *tp)
>  	rtl8169_cleanup(tp);
>  
>  	for (i = 0; i < NUM_RX_DESC; i++)
> -		rtl8169_mark_to_asic(tp->RxDescArray + i);
> +		rtl8169_mark_to_asic(tp->RxDescArray + i, tp->rx_buf_sz);
>  
>  	napi_enable(&tp->napi);
>  	rtl_hw_start(tp);
> @@ -4776,9 +4791,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  	for (count = 0; count < budget; count++, tp->cur_rx++) {
>  		unsigned int pkt_size, entry = tp->cur_rx % NUM_RX_DESC;
>  		struct RxDesc *desc = tp->RxDescArray + entry;
> +		struct page *page;
>  		struct sk_buff *skb;
> -		const void *rx_buf;
> -		dma_addr_t addr;
>  		u32 status;
>  
>  		status = le32_to_cpu(READ_ONCE(desc->opts1));
> @@ -4791,6 +4805,9 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		 */
>  		dma_rmb();
>  
> +		page = tp->Rx_databuff[entry];
> +		tp->Rx_databuff[entry] = NULL;
> +
>  		if (unlikely(status & RxRES)) {
>  			if (net_ratelimit())
>  				netdev_warn(dev, "Rx ERROR. status = %08x\n",
> @@ -4802,9 +4819,9 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  				dev->stats.rx_crc_errors++;
>  
>  			if (!(dev->features & NETIF_F_RXALL))
> -				goto release_descriptor;
> +				goto recycle;
>  			else if (status & RxRWT || !(status & (RxRUNT | RxCRC)))
> -				goto release_descriptor;
> +				goto recycle;
>  		}
>  
>  		pkt_size = status & GENMASK(13, 0);
> @@ -4817,24 +4834,23 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		if (unlikely(rtl8169_fragmented_frame(status))) {
>  			dev->stats.rx_dropped++;
>  			dev->stats.rx_length_errors++;
> -			goto release_descriptor;
> +			goto recycle;
>  		}
>  
> -		skb = napi_alloc_skb(&tp->napi, pkt_size);
> +		dma_sync_single_for_cpu(d,
> +					page_pool_get_dma_addr(page) +
> +					R8169_RX_HEADROOM,
> +					pkt_size, DMA_FROM_DEVICE);
> +
> +		skb = napi_build_skb(page_address(page), PAGE_SIZE);
>  		if (unlikely(!skb)) {
>  			dev->stats.rx_dropped++;
> -			goto release_descriptor;
> +			goto recycle;
>  		}
>  
> -		addr = le64_to_cpu(desc->addr);
> -		rx_buf = page_address(tp->Rx_databuff[entry]);
> -
> -		dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
> -		prefetch(rx_buf);
> -		skb_copy_to_linear_data(skb, rx_buf, pkt_size);
> -		skb->tail += pkt_size;
> -		skb->len = pkt_size;
> -		dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
> +		skb_reserve(skb, R8169_RX_HEADROOM);
> +		skb_put(skb, pkt_size);
> +		skb_mark_for_recycle(skb);
>  
>  		rtl8169_rx_csum(skb, status);
>  		skb->protocol = eth_type_trans(skb, dev);
> @@ -4847,13 +4863,34 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		napi_gro_receive(&tp->napi, skb);
>  
>  		dev_sw_netstats_rx_add(dev, pkt_size);
> -release_descriptor:
> -		rtl8169_mark_to_asic(desc);
> +
> +		continue;
> +
> +recycle:
> +		page_pool_put_full_page(tp->page_pool, page, true);
>  	}
>  
>  	return count;
>  }
>  
> +static void rtl8169_rx_refill(struct rtl8169_private *tp)
> +{
> +	u32 dirty_rx = tp->dirty_rx;
> +
> +	while (dirty_rx != tp->cur_rx) {
> +		u32 entry = dirty_rx % NUM_RX_DESC;
> +
> +		if (!tp->Rx_databuff[entry]) {
> +			tp->Rx_databuff[entry] = rtl8169_alloc_rx_data(tp,
> +								       tp->RxDescArray + entry);
> +			if (!tp->Rx_databuff[entry])
> +				break;
> +		}
> +		dirty_rx++;
> +	}
> +	tp->dirty_rx = dirty_rx;
> +}
> +
>  static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  {
>  	struct rtl8169_private *tp = dev_instance;
> @@ -4921,6 +4958,7 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
>  	rtl_tx(dev, tp, budget);
>  
>  	work_done = rtl_rx(dev, tp, budget);
> +	rtl8169_rx_refill(tp);
>  
>  	if (work_done < budget && napi_complete_done(napi, work_done))
>  		rtl_irq_enable(tp);
> @@ -5775,8 +5813,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	}
>  
>  	jumbo_max = rtl_jumbo_max(tp);
> -	if (jumbo_max)
> -		dev->max_mtu = jumbo_max;
> +	if (jumbo_max) {
> +		unsigned int page_pool_mtu;
> +
> +		page_pool_mtu = R8169_RX_BUF_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN;
> +		dev->max_mtu = min_t(int, jumbo_max, page_pool_mtu);
> +	}
>  
>  	rtl_set_irq_mask(tp);
>  
> @@ -5808,7 +5850,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  
>  	if (jumbo_max)
>  		netdev_info(dev, "jumbo features [frames: %d bytes, tx checksumming: %s]\n",
> -			    jumbo_max, tp->mac_version <= RTL_GIGA_MAC_VER_06 ?
> +			    dev->max_mtu, tp->mac_version <= RTL_GIGA_MAC_VER_06 ?
>  			    "ok" : "ko");
>  
>  	if (tp->dash_type != RTL_DASH_NONE) {


  reply	other threads:[~2026-06-14 20:27 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-14  5:41 [PATCH net-next] r8169: migrate Rx path to page_pool atharva-potdar
2026-06-14 20:26 ` Heiner Kallweit [this message]
2026-06-14 22:09 ` Francois Romieu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=04959cda-8631-4346-bbbf-edc444ce242f@gmail.com \
    --to=hkallweit1@gmail.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=atharvapotdar07@gmail.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=nic_swsd@realtek.com \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox