public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [Patch net-next v1 0/7] r8169: add RSS support for RTL8127
@ 2026-05-06  8:13 javen
  2026-05-06  8:13 ` [Patch net-next v1 1/7] r8169: add support for multi irqs javen
                   ` (7 more replies)
  0 siblings, 8 replies; 14+ messages in thread
From: javen @ 2026-05-06  8:13 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

This patch series adds RSS (Receive Side Scaling) support for the r8169
ethernet driver, specifically for RTL8127 (RTL_GIGA_MAC_VER_80).

RSS enables packet distribution across multiple receive queues, which can
significantly improve network throughput on multi-core systems by allowing
parallel processing of incoming packets.

Key features:
- Multi-queue RX support (up to 8 queues)
- MSI-X interrupt with vector mapping
- Dynamic queue configuration via ethtool (-L)
- RSS hash computation for flow classification

Experiments:
Platform: AMD Ryzen Embedded R2514 with Radeon Graphics (4 Cores/8 Threads)
Arch: x86_64
Test command: 
  Server: iperf3 -s
  Client: iperf3 -c 192.168.2.1 -P 20 -t 3600
Monitor: mpstat -P ALL 1

Before this patch (Without RSS):
  Throughput: Unstable, fluctuating between 3.76 Gbits/sec and
  8.2 Gbits/sec.
  CPU Usage: A single CPU core is fully occupied, with softirq usage
  reaching up to 96%.

After this patch (With RSS enabled):
  Throughput: Stable at 9.42 Gbits/sec.
  CPU Usage: The traffic load is evenly distributed across multiple CPU
  cores. The maximum softirq on a single core dropped to 63%.
  
Other Experiments:
Link: https://lore.kernel.org/netdev/0A5279953D81BB9C+f50c9b49-3e5d-467f-b69a-7e49ed223383@radxa.com/

Javen Xu (7):
  r8169: add support for multi irqs
  r8169: add support for multi rx queues
  r8169: add support for new interrupt mapping
  r8169: enable new interrupt mapping
  r8169: add support and enable rss
  r8169: move struct ethtool_ops
  r8169: add support for ethtool

 drivers/net/ethernet/realtek/r8169_main.c | 1202 ++++++++++++++++++---
 1 file changed, 1080 insertions(+), 122 deletions(-)

-- 
2.43.0


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Patch net-next v1 1/7] r8169: add support for multi irqs
  2026-05-06  8:13 [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 javen
@ 2026-05-06  8:13 ` javen
  2026-05-06 21:28   ` Heiner Kallweit
  2026-05-06 22:53   ` Jakub Kicinski
  2026-05-06  8:13 ` [Patch net-next v1 2/7] r8169: add support for multi rx queues javen
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 14+ messages in thread
From: javen @ 2026-05-06  8:13 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

RSS uses multiple rx queues to receive packets, and each rx queue needs
its own irq and napi instance. So this patch adds support for multiple
irqs and napi instances here.

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
 drivers/net/ethernet/realtek/r8169_main.c | 199 ++++++++++++++++++++--
 1 file changed, 184 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 791277e750ba..ef74ee02c117 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -77,6 +77,7 @@
 #define R8169_RX_RING_BYTES	(NUM_RX_DESC * sizeof(struct RxDesc))
 #define R8169_TX_STOP_THRS	(MAX_SKB_FRAGS + 1)
 #define R8169_TX_START_THRS	(2 * R8169_TX_STOP_THRS)
+#define R8169_MAX_MSIX_VEC	32
 
 #define OCP_STD_PHY_BASE	0xa400
 
@@ -435,6 +436,8 @@ enum rtl8125_registers {
 #define INT_CFG0_CLKREQEN		BIT(3)
 	IntrMask_8125		= 0x38,
 	IntrStatus_8125		= 0x3c,
+	INTR_VEC_MAP_MASK	= 0x800,
+	INTR_VEC_MAP_STATUS	= 0x802,
 	INT_CFG1_8125		= 0x7a,
 	LEDSEL2			= 0x84,
 	LEDSEL1			= 0x86,
@@ -728,6 +731,19 @@ enum rtl_dash_type {
 	RTL_DASH_25_BP,
 };
 
+struct rtl8169_napi {
+	struct napi_struct napi;
+	void *priv;
+	int index;
+};
+
+struct rtl8169_irq {
+	irq_handler_t	handler;
+	unsigned int	vector;
+	u8		requested;
+	char		name[IFNAMSIZ + 10];
+};
+
 struct rtl8169_private {
 	void __iomem *mmio_addr;	/* memory map physical address */
 	struct pci_dev *pci_dev;
@@ -745,9 +761,19 @@ struct rtl8169_private {
 	dma_addr_t RxPhyAddr;
 	struct page *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
 	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
+	struct rtl8169_irq irq_tbl[R8169_MAX_MSIX_VEC];
+	struct rtl8169_napi r8169napi[R8169_MAX_MSIX_VEC];
+	u16 isr_reg[R8169_MAX_MSIX_VEC];
+	u16 imr_reg[R8169_MAX_MSIX_VEC];
+	unsigned int num_rx_rings;
 	u16 cp_cmd;
 	u16 tx_lpi_timer;
 	u32 irq_mask;
+	u8 min_irq_nvecs;
+	u8 max_irq_nvecs;
+	u8 hw_supp_isr_ver;
+	u8 hw_curr_isr_ver;
+	u8 irq_nvecs;
 	int irq;
 	struct clk *clk;
 
@@ -763,6 +789,8 @@ struct rtl8169_private {
 	unsigned aspm_manageable:1;
 	unsigned dash_enabled:1;
 	bool sfp_mode:1;
+	bool rss_support:1;
+	bool rss_enable:1;
 	dma_addr_t counters_phys_addr;
 	struct rtl8169_counters *counters;
 	struct rtl8169_tc_offsets tc_offset;
@@ -2680,6 +2708,44 @@ static void rtl_hw_reset(struct rtl8169_private *tp)
 	rtl_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
 }
 
+static void rtl_setup_mqs_reg(struct rtl8169_private *tp)
+{
+	if (tp->mac_version <= RTL_GIGA_MAC_VER_52) {
+		tp->isr_reg[0] = IntrStatus;
+		tp->imr_reg[0] = IntrMask;
+	} else {
+		tp->isr_reg[0] = IntrStatus_8125;
+		tp->imr_reg[0] = IntrMask_8125;
+	}
+
+	for (int i = 1; i < tp->max_irq_nvecs; i++)
+		tp->isr_reg[i] = (u16)(INTR_VEC_MAP_STATUS + (i - 1) * 4);
+
+	for (int i = 1; i < tp->max_irq_nvecs; i++)
+		tp->imr_reg[i] = (u16)(INTR_VEC_MAP_MASK + (i - 1) * 4);
+}
+
+static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
+{
+	tp->num_rx_rings = 1;
+
+	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_80:
+		tp->min_irq_nvecs = 1;
+		tp->max_irq_nvecs = 1;
+		tp->hw_supp_isr_ver = 6;
+		break;
+	default:
+		tp->min_irq_nvecs = 1;
+		tp->max_irq_nvecs = 1;
+		tp->hw_supp_isr_ver = 1;
+		break;
+	}
+	tp->hw_curr_isr_ver = tp->hw_supp_isr_ver;
+
+	rtl_setup_mqs_reg(tp);
+}
+
 static void rtl_request_firmware(struct rtl8169_private *tp)
 {
 	struct rtl_fw *rtl_fw;
@@ -4266,9 +4332,21 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp)
 	netdev_reset_queue(tp->dev);
 }
 
+static void rtl8169_napi_disable(struct rtl8169_private *tp)
+{
+	for (int i = 0; i < tp->irq_nvecs; i++)
+		napi_disable(&tp->r8169napi[i].napi);
+}
+
+static void rtl8169_napi_enable(struct rtl8169_private *tp)
+{
+	for (int i = 0; i < tp->irq_nvecs; i++)
+		napi_enable(&tp->r8169napi[i].napi);
+}
+
 static void rtl8169_cleanup(struct rtl8169_private *tp)
 {
-	napi_disable(&tp->napi);
+	rtl8169_napi_disable(tp);
 
 	/* Give a racing hard_start_xmit a few cycles to complete. */
 	synchronize_net();
@@ -4313,8 +4391,8 @@ static void rtl_reset_work(struct rtl8169_private *tp)
 
 	for (i = 0; i < NUM_RX_DESC; i++)
 		rtl8169_mark_to_asic(tp->RxDescArray + i);
+	rtl8169_napi_enable(tp);
 
-	napi_enable(&tp->napi);
 	rtl_hw_start(tp);
 }
 
@@ -4820,7 +4898,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 			goto release_descriptor;
 		}
 
-		skb = napi_alloc_skb(&tp->napi, pkt_size);
+		skb = napi_alloc_skb(&tp->r8169napi[0].napi, pkt_size);
 		if (unlikely(!skb)) {
 			dev->stats.rx_dropped++;
 			goto release_descriptor;
@@ -4844,7 +4922,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		if (skb->pkt_type == PACKET_MULTICAST)
 			dev->stats.multicast++;
 
-		napi_gro_receive(&tp->napi, skb);
+		napi_gro_receive(&tp->r8169napi[0].napi, skb);
 
 		dev_sw_netstats_rx_add(dev, pkt_size);
 release_descriptor:
@@ -4856,7 +4934,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 
 static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 {
-	struct rtl8169_private *tp = dev_instance;
+	struct rtl8169_napi *napi = dev_instance;
+	struct rtl8169_private *tp = napi->priv;
 	u32 status = rtl_get_events(tp);
 
 	if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
@@ -4873,13 +4952,53 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 		phy_mac_interrupt(tp->phydev);
 
 	rtl_irq_disable(tp);
-	napi_schedule(&tp->napi);
+	napi_schedule(&napi->napi);
 out:
 	rtl_ack_events(tp, status);
 
 	return IRQ_HANDLED;
 }
 
+static void rtl8169_free_irq(struct rtl8169_private *tp)
+{
+	for (int i = 0; i < tp->irq_nvecs; i++) {
+		struct rtl8169_irq *irq = &tp->irq_tbl[i];
+		struct rtl8169_napi *napi = &tp->r8169napi[i];
+
+		if (irq->requested) {
+			irq->requested = 0;
+			pci_free_irq(tp->pci_dev, i, napi);
+		}
+	}
+}
+
+static int rtl8169_request_irq(struct rtl8169_private *tp)
+{
+	const int len = sizeof(tp->irq_tbl[0].name);
+	struct net_device *dev = tp->dev;
+	struct rtl8169_napi *napi;
+	struct rtl8169_irq *irq;
+	int rc = 0;
+
+	for (int i = 0; i < tp->irq_nvecs; i++) {
+		irq = &tp->irq_tbl[i];
+
+		napi = &tp->r8169napi[i];
+		snprintf(irq->name, len, "%s-%d", dev->name, i);
+		irq->handler = rtl8169_interrupt;
+		rc = pci_request_irq(tp->pci_dev, i, irq->handler, NULL, napi, irq->name);
+		if (rc)
+			break;
+
+		irq->vector = pci_irq_vector(tp->pci_dev, i);
+		irq->requested = 1;
+	}
+
+	if (rc)
+		rtl8169_free_irq(tp);
+	return rc;
+}
+
 static void rtl_task(struct work_struct *work)
 {
 	struct rtl8169_private *tp =
@@ -4914,9 +5033,10 @@ static void rtl_task(struct work_struct *work)
 
 static int rtl8169_poll(struct napi_struct *napi, int budget)
 {
-	struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
+	struct rtl8169_napi *r8169_napi = container_of(napi, struct rtl8169_napi, napi);
+	struct rtl8169_private *tp = r8169_napi->priv;
 	struct net_device *dev = tp->dev;
-	int work_done;
+	int work_done = 0;
 
 	rtl_tx(dev, tp, budget);
 
@@ -5035,7 +5155,7 @@ static void rtl8169_up(struct rtl8169_private *tp)
 	phy_init_hw(tp->phydev);
 	phy_resume(tp->phydev);
 	rtl8169_init_phy(tp);
-	napi_enable(&tp->napi);
+	rtl8169_napi_enable(tp);
 	enable_work(&tp->wk.work);
 	rtl_reset_work(tp);
 
@@ -5053,7 +5173,7 @@ static int rtl8169_close(struct net_device *dev)
 	rtl8169_down(tp);
 	rtl8169_rx_clear(tp);
 
-	free_irq(tp->irq, tp);
+	rtl8169_free_irq(tp);
 
 	phy_disconnect(tp->phydev);
 
@@ -5108,7 +5228,8 @@ static int rtl_open(struct net_device *dev)
 	rtl_request_firmware(tp);
 
 	irqflags = pci_dev_msi_enabled(pdev) ? IRQF_NO_THREAD : IRQF_SHARED;
-	retval = request_irq(tp->irq, rtl8169_interrupt, irqflags, dev->name, tp);
+
+	retval = rtl8169_request_irq(tp);
 	if (retval < 0)
 		goto err_release_fw_2;
 
@@ -5125,7 +5246,7 @@ static int rtl_open(struct net_device *dev)
 	return retval;
 
 err_free_irq:
-	free_irq(tp->irq, tp);
+	rtl8169_free_irq(tp);
 err_release_fw_2:
 	rtl_release_firmware(tp);
 	rtl8169_rx_clear(tp);
@@ -5328,7 +5449,9 @@ static void rtl_set_irq_mask(struct rtl8169_private *tp)
 
 static int rtl_alloc_irq(struct rtl8169_private *tp)
 {
+	struct pci_dev *pdev = tp->pci_dev;
 	unsigned int flags;
+	int nvecs;
 
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
@@ -5344,7 +5467,18 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
 		break;
 	}
 
-	return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
+	nvecs = pci_alloc_irq_vectors(pdev, tp->min_irq_nvecs, tp->max_irq_nvecs, flags);
+
+	if (nvecs < 0)
+		nvecs = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+
+	if (nvecs < 0)
+		return nvecs;
+
+	tp->irq = pci_irq_vector(pdev, 0);
+	tp->irq_nvecs = nvecs;
+
+	return 0;
 }
 
 static void rtl_read_mac_address(struct rtl8169_private *tp,
@@ -5539,6 +5673,17 @@ static void rtl_hw_initialize(struct rtl8169_private *tp)
 	}
 }
 
+static int rtl8169_set_real_num_queue(struct rtl8169_private *tp)
+{
+	int retval;
+
+	retval = netif_set_real_num_tx_queues(tp->dev, 1);
+	if (retval < 0)
+		return retval;
+
+	return netif_set_real_num_rx_queues(tp->dev, tp->num_rx_rings);
+}
+
 static int rtl_jumbo_max(struct rtl8169_private *tp)
 {
 	/* Non-GBit versions don't support jumbo frames */
@@ -5599,6 +5744,19 @@ static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
 	return false;
 }
 
+static void r8169_init_napi(struct rtl8169_private *tp)
+{
+	for (int i = 0; i < tp->irq_nvecs; i++) {
+		struct rtl8169_napi *r8169napi = &tp->r8169napi[i];
+		int (*poll)(struct napi_struct *napi, int budget);
+
+		poll = rtl8169_poll;
+		netif_napi_add(tp->dev, &r8169napi->napi, poll);
+		r8169napi->priv = tp;
+		r8169napi->index = i;
+	}
+}
+
 static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	const struct rtl_chip_info *chip;
@@ -5703,11 +5861,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	rtl_hw_reset(tp);
 
+	rtl_software_parameter_initialize(tp);
+
 	rc = rtl_alloc_irq(tp);
 	if (rc < 0)
 		return dev_err_probe(&pdev->dev, rc, "Can't allocate interrupt\n");
 
-	tp->irq = pci_irq_vector(pdev, 0);
 
 	INIT_WORK(&tp->wk.work, rtl_task);
 	disable_work(&tp->wk.work);
@@ -5716,7 +5875,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	dev->ethtool_ops = &rtl8169_ethtool_ops;
 
-	netif_napi_add(dev, &tp->napi, rtl8169_poll);
+	if (!tp->rss_support) {
+		netif_napi_add(dev, &tp->r8169napi[0].napi, rtl8169_poll);
+		tp->r8169napi[0].priv = tp;
+		tp->r8169napi[0].index = 0;
+	} else {
+		r8169_init_napi(tp);
+	}
 
 	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 			   NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
@@ -5778,6 +5943,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (jumbo_max)
 		dev->max_mtu = jumbo_max;
 
+	rc = rtl8169_set_real_num_queue(tp);
+	if (rc < 0)
+		return dev_err_probe(&pdev->dev, rc, "set tx/rx num failure\n");
+
 	rtl_set_irq_mask(tp);
 
 	tp->counters = dmam_alloc_coherent (&pdev->dev, sizeof(*tp->counters),
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Patch net-next v1 2/7] r8169: add support for multi rx queues
  2026-05-06  8:13 [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 javen
  2026-05-06  8:13 ` [Patch net-next v1 1/7] r8169: add support for multi irqs javen
@ 2026-05-06  8:13 ` javen
  2026-05-06 21:45   ` Heiner Kallweit
  2026-05-06 22:54   ` Jakub Kicinski
  2026-05-06  8:13 ` [Patch net-next v1 3/7] r8169: add support for new interrupt mapping javen
                   ` (5 subsequent siblings)
  7 siblings, 2 replies; 14+ messages in thread
From: javen @ 2026-05-06  8:13 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

This patch adds support for multiple rx queues. RSS requires multiple
rx queues to receive packets, so a struct rtl8169_rx_ring is needed for
each queue.

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
 drivers/net/ethernet/realtek/r8169_main.c | 318 +++++++++++++++++-----
 1 file changed, 251 insertions(+), 67 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index ef74ee02c117..bc75dbb9901d 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -74,10 +74,11 @@
 #define NUM_TX_DESC	256	/* Number of Tx descriptor registers */
 #define NUM_RX_DESC	256	/* Number of Rx descriptor registers */
 #define R8169_TX_RING_BYTES	(NUM_TX_DESC * sizeof(struct TxDesc))
-#define R8169_RX_RING_BYTES	(NUM_RX_DESC * sizeof(struct RxDesc))
 #define R8169_TX_STOP_THRS	(MAX_SKB_FRAGS + 1)
 #define R8169_TX_START_THRS	(2 * R8169_TX_STOP_THRS)
+#define R8169_MAX_RX_QUEUES	8
 #define R8169_MAX_MSIX_VEC	32
+#define R8127_MAX_RX_QUEUES	8
 
 #define OCP_STD_PHY_BASE	0xa400
 
@@ -447,6 +448,7 @@ enum rtl8125_registers {
 	RSS_CTRL_8125		= 0x4500,
 	Q_NUM_CTRL_8125		= 0x4800,
 	EEE_TXIDLE_TIMER_8125	= 0x6048,
+	RDSAR_Q1_LOW		= 0x4000,
 };
 
 #define LEDSEL_MASK_8125	0x23f
@@ -731,6 +733,19 @@ enum rtl_dash_type {
 	RTL_DASH_25_BP,
 };
 
+struct rtl8169_rx_ring {
+	u32 index;					/* Rx queue index */
+	u32 cur_rx;					/* Index of next Rx pkt. */
+	u32 dirty_rx;					/* Index for recycling. */
+	u32 num_rx_desc;				/* num of Rx desc */
+	struct RxDesc *rx_desc_array;			/* array of Rx Desc*/
+	u32 rx_desc_alloc_size;				/* memory size per descs of ring */
+	dma_addr_t rx_desc_phy_addr[NUM_RX_DESC];	/* Rx data buffer physical dma address */
+	dma_addr_t rx_phy_addr;				/* Rx desc physical address */
+	struct page *rx_databuff[NUM_RX_DESC];		/* Rx data buffers */
+	u16 rdsar_reg;					/* Receive Descriptor Start Address */
+};
+
 struct rtl8169_napi {
 	struct napi_struct napi;
 	void *priv;
@@ -744,6 +759,13 @@ struct rtl8169_irq {
 	char		name[IFNAMSIZ + 10];
 };
 
+enum rx_desc_ring_type {
+	RX_DESC_RING_TYPE_UNKNOWN = 0,
+	RX_DESC_RING_TYPE_DEFAULT,
+	RX_DESC_RING_TYPE_RSS,
+	RX_DESC_RING_TYPE_MAX
+};
+
 struct rtl8169_private {
 	void __iomem *mmio_addr;	/* memory map physical address */
 	struct pci_dev *pci_dev;
@@ -752,28 +774,28 @@ struct rtl8169_private {
 	struct napi_struct napi;
 	enum mac_version mac_version;
 	enum rtl_dash_type dash_type;
-	u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
 	u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
 	u32 dirty_tx;
 	struct TxDesc *TxDescArray;	/* 256-aligned Tx descriptor ring */
-	struct RxDesc *RxDescArray;	/* 256-aligned Rx descriptor ring */
 	dma_addr_t TxPhyAddr;
-	dma_addr_t RxPhyAddr;
-	struct page *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
 	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
 	struct rtl8169_irq irq_tbl[R8169_MAX_MSIX_VEC];
 	struct rtl8169_napi r8169napi[R8169_MAX_MSIX_VEC];
+	struct rtl8169_rx_ring rx_ring[R8169_MAX_RX_QUEUES];
 	u16 isr_reg[R8169_MAX_MSIX_VEC];
 	u16 imr_reg[R8169_MAX_MSIX_VEC];
 	unsigned int num_rx_rings;
 	u16 cp_cmd;
 	u16 tx_lpi_timer;
 	u32 irq_mask;
+	u16 hw_supp_num_rx_queues;
 	u8 min_irq_nvecs;
 	u8 max_irq_nvecs;
 	u8 hw_supp_isr_ver;
 	u8 hw_curr_isr_ver;
 	u8 irq_nvecs;
+	u8 init_rx_desc_type;
+	u8 recheck_desc_ownbit;
 	int irq;
 	struct clk *clk;
 
@@ -2647,9 +2669,27 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
 	}
 }
 
+static void rtl8169_rx_desc_init(struct rtl8169_private *tp)
+{
+	for (int i = 0; i < tp->num_rx_rings; i++) {
+		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
+
+		memset(ring->rx_desc_array, 0x0, ring->rx_desc_alloc_size);
+	}
+}
+
 static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
 {
-	tp->dirty_tx = tp->cur_tx = tp->cur_rx = 0;
+	tp->dirty_tx = 0;
+	tp->cur_tx = 0;
+
+	for (int i = 0; i < tp->hw_supp_num_rx_queues; i++) {
+		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
+
+		ring->dirty_rx = 0;
+		ring->cur_rx = 0;
+		ring->index = i;
+	}
 }
 
 static void rtl_jumbo_config(struct rtl8169_private *tp)
@@ -2708,8 +2748,18 @@ static void rtl_hw_reset(struct rtl8169_private *tp)
 	rtl_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
 }
 
+static void rtl_set_ring_size(struct rtl8169_private *tp, u32 rx_num)
+{
+	for (int i = 0; i < tp->hw_supp_num_rx_queues; i++)
+		tp->rx_ring[i].num_rx_desc = rx_num;
+}
+
 static void rtl_setup_mqs_reg(struct rtl8169_private *tp)
 {
+	tp->rx_ring[0].rdsar_reg = RxDescAddrLow;
+	for (int i = 1; i < tp->hw_supp_num_rx_queues; i++)
+		tp->rx_ring[i].rdsar_reg = (u16)(RDSAR_Q1_LOW + (i - 1) * 8);
+
 	if (tp->mac_version <= RTL_GIGA_MAC_VER_52) {
 		tp->isr_reg[0] = IntrStatus;
 		tp->imr_reg[0] = IntrMask;
@@ -2733,17 +2783,21 @@ static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_80:
 		tp->min_irq_nvecs = 1;
 		tp->max_irq_nvecs = 1;
+		tp->hw_supp_num_rx_queues = R8127_MAX_RX_QUEUES;
 		tp->hw_supp_isr_ver = 6;
 		break;
 	default:
 		tp->min_irq_nvecs = 1;
 		tp->max_irq_nvecs = 1;
+		tp->hw_supp_num_rx_queues = 1;
 		tp->hw_supp_isr_ver = 1;
 		break;
 	}
+	tp->init_rx_desc_type = RX_DESC_RING_TYPE_DEFAULT;
 	tp->hw_curr_isr_ver = tp->hw_supp_isr_ver;
 
 	rtl_setup_mqs_reg(tp);
+	rtl_set_ring_size(tp, NUM_RX_DESC);
 }
 
 static void rtl_request_firmware(struct rtl8169_private *tp)
@@ -2877,8 +2931,13 @@ static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
 	 */
 	RTL_W32(tp, TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
 	RTL_W32(tp, TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
-	RTL_W32(tp, RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
-	RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
+
+	for (int i = 0; i < tp->num_rx_rings; i++) {
+		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
+
+		RTL_W32(tp, ring->rdsar_reg, ((u64)ring->rx_phy_addr) & DMA_BIT_MASK(32));
+		RTL_W32(tp, ring->rdsar_reg + 4, ((u64)ring->rx_phy_addr >> 32));
+	}
 }
 
 static void rtl8169_set_magic_reg(struct rtl8169_private *tp)
@@ -4214,7 +4273,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-static void rtl8169_mark_to_asic(struct RxDesc *desc)
+static void rtl8169_mark_to_asic_default(struct RxDesc *desc)
 {
 	u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
 
@@ -4224,13 +4283,19 @@ static void rtl8169_mark_to_asic(struct RxDesc *desc)
 	WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
 }
 
+static void rtl8169_mark_to_asic(struct rtl8169_private *tp, struct RxDesc *desc)
+{
+	rtl8169_mark_to_asic_default(desc);
+}
+
 static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
-					  struct RxDesc *desc)
+					  struct rtl8169_rx_ring *ring, unsigned int index)
 {
 	struct device *d = tp_to_dev(tp);
 	int node = dev_to_node(d);
 	dma_addr_t mapping;
 	struct page *data;
+	struct RxDesc *desc = ring->rx_desc_array + index;
 
 	data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
 	if (!data)
@@ -4244,55 +4309,111 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
 	}
 
 	desc->addr = cpu_to_le64(mapping);
-	rtl8169_mark_to_asic(desc);
+	ring->rx_desc_phy_addr[index] = mapping;
+	rtl8169_mark_to_asic(tp, desc);
 
 	return data;
 }
 
-static void rtl8169_rx_clear(struct rtl8169_private *tp)
+static void rtl8169_rx_clear(struct rtl8169_private *tp, struct rtl8169_rx_ring *ring)
 {
 	int i;
 
-	for (i = 0; i < NUM_RX_DESC && tp->Rx_databuff[i]; i++) {
+	for (i = 0; i < NUM_RX_DESC && ring->rx_databuff[i]; i++) {
 		dma_unmap_page(tp_to_dev(tp),
-			       le64_to_cpu(tp->RxDescArray[i].addr),
+			       ring->rx_desc_phy_addr[i],
 			       R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
-		__free_pages(tp->Rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
-		tp->Rx_databuff[i] = NULL;
-		tp->RxDescArray[i].addr = 0;
-		tp->RxDescArray[i].opts1 = 0;
+		__free_pages(ring->rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
+		ring->rx_databuff[i] = NULL;
+		ring->rx_desc_phy_addr[i] = 0;
+		ring->rx_desc_array[i].addr = 0;
+		ring->rx_desc_array[i].opts1 = 0;
 	}
 }
 
-static int rtl8169_rx_fill(struct rtl8169_private *tp)
+static void rtl8169_mark_as_last_descriptor_default(struct RxDesc *desc)
+{
+	desc->opts1 |= cpu_to_le32(RingEnd);
+}
+
+static void rtl8169_mark_as_last_descriptor(struct rtl8169_private *tp, struct RxDesc *desc)
+{
+	rtl8169_mark_as_last_descriptor_default(desc);
+}
+
+static int rtl8169_rx_fill(struct rtl8169_private *tp, struct rtl8169_rx_ring *ring)
 {
 	int i;
 
 	for (i = 0; i < NUM_RX_DESC; i++) {
 		struct page *data;
 
-		data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i);
+		data = rtl8169_alloc_rx_data(tp, ring, i);
 		if (!data) {
-			rtl8169_rx_clear(tp);
+			rtl8169_rx_clear(tp, ring);
 			return -ENOMEM;
 		}
-		tp->Rx_databuff[i] = data;
+		ring->rx_databuff[i] = data;
 	}
 
 	/* mark as last descriptor in the ring */
-	tp->RxDescArray[NUM_RX_DESC - 1].opts1 |= cpu_to_le32(RingEnd);
+	rtl8169_mark_as_last_descriptor(tp, &ring->rx_desc_array[NUM_RX_DESC - 1]);
+
+	return 0;
+}
+
+static int rtl8169_alloc_rx_desc(struct rtl8169_private *tp)
+{
+	struct rtl8169_rx_ring *ring;
+	struct pci_dev *pdev = tp->pci_dev;
 
+	for (int i = 0; i < tp->num_rx_rings; i++) {
+		ring = &tp->rx_ring[i];
+		ring->rx_desc_alloc_size = (ring->num_rx_desc + 1) * sizeof(struct RxDesc);
+		ring->rx_desc_array = dma_alloc_coherent(&pdev->dev,
+							 ring->rx_desc_alloc_size,
+							 &ring->rx_phy_addr,
+							 GFP_KERNEL);
+		if (!ring->rx_desc_array)
+			return -1;
+	}
 	return 0;
 }
 
+static void rtl8169_free_rx_desc(struct rtl8169_private *tp)
+{
+	struct rtl8169_rx_ring *ring;
+	struct pci_dev *pdev = tp->pci_dev;
+
+	for (int i = 0; i < tp->num_rx_rings; i++) {
+		ring = &tp->rx_ring[i];
+		if (ring->rx_desc_array) {
+			dma_free_coherent(&pdev->dev,
+					  ring->rx_desc_alloc_size,
+					  ring->rx_desc_array,
+					  ring->rx_phy_addr);
+			ring->rx_desc_array = NULL;
+		}
+	}
+}
+
 static int rtl8169_init_ring(struct rtl8169_private *tp)
 {
+	int retval = 0;
+
 	rtl8169_init_ring_indexes(tp);
+	rtl8169_rx_desc_init(tp);
 
 	memset(tp->tx_skb, 0, sizeof(tp->tx_skb));
-	memset(tp->Rx_databuff, 0, sizeof(tp->Rx_databuff));
 
-	return rtl8169_rx_fill(tp);
+	for (int i = 0; i < tp->num_rx_rings; i++) {
+		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
+
+		memset(ring->rx_databuff, 0, sizeof(ring->rx_databuff));
+		retval = rtl8169_rx_fill(tp, ring);
+	}
+
+	return retval;
 }
 
 static void rtl8169_unmap_tx_skb(struct rtl8169_private *tp, unsigned int entry)
@@ -4381,16 +4502,24 @@ static void rtl8169_cleanup(struct rtl8169_private *tp)
 	rtl8169_init_ring_indexes(tp);
 }
 
-static void rtl_reset_work(struct rtl8169_private *tp)
+static void rtl8169_rx_desc_reset(struct rtl8169_private *tp)
 {
-	int i;
+	for (int i = 0; i < tp->num_rx_rings; i++) {
+		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
 
+		for (int j = 0; j < ring->num_rx_desc; j++)
+			rtl8169_mark_to_asic(tp, ring->rx_desc_array + j);
+	}
+}
+
+static void rtl_reset_work(struct rtl8169_private *tp)
+{
 	netif_stop_queue(tp->dev);
 
 	rtl8169_cleanup(tp);
 
-	for (i = 0; i < NUM_RX_DESC; i++)
-		rtl8169_mark_to_asic(tp->RxDescArray + i);
+	rtl8169_rx_desc_reset(tp);
+
 	rtl8169_napi_enable(tp);
 
 	rtl_hw_start(tp);
@@ -4784,6 +4913,11 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev)
 	rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
 }
 
+static void rtl8169_desc_quirk(struct rtl8169_private *tp)
+{
+	RTL_R8(tp, tp->imr_reg[0]);
+}
+
 static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,
 		   int budget)
 {
@@ -4836,9 +4970,11 @@ static inline int rtl8169_fragmented_frame(u32 status)
 	return (status & (FirstFrag | LastFrag)) != (FirstFrag | LastFrag);
 }
 
-static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
+static inline void rtl8169_rx_csum_default(struct rtl8169_private *tp,
+					   struct sk_buff *skb,
+					   struct RxDesc *desc)
 {
-	u32 status = opts1 & (RxProtoMask | RxCSFailMask);
+	u32 status = le32_to_cpu(desc->opts1) & (RxProtoMask | RxCSFailMask);
 
 	if (status == RxProtoTCP || status == RxProtoUDP)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -4846,22 +4982,71 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
 		skb_checksum_none_assert(skb);
 }
 
-static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget)
+static inline void rtl8169_rx_csum(struct rtl8169_private *tp,
+				   struct sk_buff *skb,
+				   struct RxDesc *desc)
+{
+	rtl8169_rx_csum_default(tp, skb, desc);
+}
+
+static u32 rtl8169_rx_desc_opts1(struct rtl8169_private *tp, struct RxDesc *desc)
+{
+	return READ_ONCE(desc->opts1);
+}
+
+static bool rtl8169_check_rx_desc_error(struct net_device *dev,
+					struct rtl8169_private *tp,
+					u32 status)
+{
+	if (unlikely(status & RxRES)) {
+		if (status & (RxRWT | RxRUNT))
+			dev->stats.rx_length_errors++;
+		if (status & RxCRC)
+			dev->stats.rx_crc_errors++;
+		return true;
+	}
+	return false;
+}
+
+static inline void rtl8169_set_desc_dma_addr(struct rtl8169_private *tp,
+					     struct RxDesc *desc,
+					     dma_addr_t mapping)
+{
+	desc->addr = cpu_to_le64(mapping);
+}
+
+static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
+		  struct rtl8169_rx_ring *ring, int budget)
 {
 	struct device *d = tp_to_dev(tp);
 	int count;
 
-	for (count = 0; count < budget; count++, tp->cur_rx++) {
-		unsigned int pkt_size, entry = tp->cur_rx % NUM_RX_DESC;
-		struct RxDesc *desc = tp->RxDescArray + entry;
+	for (count = 0; count < budget; count++, ring->cur_rx++) {
+		unsigned int pkt_size, entry = ring->cur_rx % ring->num_rx_desc;
+		struct RxDesc *desc = ring->rx_desc_array + entry;
 		struct sk_buff *skb;
 		const void *rx_buf;
 		dma_addr_t addr;
 		u32 status;
 
-		status = le32_to_cpu(READ_ONCE(desc->opts1));
-		if (status & DescOwn)
-			break;
+		status = le32_to_cpu(rtl8169_rx_desc_opts1(tp, desc));
+
+		if (status & DescOwn) {
+			if (!tp->recheck_desc_ownbit)
+				break;
+
+			/* Workaround for a hardware issue:
+			 * Hardware might trigger RX interrupt before the DMA
+			 * engine fully updates RX desc ownbit in host memory.
+			 * So we do a quirk and re-read to avoid missing RX
+			 * packets.
+			 */
+			tp->recheck_desc_ownbit = false;
+			rtl8169_desc_quirk(tp);
+			status = le32_to_cpu(rtl8169_rx_desc_opts1(tp, desc));
+			if (status & DescOwn)
+				break;
+		}
 
 		/* This barrier is needed to keep us from reading
 		 * any other fields out of the Rx descriptor until
@@ -4869,20 +5054,15 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		 */
 		dma_rmb();
 
-		if (unlikely(status & RxRES)) {
+		if (rtl8169_check_rx_desc_error(dev, tp, status)) {
 			if (net_ratelimit())
 				netdev_warn(dev, "Rx ERROR. status = %08x\n",
 					    status);
+
 			dev->stats.rx_errors++;
-			if (status & (RxRWT | RxRUNT))
-				dev->stats.rx_length_errors++;
-			if (status & RxCRC)
-				dev->stats.rx_crc_errors++;
 
 			if (!(dev->features & NETIF_F_RXALL))
 				goto release_descriptor;
-			else if (status & RxRWT || !(status & (RxRUNT | RxCRC)))
-				goto release_descriptor;
 		}
 
 		pkt_size = status & GENMASK(13, 0);
@@ -4898,14 +5078,14 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 			goto release_descriptor;
 		}
 
-		skb = napi_alloc_skb(&tp->r8169napi[0].napi, pkt_size);
+		skb = napi_alloc_skb(&tp->r8169napi[ring->index].napi, pkt_size);
 		if (unlikely(!skb)) {
 			dev->stats.rx_dropped++;
 			goto release_descriptor;
 		}
 
-		addr = le64_to_cpu(desc->addr);
-		rx_buf = page_address(tp->Rx_databuff[entry]);
+		addr = ring->rx_desc_phy_addr[entry];
+		rx_buf = page_address(ring->rx_databuff[entry]);
 
 		dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
 		prefetch(rx_buf);
@@ -4914,7 +5094,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		skb->len = pkt_size;
 		dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
 
-		rtl8169_rx_csum(skb, status);
+		rtl8169_rx_csum(tp, skb, desc);
 		skb->protocol = eth_type_trans(skb, dev);
 
 		rtl8169_rx_vlan_tag(desc, skb);
@@ -4922,11 +5102,12 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		if (skb->pkt_type == PACKET_MULTICAST)
 			dev->stats.multicast++;
 
-		napi_gro_receive(&tp->r8169napi[0].napi, skb);
+		napi_gro_receive(&tp->r8169napi[ring->index].napi, skb);
 
 		dev_sw_netstats_rx_add(dev, pkt_size);
 release_descriptor:
-		rtl8169_mark_to_asic(desc);
+		rtl8169_set_desc_dma_addr(tp, desc, ring->rx_desc_phy_addr[entry]);
+		rtl8169_mark_to_asic(tp, desc);
 	}
 
 	return count;
@@ -4952,6 +5133,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 		phy_mac_interrupt(tp->phydev);
 
 	rtl_irq_disable(tp);
+	tp->recheck_desc_ownbit = true;
 	napi_schedule(&napi->napi);
 out:
 	rtl_ack_events(tp, status);
@@ -5040,7 +5222,8 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
 
 	rtl_tx(dev, tp, budget);
 
-	work_done = rtl_rx(dev, tp, budget);
+	for (int i = 0; i < tp->num_rx_rings; i++)
+		work_done += rtl_rx(dev, tp, &tp->rx_ring[i], budget);
 
 	if (work_done < budget && napi_complete_done(napi, work_done))
 		rtl_irq_enable(tp);
@@ -5168,21 +5351,21 @@ static int rtl8169_close(struct net_device *dev)
 	struct pci_dev *pdev = tp->pci_dev;
 
 	pm_runtime_get_sync(&pdev->dev);
-
 	netif_stop_queue(dev);
+
 	rtl8169_down(tp);
-	rtl8169_rx_clear(tp);
+	for (int i = 0; i < tp->num_rx_rings; i++)
+		rtl8169_rx_clear(tp, &tp->rx_ring[i]);
 
 	rtl8169_free_irq(tp);
 
 	phy_disconnect(tp->phydev);
 
-	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
-			  tp->RxPhyAddr);
 	dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray,
 			  tp->TxPhyAddr);
 	tp->TxDescArray = NULL;
-	tp->RxDescArray = NULL;
+
+	rtl8169_free_rx_desc(tp);
 
 	pm_runtime_put_sync(&pdev->dev);
 
@@ -5211,16 +5394,15 @@ static int rtl_open(struct net_device *dev)
 	 * Rx and Tx descriptors needs 256 bytes alignment.
 	 * dma_alloc_coherent provides more.
 	 */
+
 	tp->TxDescArray = dma_alloc_coherent(&pdev->dev, R8169_TX_RING_BYTES,
 					     &tp->TxPhyAddr, GFP_KERNEL);
 	if (!tp->TxDescArray)
-		goto out;
-
-	tp->RxDescArray = dma_alloc_coherent(&pdev->dev, R8169_RX_RING_BYTES,
-					     &tp->RxPhyAddr, GFP_KERNEL);
-	if (!tp->RxDescArray)
 		goto err_free_tx_0;
 
+	if (rtl8169_alloc_rx_desc(tp) < 0)
+		goto err_free_rx_1;
+
 	retval = rtl8169_init_ring(tp);
 	if (retval < 0)
 		goto err_free_rx_1;
@@ -5249,11 +5431,10 @@ static int rtl_open(struct net_device *dev)
 	rtl8169_free_irq(tp);
 err_release_fw_2:
 	rtl_release_firmware(tp);
-	rtl8169_rx_clear(tp);
+	for (int i = 0; i < tp->num_rx_rings; i++)
+		rtl8169_rx_clear(tp, &tp->rx_ring[i]);
 err_free_rx_1:
-	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
-			  tp->RxPhyAddr);
-	tp->RxDescArray = NULL;
+	rtl8169_free_rx_desc(tp);
 err_free_tx_0:
 	dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray,
 			  tp->TxPhyAddr);
@@ -5767,7 +5948,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	u32 txconfig;
 	u32 xid;
 
-	dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp));
+	dev = devm_alloc_etherdev_mqs(&pdev->dev, sizeof(*tp),
+				      1,
+				      R8169_MAX_RX_QUEUES);
+
 	if (!dev)
 		return -ENOMEM;
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Patch net-next v1 3/7] r8169: add support for new interrupt mapping
  2026-05-06  8:13 [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 javen
  2026-05-06  8:13 ` [Patch net-next v1 1/7] r8169: add support for multi irqs javen
  2026-05-06  8:13 ` [Patch net-next v1 2/7] r8169: add support for multi rx queues javen
@ 2026-05-06  8:13 ` javen
  2026-05-06  8:13 ` [Patch net-next v1 4/7] r8169: enable " javen
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: javen @ 2026-05-06  8:13 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

To support RSS, the number of hardware interrupt bits must match the
number of software interrupt vectors, so add support for the new
interrupt mapping here. ISR_VEC_MAP_REG is the hardware register that
indicates the interrupt status. IMR_SET_VEC_MAP_REG is the interrupt
mask register that is written to enable an irq.

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
 drivers/net/ethernet/realtek/r8169_main.c | 155 ++++++++++++++++++++--
 1 file changed, 145 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index bc75dbb9901d..671f82c326d9 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -77,6 +77,7 @@
 #define R8169_TX_STOP_THRS	(MAX_SKB_FRAGS + 1)
 #define R8169_TX_START_THRS	(2 * R8169_TX_STOP_THRS)
 #define R8169_MAX_RX_QUEUES	8
+#define R8127_MAX_TX_QUEUES	8
 #define R8169_MAX_MSIX_VEC	32
 #define R8127_MAX_RX_QUEUES	8
 
@@ -449,8 +450,14 @@ enum rtl8125_registers {
 	Q_NUM_CTRL_8125		= 0x4800,
 	EEE_TXIDLE_TIMER_8125	= 0x6048,
 	RDSAR_Q1_LOW		= 0x4000,
+	IMR_SET_VEC_MAP_REG	= 0x0d0c,
+	IMR_CLEAR_VEC_MAP_REG	= 0x0d00,
+	ISR_VEC_MAP_REG		= 0x0d04,
 };
 
+#define MSIX_ID_VEC_MAP_LINKCHG		29
+#define RTL_VEC_MAP_ENABLE		BIT(0)
+
 #define LEDSEL_MASK_8125	0x23f
 
 #define RX_VLAN_INNER_8125	BIT(22)
@@ -581,6 +588,9 @@ enum rtl_register_content {
 
 	/* magic enable v2 */
 	MagicPacket_v2	= (1 << 16),	/* Wake up when receives a Magic Packet */
+#define	ISRIMR_LINKCHG	BIT(29)
+#define	ISRIMR_TOK_Q0	BIT(8)
+#define	ISRIMR_ROK_Q0	BIT(0)
 };
 
 enum rtl_desc_bit {
@@ -796,6 +806,7 @@ struct rtl8169_private {
 	u8 irq_nvecs;
 	u8 init_rx_desc_type;
 	u8 recheck_desc_ownbit;
+	unsigned int features;
 	int irq;
 	struct clk *clk;
 
@@ -1694,26 +1705,36 @@ static u32 rtl_get_events(struct rtl8169_private *tp)
 
 static void rtl_ack_events(struct rtl8169_private *tp, u32 bits)
 {
-	if (rtl_is_8125(tp))
+	if (rtl_is_8125(tp)) {
 		RTL_W32(tp, IntrStatus_8125, bits);
-	else
+		if (tp->features & RTL_VEC_MAP_ENABLE)
+			RTL_W32(tp, ISR_VEC_MAP_REG, 0xffffffff);
+	} else {
 		RTL_W16(tp, IntrStatus, bits);
+	}
 }
 
 static void rtl_irq_disable(struct rtl8169_private *tp)
 {
-	if (rtl_is_8125(tp))
+	if (rtl_is_8125(tp)) {
 		RTL_W32(tp, IntrMask_8125, 0);
-	else
+		if (tp->features & RTL_VEC_MAP_ENABLE)
+			RTL_W32(tp, IMR_CLEAR_VEC_MAP_REG, 0xffffffff);
+	} else {
 		RTL_W16(tp, IntrMask, 0);
+	}
 }
 
 static void rtl_irq_enable(struct rtl8169_private *tp)
 {
-	if (rtl_is_8125(tp))
-		RTL_W32(tp, IntrMask_8125, tp->irq_mask);
-	else
+	if (rtl_is_8125(tp)) {
+		if (tp->features & RTL_VEC_MAP_ENABLE)
+			RTL_W32(tp, IMR_SET_VEC_MAP_REG, tp->irq_mask);
+		else
+			RTL_W32(tp, IntrMask_8125, tp->irq_mask);
+	} else {
 		RTL_W16(tp, IntrMask, tp->irq_mask);
+	}
 }
 
 static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
@@ -5154,6 +5175,44 @@ static void rtl8169_free_irq(struct rtl8169_private *tp)
 	}
 }
 
+static void rtl8169_disable_hw_interrupt_msix(struct rtl8169_private *tp, int message_id)
+{
+	RTL_W32(tp, IMR_CLEAR_VEC_MAP_REG, BIT(message_id));
+}
+
+static void rtl8169_clear_hw_isr(struct rtl8169_private *tp, int message_id)
+{
+	RTL_W32(tp, ISR_VEC_MAP_REG, BIT(message_id));
+}
+
+static void rtl8169_enable_hw_interrupt_msix(struct rtl8169_private *tp, int message_id)
+{
+	RTL_W32(tp, IMR_SET_VEC_MAP_REG, BIT(message_id));
+}
+
+static irqreturn_t rtl8169_interrupt_msix(int irq, void *dev_instance)
+{
+	struct rtl8169_napi *napi = dev_instance;
+	struct rtl8169_private *tp = napi->priv;
+	int message_id = napi->index;
+
+	rtl8169_disable_hw_interrupt_msix(tp, message_id);
+
+	rtl8169_clear_hw_isr(tp, message_id);
+
+	if (message_id == MSIX_ID_VEC_MAP_LINKCHG) {
+		phy_mac_interrupt(tp->phydev);
+		rtl8169_enable_hw_interrupt_msix(tp, message_id);
+		return IRQ_HANDLED;
+	}
+
+	tp->recheck_desc_ownbit = true;
+
+	napi_schedule(&napi->napi);
+
+	return IRQ_HANDLED;
+}
+
 static int rtl8169_request_irq(struct rtl8169_private *tp)
 {
 	const int len = sizeof(tp->irq_tbl[0].name);
@@ -5164,6 +5223,10 @@ static int rtl8169_request_irq(struct rtl8169_private *tp)
 
 	for (int i = 0; i < tp->irq_nvecs; i++) {
 		irq = &tp->irq_tbl[i];
+		if (tp->features & RTL_VEC_MAP_ENABLE && tp->hw_curr_isr_ver > 1)
+			irq->handler = rtl8169_interrupt_msix;
+		else
+			irq->handler = rtl8169_interrupt;
 
 		napi = &tp->r8169napi[i];
 		snprintf(irq->name, len, "%s-%d", dev->name, i);
@@ -5622,10 +5685,16 @@ static const struct net_device_ops rtl_netdev_ops = {
 
 static void rtl_set_irq_mask(struct rtl8169_private *tp)
 {
-	tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
+	if (tp->features & RTL_VEC_MAP_ENABLE) {
+		tp->irq_mask = ISRIMR_LINKCHG | ISRIMR_TOK_Q0;
+		for (int i = 0; i < tp->num_rx_rings; i++)
+			tp->irq_mask |= ISRIMR_ROK_Q0 << i;
+	} else {
+		tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
 
-	if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
-		tp->irq_mask |= SYSErr | RxFIFOOver;
+		if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
+			tp->irq_mask |= SYSErr | RxFIFOOver;
+	}
 }
 
 static int rtl_alloc_irq(struct rtl8169_private *tp)
@@ -5659,6 +5728,9 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
 	tp->irq = pci_irq_vector(pdev, 0);
 	tp->irq_nvecs = nvecs;
 
+	if (nvecs > 1)
+		tp->features |= RTL_VEC_MAP_ENABLE;
+
 	return 0;
 }
 
@@ -5925,6 +5997,53 @@ static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
 	return false;
 }
 
+static int rtl8169_poll_msix_rx(struct napi_struct *napi, int budget)
+{
+	struct rtl8169_napi *r8169_napi = container_of(napi, struct rtl8169_napi, napi);
+	struct rtl8169_private *tp = r8169_napi->priv;
+	const int message_id = r8169_napi->index;
+	struct net_device *dev = tp->dev;
+	int work_done = 0;
+
+	if (message_id < tp->num_rx_rings)
+		work_done += rtl_rx(dev, tp, &tp->rx_ring[message_id], budget);
+
+	if (work_done < budget && napi_complete_done(napi, work_done))
+		rtl8169_enable_hw_interrupt_msix(tp, message_id);
+
+	return work_done;
+}
+
+static int rtl8169_poll_msix_tx(struct napi_struct *napi, int budget)
+{
+	struct rtl8169_napi *r8169_napi = container_of(napi, struct rtl8169_napi, napi);
+	struct rtl8169_private *tp = r8169_napi->priv;
+	const int message_id = r8169_napi->index;
+	int tx_ring_idx = message_id - 8;
+	struct net_device *dev = tp->dev;
+	unsigned int work_done = 0;
+
+	if (tx_ring_idx >= 0)
+		rtl_tx(dev, tp, budget);
+
+	if (work_done < budget && napi_complete_done(napi, work_done))
+		rtl8169_enable_hw_interrupt_msix(tp, message_id);
+
+	return work_done;
+}
+
+static int rtl8169_poll_msix_other(struct napi_struct *napi, int budget)
+{
+	struct rtl8169_napi *r8169_napi = container_of(napi, struct rtl8169_napi, napi);
+	struct rtl8169_private *tp = r8169_napi->priv;
+	const int message_id = r8169_napi->index;
+
+	napi_complete_done(napi, budget);
+	rtl8169_enable_hw_interrupt_msix(tp, message_id);
+
+	return 1;
+}
+
 static void r8169_init_napi(struct rtl8169_private *tp)
 {
 	for (int i = 0; i < tp->irq_nvecs; i++) {
@@ -5932,6 +6051,22 @@ static void r8169_init_napi(struct rtl8169_private *tp)
 		int (*poll)(struct napi_struct *napi, int budget);
 
 		poll = rtl8169_poll;
+		if (tp->features & RTL_VEC_MAP_ENABLE) {
+			switch (tp->hw_curr_isr_ver) {
+			case 6:
+				if (i < R8127_MAX_RX_QUEUES)
+					poll = rtl8169_poll_msix_rx;
+				else if (i >= R8127_MAX_RX_QUEUES &&
+					 i < (R8127_MAX_RX_QUEUES +
+					 R8127_MAX_TX_QUEUES))
+					poll = rtl8169_poll_msix_tx;
+				else
+					poll = rtl8169_poll_msix_other;
+				break;
+			default:
+				break;
+			}
+		}
 		netif_napi_add(tp->dev, &r8169napi->napi, poll);
 		r8169napi->priv = tp;
 		r8169napi->index = i;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Patch net-next v1 4/7] r8169: enable new interrupt mapping
  2026-05-06  8:13 [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 javen
                   ` (2 preceding siblings ...)
  2026-05-06  8:13 ` [Patch net-next v1 3/7] r8169: add support for new interrupt mapping javen
@ 2026-05-06  8:13 ` javen
  2026-05-06  8:13 ` [Patch net-next v1 5/7] r8169: add support and enable rss javen
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: javen @ 2026-05-06  8:13 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

This patch enables new interrupt mapping for RTL8127.

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
 drivers/net/ethernet/realtek/r8169_main.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 671f82c326d9..69601e077646 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4004,6 +4004,15 @@ DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond)
 	return r8168_mac_ocp_read(tp, 0xe00e) & BIT(13);
 }
 
+static void rtl8125_hw_set_interrupt_type(struct rtl8169_private *tp)
+{
+	u8 tmp;
+
+	tmp = RTL_R8(tp, INT_CFG0_8125);
+	tmp |= INT_CFG0_ENABLE_8125;
+	RTL_W8(tp, INT_CFG0_8125, tmp);
+}
+
 static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
 {
 	rtl_pcie_state_l2l3_disable(tp);
@@ -4012,6 +4021,9 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
 	RTL_W32(tp, RSS_CTRL_8125, 0);
 	RTL_W16(tp, Q_NUM_CTRL_8125, 0);
 
+	if (tp->features & RTL_VEC_MAP_ENABLE)
+		rtl8125_hw_set_interrupt_type(tp);
+
 	/* disable UPS */
 	r8168_mac_ocp_modify(tp, 0xd40a, 0x0010, 0x0000);
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Patch net-next v1 5/7] r8169: add support and enable rss
  2026-05-06  8:13 [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 javen
                   ` (3 preceding siblings ...)
  2026-05-06  8:13 ` [Patch net-next v1 4/7] r8169: enable " javen
@ 2026-05-06  8:13 ` javen
  2026-05-06  8:13 ` [Patch net-next v1 6/7] r8169: move struct ethtool_ops javen
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: javen @ 2026-05-06  8:13 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

This patch adds support for and enables RSS on RTL8127.

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
 drivers/net/ethernet/realtek/r8169_main.c | 361 ++++++++++++++++++++--
 1 file changed, 343 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 69601e077646..9b42cee24b8a 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -80,6 +80,18 @@
 #define R8127_MAX_TX_QUEUES	8
 #define R8169_MAX_MSIX_VEC	32
 #define R8127_MAX_RX_QUEUES	8
+#define R8127_MAX_IRQ		32
+#define R8127_MIN_IRQ		30
+#define RTL_RSS_KEY_SIZE	40
+#define RSS_CPU_NUM_OFFSET	16
+#define RSS_MASK_BITS_OFFSET	8
+#define RTL_MAX_INDIRECTION_TABLE_ENTRIES 128
+#define RXS_RSS_UDP		BIT(27)
+#define RXS_RSS_IPV4		BIT(28)
+#define RXS_RSS_IPV6		BIT(29)
+#define RXS_RSS_TCP		BIT(30)
+#define RXS_RSS_L3_TYPE_MASK	(RXS_RSS_IPV4 | RXS_RSS_IPV6)
+#define RXS_RSS_L4_TYPE_MASK	(RXS_RSS_TCP | RXS_RSS_UDP)
 
 #define OCP_STD_PHY_BASE	0xa400
 
@@ -449,6 +461,7 @@ enum rtl8125_registers {
 	RSS_CTRL_8125		= 0x4500,
 	Q_NUM_CTRL_8125		= 0x4800,
 	EEE_TXIDLE_TIMER_8125	= 0x6048,
+	TNPDS_Q1_LOW		= 0x2100,
 	RDSAR_Q1_LOW		= 0x4000,
 	IMR_SET_VEC_MAP_REG	= 0x0d0c,
 	IMR_CLEAR_VEC_MAP_REG	= 0x0d00,
@@ -456,7 +469,23 @@ enum rtl8125_registers {
 };
 
 #define MSIX_ID_VEC_MAP_LINKCHG		29
+#define RSS_CTRL_TCP_IPV4_SUPP		BIT(0)
+#define RSS_CTRL_IPV4_SUPP		BIT(1)
+#define RSS_CTRL_TCP_IPV6_SUPP		BIT(2)
+#define RSS_CTRL_IPV6_SUPP		BIT(3)
+#define RSS_CTRL_IPV6_EXT_SUPP		BIT(4)
+#define RSS_CTRL_TCP_IPV6_EXT_SUPP	BIT(5)
+#define RSS_CTRL_UDP_IPV4_SUPP		BIT(6)
+#define RSS_CTRL_UDP_IPV6_SUPP		BIT(7)
+#define RSS_CTRL_UDP_IPV6_EXT_SUPP	BIT(8)
+#define RTL_RSS_FLAG_HASH_UDP_IPV4	BIT(0)
+#define RTL_RSS_FLAG_HASH_UDP_IPV6	BIT(1)
+#define	RX_RES_RSS			BIT(22)
+#define	RX_RUNT_RSS			BIT(21)
+#define	RX_CRC_RSS			BIT(20)
 #define RTL_VEC_MAP_ENABLE		BIT(0)
+#define RSS_INDIRECTION_TBL_REG		0x4700
+#define RSS_KEY_REG			0x4600
 
 #define LEDSEL_MASK_8125	0x23f
 
@@ -648,6 +677,11 @@ enum rtl_rx_desc_bit {
 #define RxProtoIP	(PID1 | PID0)
 #define RxProtoMask	RxProtoIP
 
+#define	RX_UDPT_DESC_RSS	BIT(19)
+#define	RX_TCPT_DESC_RSS	BIT(18)
+#define	RX_UDPF_DESC_RSS	BIT(16) /* UDP/IP checksum failed */
+#define	RX_TCPF_DESC_RSS	BIT(15) /* TCP/IP checksum failed */
+
 	IPFail		= (1 << 16), /* IP checksum failed */
 	UDPFail		= (1 << 15), /* UDP/IP checksum failed */
 	TCPFail		= (1 << 14), /* TCP/IP checksum failed */
@@ -674,6 +708,21 @@ struct RxDesc {
 	__le64 addr;
 };
 
+struct rx_desc_rss {
+	union {
+		__le64 addr;
+		struct {
+			__le32 rss_info;
+			__le32 rss_result;
+		} rx_desc_rss_dword;
+	};
+
+	struct {
+		__le32 opts2;
+		__le32 opts1;
+	} rx_desc_opts;
+};
+
 struct ring_info {
 	struct sk_buff	*skb;
 	u32		len;
@@ -795,9 +844,13 @@ struct rtl8169_private {
 	u16 isr_reg[R8169_MAX_MSIX_VEC];
 	u16 imr_reg[R8169_MAX_MSIX_VEC];
 	unsigned int num_rx_rings;
+	u32 rss_flags;
 	u16 cp_cmd;
 	u16 tx_lpi_timer;
 	u32 irq_mask;
+	u8 rss_key[RTL_RSS_KEY_SIZE];
+	u8 rss_indir_tbl[RTL_MAX_INDIRECTION_TABLE_ENTRIES];
+	u8 hw_supp_indir_tbl_entries;
 	u16 hw_supp_num_rx_queues;
 	u8 min_irq_nvecs;
 	u8 max_irq_nvecs;
@@ -1639,6 +1692,13 @@ static bool rtl_dash_is_enabled(struct rtl8169_private *tp)
 	}
 }
 
+static bool rtl_check_rss_support(struct rtl8169_private *tp)
+{
+	if (tp->mac_version == RTL_GIGA_MAC_VER_80)
+		return true;
+	return false;
+}
+
 static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private *tp)
 {
 	switch (tp->mac_version) {
@@ -1938,9 +1998,20 @@ static inline u32 rtl8169_tx_vlan_tag(struct sk_buff *skb)
 		TxVlanTag | swab16(skb_vlan_tag_get(skb)) : 0x00;
 }
 
-static void rtl8169_rx_vlan_tag(struct RxDesc *desc, struct sk_buff *skb)
+static void rtl8169_rx_vlan_tag(struct rtl8169_private *tp,
+				struct RxDesc *desc,
+				struct sk_buff *skb)
 {
-	u32 opts2 = le32_to_cpu(desc->opts2);
+	u32 opts2;
+
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		opts2 = le32_to_cpu(((struct rx_desc_rss *)desc)->rx_desc_opts.opts2);
+		break;
+	default:
+		opts2 = le32_to_cpu(desc->opts2);
+		break;
+	}
 
 	if (opts2 & RxVlanTag)
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), swab16(opts2 & 0xffff));
@@ -2796,15 +2867,24 @@ static void rtl_setup_mqs_reg(struct rtl8169_private *tp)
 		tp->imr_reg[i] = (u16)(INTR_VEC_MAP_MASK + (i - 1) * 4);
 }
 
+static void rtl8169_init_rss(struct rtl8169_private *tp)
+{
+	for (int i = 0; i < tp->hw_supp_indir_tbl_entries; i++)
+		tp->rss_indir_tbl[i] = ethtool_rxfh_indir_default(i, tp->num_rx_rings);
+
+	netdev_rss_key_fill(tp->rss_key, RTL_RSS_KEY_SIZE);
+}
+
 static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
 {
 	tp->num_rx_rings = 1;
 
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_80:
-		tp->min_irq_nvecs = 1;
-		tp->max_irq_nvecs = 1;
+		tp->min_irq_nvecs = R8127_MIN_IRQ;
+		tp->max_irq_nvecs = R8127_MAX_IRQ;
 		tp->hw_supp_num_rx_queues = R8127_MAX_RX_QUEUES;
+		tp->hw_supp_indir_tbl_entries = RTL_MAX_INDIRECTION_TABLE_ENTRIES;
 		tp->hw_supp_isr_ver = 6;
 		break;
 	default:
@@ -2943,6 +3023,76 @@ static void rtl_set_rx_max_size(struct rtl8169_private *tp)
 	RTL_W16(tp, RxMaxSize, R8169_RX_BUF_SIZE + 1);
 }
 
+static void rtl8169_store_rss_key(struct rtl8169_private *tp)
+{
+	const u16 rss_key_reg = RSS_KEY_REG;
+	u32 i, rss_key_size = sizeof(tp->rss_key);
+	u32 *rss_key = (u32 *)tp->rss_key;
+
+	/* Write redirection table to HW */
+	for (i = 0; i < rss_key_size; i += 4)
+		RTL_W32(tp, rss_key_reg + i, *rss_key++);
+}
+
+static void rtl8169_store_reta(struct rtl8169_private *tp)
+{
+	u16 indir_tbl_reg = RSS_INDIRECTION_TBL_REG;
+	u32 i, reta_entries = tp->hw_supp_indir_tbl_entries;
+	u32 reta = 0;
+	u8 *indir_tbl = tp->rss_indir_tbl;
+
+	/* Write redirection table to HW */
+	for (i = 0; i < reta_entries; i++) {
+		reta |= indir_tbl[i] << (i & 0x3) * 8;
+		if ((i & 3) == 3) {
+			RTL_W32(tp, indir_tbl_reg, reta);
+			indir_tbl_reg += 4;
+			reta = 0;
+		}
+	}
+}
+
+static int rtl8169_set_rss_hash_opt(struct rtl8169_private *tp)
+{
+	u32 rss_flags = tp->rss_flags;
+	u32 hash_mask_len;
+	u32 rss_ctrl;
+
+	rss_ctrl = ilog2(tp->num_rx_rings);
+	rss_ctrl &= (BIT(0) | BIT(1) | BIT(2));
+	rss_ctrl <<= RSS_CPU_NUM_OFFSET;
+
+	/* Perform hash on these packet types */
+	rss_ctrl |= RSS_CTRL_TCP_IPV4_SUPP
+		 | RSS_CTRL_IPV4_SUPP
+		 | RSS_CTRL_IPV6_SUPP
+		 | RSS_CTRL_IPV6_EXT_SUPP
+		 | RSS_CTRL_TCP_IPV6_SUPP
+		 | RSS_CTRL_TCP_IPV6_EXT_SUPP;
+
+	if (rss_flags & RTL_RSS_FLAG_HASH_UDP_IPV4)
+		rss_ctrl |= RSS_CTRL_UDP_IPV4_SUPP;
+
+	if (rss_flags & RTL_RSS_FLAG_HASH_UDP_IPV6)
+		rss_ctrl |= RSS_CTRL_UDP_IPV6_SUPP |
+			    RSS_CTRL_UDP_IPV6_EXT_SUPP;
+
+	hash_mask_len = ilog2(tp->hw_supp_indir_tbl_entries);
+	hash_mask_len &= (BIT(0) | BIT(1) | BIT(2));
+	rss_ctrl |= hash_mask_len << RSS_MASK_BITS_OFFSET;
+
+	RTL_W32(tp, RSS_CTRL_8125, rss_ctrl);
+
+	return 0;
+}
+
+static void rtl_set_rss_config(struct rtl8169_private *tp)
+{
+	rtl8169_set_rss_hash_opt(tp);
+	rtl8169_store_reta(tp);
+	rtl8169_store_rss_key(tp);
+}
+
 static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
 {
 	/*
@@ -4004,6 +4154,20 @@ DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond)
 	return r8168_mac_ocp_read(tp, 0xe00e) & BIT(13);
 }
 
+static void rtl8125_set_rx_q_num(struct rtl8169_private *tp)
+{
+	u16 q_ctrl;
+	u16 rx_q_num;
+
+	rx_q_num = (u16)ilog2(tp->num_rx_rings);
+	rx_q_num &= (BIT(0) | BIT(1) | BIT(2));
+	rx_q_num <<= 2;
+	q_ctrl = RTL_R16(tp, Q_NUM_CTRL_8125);
+	q_ctrl &= ~(BIT(2) | BIT(3) | BIT(4));
+	q_ctrl |= rx_q_num;
+	RTL_W16(tp, Q_NUM_CTRL_8125, q_ctrl);
+}
+
 static void rtl8125_hw_set_interrupt_type(struct rtl8169_private *tp)
 {
 	u8 tmp;
@@ -4043,6 +4207,12 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
 	    tp->mac_version == RTL_GIGA_MAC_VER_80)
 		RTL_W8(tp, 0xD8, RTL_R8(tp, 0xD8) & ~0x02);
 
+	/* enable rx descriptor type v4 and set queue num for rss*/
+	if (tp->rss_enable) {
+		rtl8125_set_rx_q_num(tp);
+		RTL_W8(tp, 0xd8, RTL_R8(tp, 0xd8) | 0x02);
+	}
+
 	if (tp->mac_version == RTL_GIGA_MAC_VER_80)
 		r8168_mac_ocp_modify(tp, 0xe614, 0x0f00, 0x0f00);
 	else if (tp->mac_version == RTL_GIGA_MAC_VER_70)
@@ -4279,6 +4449,12 @@ static void rtl_hw_start(struct  rtl8169_private *tp)
 	rtl_hw_aspm_clkreq_enable(tp, true);
 	rtl_set_rx_max_size(tp);
 	rtl_set_rx_tx_desc_registers(tp);
+	if (rtl_is_8125(tp)) {
+		if (tp->rss_enable)
+			rtl_set_rss_config(tp);
+		else
+			RTL_W32(tp, RSS_CTRL_8125, 0x00);
+	}
 	rtl_lock_config_regs(tp);
 
 	rtl_jumbo_config(tp);
@@ -4306,6 +4482,16 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
+static void rtl8169_mark_to_asic_rss(struct rx_desc_rss *descrss)
+{
+	u32 eor = le32_to_cpu(descrss->rx_desc_opts.opts1) & RingEnd;
+
+	descrss->rx_desc_opts.opts2 = 0;
+	/* Force memory writes to complete before releasing descriptor */
+	dma_wmb();
+	WRITE_ONCE(descrss->rx_desc_opts.opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
+}
+
 static void rtl8169_mark_to_asic_default(struct RxDesc *desc)
 {
 	u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
@@ -4318,7 +4504,14 @@ static void rtl8169_mark_to_asic_default(struct RxDesc *desc)
 
 static void rtl8169_mark_to_asic(struct rtl8169_private *tp, struct RxDesc *desc)
 {
-	rtl8169_mark_to_asic_default(desc);
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		rtl8169_mark_to_asic_rss((struct rx_desc_rss *)desc);
+		break;
+	default:
+		rtl8169_mark_to_asic_default(desc);
+		break;
+	}
 }
 
 static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
@@ -4341,8 +4534,14 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
 		return NULL;
 	}
 
-	desc->addr = cpu_to_le64(mapping);
 	ring->rx_desc_phy_addr[index] = mapping;
+	if (tp->init_rx_desc_type == RX_DESC_RING_TYPE_RSS) {
+		struct rx_desc_rss *descrss = (struct rx_desc_rss *)(ring->rx_desc_array) + index;
+
+		descrss->addr = cpu_to_le64(mapping);
+	} else {
+		desc->addr = cpu_to_le64(mapping);
+	}
 	rtl8169_mark_to_asic(tp, desc);
 
 	return data;
@@ -4369,9 +4568,21 @@ static void rtl8169_mark_as_last_descriptor_default(struct RxDesc *desc)
 	desc->opts1 |= cpu_to_le32(RingEnd);
 }
 
+static void rtl8169_mark_as_last_descriptor_rss(struct rx_desc_rss *descrss)
+{
+	descrss->rx_desc_opts.opts1 |= cpu_to_le32(RingEnd);
+}
+
 static void rtl8169_mark_as_last_descriptor(struct rtl8169_private *tp, struct RxDesc *desc)
 {
-	rtl8169_mark_as_last_descriptor_default(desc);
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		rtl8169_mark_as_last_descriptor_rss((struct rx_desc_rss *)desc);
+		break;
+	default:
+		rtl8169_mark_as_last_descriptor_default(desc);
+		break;
+	}
 }
 
 static int rtl8169_rx_fill(struct rtl8169_private *tp, struct rtl8169_rx_ring *ring)
@@ -5003,6 +5214,28 @@ static inline int rtl8169_fragmented_frame(u32 status)
 	return (status & (FirstFrag | LastFrag)) != (FirstFrag | LastFrag);
 }
 
+static inline void rtl8169_rx_hash(struct rtl8169_private *tp,
+				   struct rx_desc_rss *desc,
+				   struct sk_buff *skb)
+{
+	u32 rss_header_info;
+	u32 hash_val;
+
+	if (!(tp->dev->features & NETIF_F_RXHASH))
+		return;
+
+	rss_header_info = le32_to_cpu(desc->rx_desc_rss_dword.rss_info);
+
+	if (!(rss_header_info & RXS_RSS_L3_TYPE_MASK))
+		return;
+
+	hash_val = le32_to_cpu(desc->rx_desc_rss_dword.rss_result);
+
+	skb_set_hash(skb, hash_val,
+		     (RXS_RSS_L4_TYPE_MASK & rss_header_info) ?
+		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
+}
+
 static inline void rtl8169_rx_csum_default(struct rtl8169_private *tp,
 					   struct sk_buff *skb,
 					   struct RxDesc *desc)
@@ -5015,28 +5248,66 @@ static inline void rtl8169_rx_csum_default(struct rtl8169_private *tp,
 		skb_checksum_none_assert(skb);
 }
 
+static inline void rtl8169_rx_csum_rss(struct rtl8169_private *tp,
+				       struct sk_buff *skb,
+				       struct rx_desc_rss *descrss)
+{
+	u32 opts1 = le32_to_cpu(descrss->rx_desc_opts.opts1);
+
+	if (((opts1 & RX_TCPT_DESC_RSS) && !(opts1 & RX_TCPF_DESC_RSS)) ||
+	    ((opts1 & RX_UDPT_DESC_RSS) && !(opts1 & RX_UDPF_DESC_RSS)))
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	else
+		skb_checksum_none_assert(skb);
+}
+
 static inline void rtl8169_rx_csum(struct rtl8169_private *tp,
 				   struct sk_buff *skb,
 				   struct RxDesc *desc)
 {
-	rtl8169_rx_csum_default(tp, skb, desc);
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		rtl8169_rx_csum_rss(tp, skb, (struct rx_desc_rss *)desc);
+		break;
+	default:
+		rtl8169_rx_csum_default(tp, skb, desc);
+		break;
+	}
 }
 
 static u32 rtl8169_rx_desc_opts1(struct rtl8169_private *tp, struct RxDesc *desc)
 {
-	return READ_ONCE(desc->opts1);
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		return READ_ONCE(((struct rx_desc_rss *)desc)->rx_desc_opts.opts1);
+	default:
+		return READ_ONCE(desc->opts1);
+	}
 }
 
 static bool rtl8169_check_rx_desc_error(struct net_device *dev,
 					struct rtl8169_private *tp,
 					u32 status)
 {
-	if (unlikely(status & RxRES)) {
-		if (status & (RxRWT | RxRUNT))
-			dev->stats.rx_length_errors++;
-		if (status & RxCRC)
-			dev->stats.rx_crc_errors++;
-		return true;
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		if (unlikely(status & RX_RES_RSS)) {
+			if (status & RX_RUNT_RSS)
+				dev->stats.rx_length_errors++;
+			if (status & RX_CRC_RSS)
+				dev->stats.rx_crc_errors++;
+			return true;
+		}
+		break;
+	default:
+		if (unlikely(status & RxRES)) {
+			if (status & (RxRWT | RxRUNT))
+				dev->stats.rx_length_errors++;
+			if (status & RxCRC)
+				dev->stats.rx_crc_errors++;
+			return true;
+		}
+		break;
 	}
 	return false;
 }
@@ -5045,7 +5316,14 @@ static inline void rtl8169_set_desc_dma_addr(struct rtl8169_private *tp,
 					     struct RxDesc *desc,
 					     dma_addr_t mapping)
 {
-	desc->addr = cpu_to_le64(mapping);
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		((struct rx_desc_rss *)desc)->addr = cpu_to_le64(mapping);
+		break;
+	default:
+		desc->addr = cpu_to_le64(mapping);
+		break;
+	}
 }
 
 static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
@@ -5127,10 +5405,13 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
 		skb->len = pkt_size;
 		dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
 
+		if (tp->rss_enable)
+			rtl8169_rx_hash(tp, (struct rx_desc_rss *)desc, skb);
+
 		rtl8169_rx_csum(tp, skb, desc);
 		skb->protocol = eth_type_trans(skb, dev);
 
-		rtl8169_rx_vlan_tag(desc, skb);
+		rtl8169_rx_vlan_tag(tp, desc, skb);
 
 		if (skb->pkt_type == PACKET_MULTICAST)
 			dev->stats.multicast++;
@@ -5242,7 +5523,6 @@ static int rtl8169_request_irq(struct rtl8169_private *tp)
 
 		napi = &tp->r8169napi[i];
 		snprintf(irq->name, len, "%s-%d", dev->name, i);
-		irq->handler = rtl8169_interrupt;
 		rc = pci_request_irq(tp->pci_dev, i, irq->handler, NULL, napi, irq->name);
 		if (rc)
 			break;
@@ -5709,6 +5989,41 @@ static void rtl_set_irq_mask(struct rtl8169_private *tp)
 	}
 }
 
+static void rtl8169_double_check_rss_support(struct rtl8169_private *tp)
+{
+	if (tp->hw_curr_isr_ver > 1) {
+		if (!(tp->features & RTL_VEC_MAP_ENABLE) || tp->irq_nvecs < tp->min_irq_nvecs)
+			tp->hw_curr_isr_ver = 1;
+	}
+
+	if (tp->rss_support && tp->hw_curr_isr_ver > 1) {
+		u8 rss_queue_num = netif_get_num_default_rss_queues();
+
+		tp->num_rx_rings = min(rss_queue_num, tp->hw_supp_num_rx_queues);
+		if (!(tp->num_rx_rings >= 2 && tp->irq_nvecs >= tp->min_irq_nvecs))
+			tp->num_rx_rings = 1;
+	}
+
+	tp->rss_enable = 0;
+
+	if (tp->num_rx_rings >= 2) {
+		tp->rss_enable = 1;
+		tp->init_rx_desc_type = RX_DESC_RING_TYPE_RSS;
+	} else if (tp->irq_nvecs > 1 && !tp->rss_support) {
+		pci_free_irq_vectors(tp->pci_dev);
+		tp->irq_nvecs = pci_alloc_irq_vectors(tp->pci_dev, 1, 1, PCI_IRQ_ALL_TYPES);
+
+		if (tp->irq_nvecs > 0) {
+			tp->irq = pci_irq_vector(tp->pci_dev, 0);
+		} else {
+			tp->irq = tp->pci_dev->irq;
+			tp->irq_nvecs = 1;
+		}
+
+		tp->features &= ~RTL_VEC_MAP_ENABLE;
+	}
+}
+
 static int rtl_alloc_irq(struct rtl8169_private *tp)
 {
 	struct pci_dev *pdev = tp->pci_dev;
@@ -6177,6 +6492,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	tp->dash_type = rtl_get_dash_type(tp);
 	tp->dash_enabled = rtl_dash_is_enabled(tp);
+	tp->rss_support = rtl_check_rss_support(tp);
 
 	tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK;
 
@@ -6198,6 +6514,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc < 0)
 		return dev_err_probe(&pdev->dev, rc, "Can't allocate interrupt\n");
 
+	rtl8169_double_check_rss_support(tp);
+
+	if (tp->rss_support)
+		rtl8169_init_rss(tp);
 
 	INIT_WORK(&tp->wk.work, rtl_task);
 	disable_work(&tp->wk.work);
@@ -6219,6 +6539,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
+	if (tp->rss_support) {
+		dev->hw_features |= NETIF_F_RXHASH;
+		dev->features |= NETIF_F_RXHASH;
+	}
+
 	/*
 	 * Pretend we are using VLANs; This bypasses a nasty bug where
 	 * Interrupts stop flowing on high load on 8110SCd controllers.
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Patch net-next v1 6/7] r8169: move struct ethtool_ops
  2026-05-06  8:13 [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 javen
                   ` (4 preceding siblings ...)
  2026-05-06  8:13 ` [Patch net-next v1 5/7] r8169: add support and enable rss javen
@ 2026-05-06  8:13 ` javen
  2026-05-06  8:13 ` [Patch net-next v1 7/7] r8169: add support for ethtool javen
  2026-05-06 21:02 ` [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 Heiner Kallweit
  7 siblings, 0 replies; 14+ messages in thread
From: javen @ 2026-05-06  8:13 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

This patch moves struct ethtool_ops without any functional changes, to
prepare for the next patch.

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
 drivers/net/ethernet/realtek/r8169_main.c | 56 +++++++++++------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 9b42cee24b8a..6e682a5538d3 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2558,34 +2558,6 @@ static int rtl8169_set_link_ksettings(struct net_device *ndev,
 	return 0;
 }
 
-static const struct ethtool_ops rtl8169_ethtool_ops = {
-	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
-				     ETHTOOL_COALESCE_MAX_FRAMES,
-	.get_drvinfo		= rtl8169_get_drvinfo,
-	.get_regs_len		= rtl8169_get_regs_len,
-	.get_link		= ethtool_op_get_link,
-	.get_coalesce		= rtl_get_coalesce,
-	.set_coalesce		= rtl_set_coalesce,
-	.get_regs		= rtl8169_get_regs,
-	.get_wol		= rtl8169_get_wol,
-	.set_wol		= rtl8169_set_wol,
-	.get_strings		= rtl8169_get_strings,
-	.get_sset_count		= rtl8169_get_sset_count,
-	.get_ethtool_stats	= rtl8169_get_ethtool_stats,
-	.get_ts_info		= ethtool_op_get_ts_info,
-	.nway_reset		= phy_ethtool_nway_reset,
-	.get_eee		= rtl8169_get_eee,
-	.set_eee		= rtl8169_set_eee,
-	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
-	.set_link_ksettings	= rtl8169_set_link_ksettings,
-	.get_ringparam		= rtl8169_get_ringparam,
-	.get_pause_stats	= rtl8169_get_pause_stats,
-	.get_pauseparam		= rtl8169_get_pauseparam,
-	.set_pauseparam		= rtl8169_set_pauseparam,
-	.get_eth_mac_stats	= rtl8169_get_eth_mac_stats,
-	.get_eth_ctrl_stats	= rtl8169_get_eth_ctrl_stats,
-};
-
 static const struct rtl_chip_info *rtl8169_get_chip_version(u32 xid, bool gmii)
 {
 	/* Chips combining a 1Gbps MAC with a 100Mbps PHY */
@@ -6400,6 +6372,34 @@ static void r8169_init_napi(struct rtl8169_private *tp)
 	}
 }
 
+static const struct ethtool_ops rtl8169_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+				     ETHTOOL_COALESCE_MAX_FRAMES,
+	.get_drvinfo		= rtl8169_get_drvinfo,
+	.get_regs_len		= rtl8169_get_regs_len,
+	.get_link		= ethtool_op_get_link,
+	.get_coalesce		= rtl_get_coalesce,
+	.set_coalesce		= rtl_set_coalesce,
+	.get_regs		= rtl8169_get_regs,
+	.get_wol		= rtl8169_get_wol,
+	.set_wol		= rtl8169_set_wol,
+	.get_strings		= rtl8169_get_strings,
+	.get_sset_count		= rtl8169_get_sset_count,
+	.get_ethtool_stats	= rtl8169_get_ethtool_stats,
+	.get_ts_info		= ethtool_op_get_ts_info,
+	.nway_reset		= phy_ethtool_nway_reset,
+	.get_eee		= rtl8169_get_eee,
+	.set_eee		= rtl8169_set_eee,
+	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
+	.set_link_ksettings	= rtl8169_set_link_ksettings,
+	.get_ringparam		= rtl8169_get_ringparam,
+	.get_pause_stats	= rtl8169_get_pause_stats,
+	.get_pauseparam		= rtl8169_get_pauseparam,
+	.set_pauseparam		= rtl8169_set_pauseparam,
+	.get_eth_mac_stats	= rtl8169_get_eth_mac_stats,
+	.get_eth_ctrl_stats	= rtl8169_get_eth_ctrl_stats,
+};
+
 static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	const struct rtl_chip_info *chip;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Patch net-next v1 7/7] r8169: add support for ethtool
  2026-05-06  8:13 [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 javen
                   ` (5 preceding siblings ...)
  2026-05-06  8:13 ` [Patch net-next v1 6/7] r8169: move struct ethtool_ops javen
@ 2026-05-06  8:13 ` javen
  2026-05-06 21:02 ` [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 Heiner Kallweit
  7 siblings, 0 replies; 14+ messages in thread
From: javen @ 2026-05-06  8:13 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

This patch adds support for changing the number of rx queues via ethtool.
The rx queue count can be set to 1, 2, 4, or 8 with "ethtool -L eth1 rx <num>".

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
 drivers/net/ethernet/realtek/r8169_main.c | 133 ++++++++++++++++++++++
 1 file changed, 133 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 6e682a5538d3..305c5eaf16f8 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -6372,6 +6372,137 @@ static void r8169_init_napi(struct rtl8169_private *tp)
 	}
 }
 
+static void rtl8169_get_channels(struct net_device *dev,
+				 struct ethtool_channels *ch)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+
+	ch->max_rx = tp->hw_supp_num_rx_queues;
+	ch->max_tx = 1;
+	ch->max_other = 0;
+	ch->max_combined = 0;
+
+	ch->rx_count = tp->num_rx_rings;
+	ch->tx_count = 1;
+	ch->other_count = 0;
+	ch->combined_count = 0;
+}
+
+static int rtl8169_realloc_rx(struct rtl8169_private *tp,
+			      struct rtl8169_rx_ring *new_rx,
+			      int new_count)
+{
+	int i, ret;
+
+	new_rx[0].rdsar_reg = RxDescAddrLow;
+	for (i = 1; i < new_count; i++)
+		new_rx[i].rdsar_reg = (u16)(RDSAR_Q1_LOW + (i - 1) * 8);
+
+	for (i = 0; i < new_count; i++)
+		new_rx[i].num_rx_desc = NUM_RX_DESC;
+
+	for (i = 0; i < new_count; i++) {
+		struct rtl8169_rx_ring *ring = &new_rx[i];
+
+		ring->rx_desc_alloc_size = (NUM_RX_DESC + 1) * sizeof(struct RxDesc);
+		ring->rx_desc_array = dma_alloc_coherent(&tp->pci_dev->dev,
+							 ring->rx_desc_alloc_size,
+							 &ring->rx_phy_addr,
+							 GFP_KERNEL);
+		if (!ring->rx_desc_array) {
+			ret = -ENOMEM;
+			goto err_free;
+		}
+
+		memset(ring->rx_databuff, 0, sizeof(ring->rx_databuff));
+		ret = rtl8169_rx_fill(tp, ring);
+		if (ret) {
+			dma_free_coherent(&tp->pci_dev->dev, ring->rx_desc_alloc_size,
+					  ring->rx_desc_array, ring->rx_phy_addr);
+			goto err_free;
+		}
+	}
+	return 0;
+
+err_free:
+	while (--i >= 0) {
+		rtl8169_rx_clear(tp, &new_rx[i]);
+		dma_free_coherent(&tp->pci_dev->dev, new_rx[i].rx_desc_alloc_size,
+				  new_rx[i].rx_desc_array, new_rx[i].rx_phy_addr);
+	}
+	return ret;
+}
+
+static int rtl8169_set_channels(struct net_device *dev,
+				struct ethtool_channels *ch)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+	bool if_running = netif_running(dev);
+	struct rtl8169_rx_ring *new_rx;
+	u8 old_tx_desc_type = tp->init_rx_desc_type;
+	u8 new_desc_type;
+	bool new_rss_enable;
+	int i, ret;
+
+	if (!tp->rss_support && (ch->rx_count > 1 || ch->tx_count > 1)) {
+		netdev_warn(dev, "This chip does not support multiple channels/RSS.\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!(tp->features & RTL_VEC_MAP_ENABLE))
+		return -EINVAL;
+
+	new_rss_enable = (ch->rx_count > 1 && tp->rss_support);
+	new_desc_type = new_rss_enable ? RX_DESC_RING_TYPE_RSS : RX_DESC_RING_TYPE_DEFAULT;
+	tp->init_rx_desc_type = new_desc_type;
+
+	if (!if_running) {
+		tp->num_rx_rings = ch->rx_count;
+		tp->rss_enable = new_rss_enable;
+		return 0;
+	}
+
+	new_rx = kcalloc(R8169_MAX_RX_QUEUES, sizeof(*new_rx), GFP_KERNEL);
+	if (!new_rx)
+		return -ENOMEM;
+
+	ret = rtl8169_realloc_rx(tp, new_rx, ch->rx_count);
+	if (ret) {
+		kfree(new_rx);
+		tp->init_rx_desc_type = old_tx_desc_type;
+		return ret;
+	}
+
+	netif_stop_queue(dev);
+	rtl8169_down(tp);
+
+	for (i = 0; i < tp->num_rx_rings; i++)
+		rtl8169_rx_clear(tp, &tp->rx_ring[i]);
+	rtl8169_free_rx_desc(tp);
+
+	tp->num_rx_rings = ch->rx_count;
+	tp->rss_enable = new_rss_enable;
+
+	memset(tp->rx_ring, 0, sizeof(tp->rx_ring));
+	memcpy(tp->rx_ring, new_rx, sizeof(*new_rx) * ch->rx_count);
+
+	for (i = 0; i < tp->hw_supp_indir_tbl_entries; i++) {
+		if (tp->rss_enable)
+			tp->rss_indir_tbl[i] = ethtool_rxfh_indir_default(i, tp->num_rx_rings);
+		else
+			tp->rss_indir_tbl[i] = 0;
+	}
+
+	rtl_set_irq_mask(tp);
+
+	rtl8169_up(tp);
+	netif_start_queue(dev);
+
+	kfree(new_rx);
+
+	return 0;
+}
+
 static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
 				     ETHTOOL_COALESCE_MAX_FRAMES,
@@ -6390,6 +6521,8 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.nway_reset		= phy_ethtool_nway_reset,
 	.get_eee		= rtl8169_get_eee,
 	.set_eee		= rtl8169_set_eee,
+	.get_channels		= rtl8169_get_channels,
+	.set_channels		= rtl8169_set_channels,
 	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
 	.set_link_ksettings	= rtl8169_set_link_ksettings,
 	.get_ringparam		= rtl8169_get_ringparam,
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [Patch net-next v1 0/7] r8169: add RSS support for RTL8127
  2026-05-06  8:13 [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 javen
                   ` (6 preceding siblings ...)
  2026-05-06  8:13 ` [Patch net-next v1 7/7] r8169: add support for ethtool javen
@ 2026-05-06 21:02 ` Heiner Kallweit
  2026-05-07  2:19   ` Javen
  7 siblings, 1 reply; 14+ messages in thread
From: Heiner Kallweit @ 2026-05-06 21:02 UTC (permalink / raw)
  To: javen, nic_swsd, andrew+netdev, davem, edumazet, kuba, pabeni,
	horms
  Cc: netdev, linux-kernel

On 06.05.2026 10:13, javen wrote:
> From: Javen Xu <javen_xu@realsil.com.cn>
> 
> This patch series adds RSS (Receive Side Scaling) support for the r8169
> ethernet driver, specifically for RTL8127 (RTL_GIGA_MAC_VER_80).

Series adds RSS support for RTL8127 only. Is this generic enough to retrofit
RSS support for other chip versions like RTL8126 w/o bigger refactoring?

> 
> RSS enables packet distribution across multiple receive queues, which can
> significantly improve network throughput on multi-core systems by allowing
> parallel processing of incoming packets.
> 
> Key features:
> - Multi-queue RX support (up to 8 queues)
> - MSI-X interrupt with vector mapping
> - Dynamic queue configuration via ethtool (-L)
> - RSS hash computation for flow classification
> 
> Experiments:
> Platform: AMD Ryzen Embedded R2514 with Radeon Graphics(4 Cores/8 Threads)
> Arch: x86_64
> Test command: 
>   Server: iperf3 -s
>   Client: iperf3 -c 192.168.2.1 -P 20 -t 3600
> Monitor: mpstat -P ALL 1
> 
> Before this patch (Without RSS):
>   Throughput: Unstable, fluctuating between 3.76 Gbits/sec and
>   8.2 Gbits/sec.
>   CPU Usage: A single CPU core is fully occupied with softirq reaching 
>   up to 96%.
> 
> After this patch (With RSS enabled):
>   Throughput: Stable at 9.42 Gbits/sec.
>   CPU Usage: The traffic load is evenly distributed across multiple CPU
>   cores. The maximum softirq on a single core dropped to 63%.
>   
> Other Experiments:
> Link: https://lore.kernel.org/netdev/0A5279953D81BB9C+f50c9b49-3e5d-467f-b69a-7e49ed223383@radxa.com/
> 
> Javen Xu (7):
>   r8169: add support for multi irqs
>   r8169: add support for multi rx queues
>   r8169: add support for new interrupt mapping
>   r8169: enable new interrupt mapping
>   r8169: add support and enable rss
>   r8169: move struct ethtool_ops
>   r8169: add support for ethtool
> 
>  drivers/net/ethernet/realtek/r8169_main.c | 1202 ++++++++++++++++++---
>  1 file changed, 1080 insertions(+), 122 deletions(-)
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch net-next v1 1/7] r8169: add support for multi irqs
  2026-05-06  8:13 ` [Patch net-next v1 1/7] r8169: add support for multi irqs javen
@ 2026-05-06 21:28   ` Heiner Kallweit
  2026-05-06 22:53   ` Jakub Kicinski
  1 sibling, 0 replies; 14+ messages in thread
From: Heiner Kallweit @ 2026-05-06 21:28 UTC (permalink / raw)
  To: javen, nic_swsd, andrew+netdev, davem, edumazet, kuba, pabeni,
	horms
  Cc: netdev, linux-kernel

On 06.05.2026 10:13, javen wrote:
> From: Javen Xu <javen_xu@realsil.com.cn>
> 
> RSS uses multi rx queues to receive packets, and each rx queue needs one
> irq and napi. So this patch adds support for multi irqs and napi here.
> 
> Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 199 ++++++++++++++++++++--
>  1 file changed, 184 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index 791277e750ba..ef74ee02c117 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -77,6 +77,7 @@
>  #define R8169_RX_RING_BYTES	(NUM_RX_DESC * sizeof(struct RxDesc))
>  #define R8169_TX_STOP_THRS	(MAX_SKB_FRAGS + 1)
>  #define R8169_TX_START_THRS	(2 * R8169_TX_STOP_THRS)
> +#define R8169_MAX_MSIX_VEC	32
>  
>  #define OCP_STD_PHY_BASE	0xa400
>  
> @@ -435,6 +436,8 @@ enum rtl8125_registers {
>  #define INT_CFG0_CLKREQEN		BIT(3)
>  	IntrMask_8125		= 0x38,
>  	IntrStatus_8125		= 0x3c,
> +	INTR_VEC_MAP_MASK	= 0x800,
> +	INTR_VEC_MAP_STATUS	= 0x802,

These register names don't have a chip version reference.
Does this mean they can be used on other chip versions
with RSS as well?

>  	INT_CFG1_8125		= 0x7a,
>  	LEDSEL2			= 0x84,
>  	LEDSEL1			= 0x86,
> @@ -728,6 +731,19 @@ enum rtl_dash_type {
>  	RTL_DASH_25_BP,
>  };
>  
> +struct rtl8169_napi {
> +	struct napi_struct napi;
> +	void *priv;
> +	int index;

It seems the index is never used in this patch.

> +};
> +
> +struct rtl8169_irq {
> +	irq_handler_t	handler;
> +	unsigned int	vector;
> +	u8		requested;
> +	char		name[IFNAMSIZ + 10];
> +};
> +
>  struct rtl8169_private {
>  	void __iomem *mmio_addr;	/* memory map physical address */
>  	struct pci_dev *pci_dev;
> @@ -745,9 +761,19 @@ struct rtl8169_private {
>  	dma_addr_t RxPhyAddr;
>  	struct page *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
>  	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
> +	struct rtl8169_irq irq_tbl[R8169_MAX_MSIX_VEC];
> +	struct rtl8169_napi r8169napi[R8169_MAX_MSIX_VEC];
> +	u16 isr_reg[R8169_MAX_MSIX_VEC];
> +	u16 imr_reg[R8169_MAX_MSIX_VEC];

These arrays result in unnecessarily high memory consumption on all other
chip versions. Can't they be dynamically allocated, only in case the driver
supports RSS for the respective chip version?

> +	unsigned int num_rx_rings;
>  	u16 cp_cmd;
>  	u16 tx_lpi_timer;
>  	u32 irq_mask;
> +	u8 min_irq_nvecs;
> +	u8 max_irq_nvecs;

It seems these values are actually constants.
Can't we avoid these members?

> +	u8 hw_supp_isr_ver;
> +	u8 hw_curr_isr_ver;
> +	u8 irq_nvecs;
>  	int irq;
>  	struct clk *clk;
>  
> @@ -763,6 +789,8 @@ struct rtl8169_private {
>  	unsigned aspm_manageable:1;
>  	unsigned dash_enabled:1;
>  	bool sfp_mode:1;
> +	bool rss_support:1;
> +	bool rss_enable:1;
>  	dma_addr_t counters_phys_addr;
>  	struct rtl8169_counters *counters;
>  	struct rtl8169_tc_offsets tc_offset;
> @@ -2680,6 +2708,44 @@ static void rtl_hw_reset(struct rtl8169_private *tp)
>  	rtl_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
>  }
>  
> +static void rtl_setup_mqs_reg(struct rtl8169_private *tp)
> +{
> +	if (tp->mac_version <= RTL_GIGA_MAC_VER_52) {
> +		tp->isr_reg[0] = IntrStatus;
> +		tp->imr_reg[0] = IntrMask;
> +	} else {
> +		tp->isr_reg[0] = IntrStatus_8125;
> +		tp->imr_reg[0] = IntrMask_8125;
> +	}
> +
> +	for (int i = 1; i < tp->max_irq_nvecs; i++)
> +		tp->isr_reg[i] = (u16)(INTR_VEC_MAP_STATUS + (i - 1) * 4);
> +
> +	for (int i = 1; i < tp->max_irq_nvecs; i++)
> +		tp->imr_reg[i] = (u16)(INTR_VEC_MAP_MASK + (i - 1) * 4);

This populates the array with constant values. Therefore, can't you avoid
using this array?

> +}
> +
> +static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
> +{
> +	tp->num_rx_rings = 1;
> +
> +	switch (tp->mac_version) {
> +	case RTL_GIGA_MAC_VER_80:
> +		tp->min_irq_nvecs = 1;
> +		tp->max_irq_nvecs = 1;
> +		tp->hw_supp_isr_ver = 6;

Magic value 6 requires at least an explanation and a constant.

> +		break;
> +	default:
> +		tp->min_irq_nvecs = 1;
> +		tp->max_irq_nvecs = 1;
> +		tp->hw_supp_isr_ver = 1;
> +		break;
> +	}
> +	tp->hw_curr_isr_ver = tp->hw_supp_isr_ver;

This indicates that the current version can be set to a version
which is not the supported one. This is misleading.
- Is supp_isr_ver the highest supported isr version?
- And does this mean that each chip is backwards-compatible and
  supports also all lower isr versions?

> +
> +	rtl_setup_mqs_reg(tp);
> +}
> +
>  static void rtl_request_firmware(struct rtl8169_private *tp)
>  {
>  	struct rtl_fw *rtl_fw;
> @@ -4266,9 +4332,21 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp)
>  	netdev_reset_queue(tp->dev);
>  }
>  
> +static void rtl8169_napi_disable(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->irq_nvecs; i++)
> +		napi_disable(&tp->r8169napi[i].napi);
> +}
> +
> +static void rtl8169_napi_enable(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->irq_nvecs; i++)
> +		napi_enable(&tp->r8169napi[i].napi);
> +}
> +
>  static void rtl8169_cleanup(struct rtl8169_private *tp)
>  {
> -	napi_disable(&tp->napi);
> +	rtl8169_napi_disable(tp);
>  
>  	/* Give a racing hard_start_xmit a few cycles to complete. */
>  	synchronize_net();
> @@ -4313,8 +4391,8 @@ static void rtl_reset_work(struct rtl8169_private *tp)
>  
>  	for (i = 0; i < NUM_RX_DESC; i++)
>  		rtl8169_mark_to_asic(tp->RxDescArray + i);
> +	rtl8169_napi_enable(tp);
>  
> -	napi_enable(&tp->napi);

This moves the empty line. It should remain where it is.

>  	rtl_hw_start(tp);
>  }
>  
> @@ -4820,7 +4898,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  			goto release_descriptor;
>  		}
>  
> -		skb = napi_alloc_skb(&tp->napi, pkt_size);
> +		skb = napi_alloc_skb(&tp->r8169napi[0].napi, pkt_size);
>  		if (unlikely(!skb)) {
>  			dev->stats.rx_dropped++;
>  			goto release_descriptor;
> @@ -4844,7 +4922,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		if (skb->pkt_type == PACKET_MULTICAST)
>  			dev->stats.multicast++;
>  
> -		napi_gro_receive(&tp->napi, skb);
> +		napi_gro_receive(&tp->r8169napi[0].napi, skb);
>  
>  		dev_sw_netstats_rx_add(dev, pkt_size);
>  release_descriptor:
> @@ -4856,7 +4934,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  
>  static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  {
> -	struct rtl8169_private *tp = dev_instance;
> +	struct rtl8169_napi *napi = dev_instance;
> +	struct rtl8169_private *tp = napi->priv;
>  	u32 status = rtl_get_events(tp);
>  
>  	if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
> @@ -4873,13 +4952,53 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  		phy_mac_interrupt(tp->phydev);
>  
>  	rtl_irq_disable(tp);
> -	napi_schedule(&tp->napi);
> +	napi_schedule(&napi->napi);
>  out:
>  	rtl_ack_events(tp, status);
>  
>  	return IRQ_HANDLED;
>  }
>  
> +static void rtl8169_free_irq(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->irq_nvecs; i++) {
> +		struct rtl8169_irq *irq = &tp->irq_tbl[i];
> +		struct rtl8169_napi *napi = &tp->r8169napi[i];
> +
> +		if (irq->requested) {

Is this check actually needed? Wouldn't pci_free_irq()
also be fine with irqs not having been requested?

> +			irq->requested = 0;
> +			pci_free_irq(tp->pci_dev, i, napi);
> +		}
> +	}
> +}
> +
> +static int rtl8169_request_irq(struct rtl8169_private *tp)
> +{
> +	const int len = sizeof(tp->irq_tbl[0].name);
> +	struct net_device *dev = tp->dev;
> +	struct rtl8169_napi *napi;
> +	struct rtl8169_irq *irq;
> +	int rc = 0;
> +
> +	for (int i = 0; i < tp->irq_nvecs; i++) {
> +		irq = &tp->irq_tbl[i];
> +
> +		napi = &tp->r8169napi[i];
> +		snprintf(irq->name, len, "%s-%d", dev->name, i);

I don't think this is needed. pci_request_irq() supports dynamic
irq name generation.

> +		irq->handler = rtl8169_interrupt;
> +		rc = pci_request_irq(tp->pci_dev, i, irq->handler, NULL, napi, irq->name);
> +		if (rc)
> +			break;
> +
> +		irq->vector = pci_irq_vector(tp->pci_dev, i);
> +		irq->requested = 1;
> +	}
> +
> +	if (rc)
> +		rtl8169_free_irq(tp);
> +	return rc;
> +}
> +
>  static void rtl_task(struct work_struct *work)
>  {
>  	struct rtl8169_private *tp =
> @@ -4914,9 +5033,10 @@ static void rtl_task(struct work_struct *work)
>  
>  static int rtl8169_poll(struct napi_struct *napi, int budget)
>  {
> -	struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
> +	struct rtl8169_napi *r8169_napi = container_of(napi, struct rtl8169_napi, napi);
> +	struct rtl8169_private *tp = r8169_napi->priv;
>  	struct net_device *dev = tp->dev;
> -	int work_done;
> +	int work_done = 0;
>  
>  	rtl_tx(dev, tp, budget);
>  
> @@ -5035,7 +5155,7 @@ static void rtl8169_up(struct rtl8169_private *tp)
>  	phy_init_hw(tp->phydev);
>  	phy_resume(tp->phydev);
>  	rtl8169_init_phy(tp);
> -	napi_enable(&tp->napi);
> +	rtl8169_napi_enable(tp);
>  	enable_work(&tp->wk.work);
>  	rtl_reset_work(tp);
>  
> @@ -5053,7 +5173,7 @@ static int rtl8169_close(struct net_device *dev)
>  	rtl8169_down(tp);
>  	rtl8169_rx_clear(tp);
>  
> -	free_irq(tp->irq, tp);
> +	rtl8169_free_irq(tp);
>  
>  	phy_disconnect(tp->phydev);
>  
> @@ -5108,7 +5228,8 @@ static int rtl_open(struct net_device *dev)
>  	rtl_request_firmware(tp);
>  
>  	irqflags = pci_dev_msi_enabled(pdev) ? IRQF_NO_THREAD : IRQF_SHARED;
> -	retval = request_irq(tp->irq, rtl8169_interrupt, irqflags, dev->name, tp);
> +
> +	retval = rtl8169_request_irq(tp);
>  	if (retval < 0)
>  		goto err_release_fw_2;
>  
> @@ -5125,7 +5246,7 @@ static int rtl_open(struct net_device *dev)
>  	return retval;
>  
>  err_free_irq:
> -	free_irq(tp->irq, tp);
> +	rtl8169_free_irq(tp);
>  err_release_fw_2:
>  	rtl_release_firmware(tp);
>  	rtl8169_rx_clear(tp);
> @@ -5328,7 +5449,9 @@ static void rtl_set_irq_mask(struct rtl8169_private *tp)
>  
>  static int rtl_alloc_irq(struct rtl8169_private *tp)
>  {
> +	struct pci_dev *pdev = tp->pci_dev;
>  	unsigned int flags;
> +	int nvecs;
>  
>  	switch (tp->mac_version) {
>  	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
> @@ -5344,7 +5467,18 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
>  		break;
>  	}
>  
> -	return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
> +	nvecs = pci_alloc_irq_vectors(pdev, tp->min_irq_nvecs, tp->max_irq_nvecs, flags);
> +
> +	if (nvecs < 0)
> +		nvecs = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);

This may be dangerous. If the first allocation fails, the fallback here may
allocate an interrupt of a type not supported by the chip.

> +
> +	if (nvecs < 0)
> +		return nvecs;
> +
> +	tp->irq = pci_irq_vector(pdev, 0);
> +	tp->irq_nvecs = nvecs;
> +
> +	return 0;
>  }
>  
>  static void rtl_read_mac_address(struct rtl8169_private *tp,
> @@ -5539,6 +5673,17 @@ static void rtl_hw_initialize(struct rtl8169_private *tp)
>  	}
>  }
>  
> +static int rtl8169_set_real_num_queue(struct rtl8169_private *tp)
> +{
> +	int retval;
> +
> +	retval = netif_set_real_num_tx_queues(tp->dev, 1);
> +	if (retval < 0)
> +		return retval;
> +
> +	return netif_set_real_num_rx_queues(tp->dev, tp->num_rx_rings);
> +}
> +
>  static int rtl_jumbo_max(struct rtl8169_private *tp)
>  {
>  	/* Non-GBit versions don't support jumbo frames */
> @@ -5599,6 +5744,19 @@ static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
>  	return false;
>  }
>  
> +static void r8169_init_napi(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->irq_nvecs; i++) {
> +		struct rtl8169_napi *r8169napi = &tp->r8169napi[i];
> +		int (*poll)(struct napi_struct *napi, int budget);
> +
> +		poll = rtl8169_poll;
> +		netif_napi_add(tp->dev, &r8169napi->napi, poll);
> +		r8169napi->priv = tp;
> +		r8169napi->index = i;
> +	}
> +}
> +
>  static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  {
>  	const struct rtl_chip_info *chip;
> @@ -5703,11 +5861,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  
>  	rtl_hw_reset(tp);
>  
> +	rtl_software_parameter_initialize(tp);
> +
>  	rc = rtl_alloc_irq(tp);
>  	if (rc < 0)
>  		return dev_err_probe(&pdev->dev, rc, "Can't allocate interrupt\n");
>  
> -	tp->irq = pci_irq_vector(pdev, 0);
>  
>  	INIT_WORK(&tp->wk.work, rtl_task);
>  	disable_work(&tp->wk.work);
> @@ -5716,7 +5875,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  
>  	dev->ethtool_ops = &rtl8169_ethtool_ops;
>  
> -	netif_napi_add(dev, &tp->napi, rtl8169_poll);
> +	if (!tp->rss_support) {
> +		netif_napi_add(dev, &tp->r8169napi[0].napi, rtl8169_poll);
> +		tp->r8169napi[0].priv = tp;
> +		tp->r8169napi[0].index = 0;
> +	} else {
> +		r8169_init_napi(tp);
> +	}
>  
>  	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
>  			   NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
> @@ -5778,6 +5943,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	if (jumbo_max)
>  		dev->max_mtu = jumbo_max;
>  
> +	rc = rtl8169_set_real_num_queue(tp);
> +	if (rc < 0)
> +		return dev_err_probe(&pdev->dev, rc, "set tx/rx num failure\n");
> +
>  	rtl_set_irq_mask(tp);
>  
>  	tp->counters = dmam_alloc_coherent (&pdev->dev, sizeof(*tp->counters),


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch net-next v1 2/7] r8169: add support for multi rx queues
  2026-05-06  8:13 ` [Patch net-next v1 2/7] r8169: add support for multi rx queues javen
@ 2026-05-06 21:45   ` Heiner Kallweit
  2026-05-06 22:54   ` Jakub Kicinski
  1 sibling, 0 replies; 14+ messages in thread
From: Heiner Kallweit @ 2026-05-06 21:45 UTC (permalink / raw)
  To: javen, nic_swsd, andrew+netdev, davem, edumazet, kuba, pabeni,
	horms
  Cc: netdev, linux-kernel

On 06.05.2026 10:13, javen wrote:
> From: Javen Xu <javen_xu@realsil.com.cn>
> 
> This patch adds support for multi rx queues. RSS requires multi rx
> queues to receive packets. So we need struct rtl8169_rx_ring for each
> queue.
> 
> Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 318 +++++++++++++++++-----
>  1 file changed, 251 insertions(+), 67 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index ef74ee02c117..bc75dbb9901d 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -74,10 +74,11 @@
>  #define NUM_TX_DESC	256	/* Number of Tx descriptor registers */
>  #define NUM_RX_DESC	256	/* Number of Rx descriptor registers */
>  #define R8169_TX_RING_BYTES	(NUM_TX_DESC * sizeof(struct TxDesc))
> -#define R8169_RX_RING_BYTES	(NUM_RX_DESC * sizeof(struct RxDesc))
>  #define R8169_TX_STOP_THRS	(MAX_SKB_FRAGS + 1)
>  #define R8169_TX_START_THRS	(2 * R8169_TX_STOP_THRS)
> +#define R8169_MAX_RX_QUEUES	8
>  #define R8169_MAX_MSIX_VEC	32
> +#define R8127_MAX_RX_QUEUES	8

Why two MAX_RX_QUEUES constants with the same value?

>  
>  #define OCP_STD_PHY_BASE	0xa400
>  
> @@ -447,6 +448,7 @@ enum rtl8125_registers {
>  	RSS_CTRL_8125		= 0x4500,
>  	Q_NUM_CTRL_8125		= 0x4800,
>  	EEE_TXIDLE_TIMER_8125	= 0x6048,
> +	RDSAR_Q1_LOW		= 0x4000,

Better sort register ids by register number?

>  };
>  
>  #define LEDSEL_MASK_8125	0x23f
> @@ -731,6 +733,19 @@ enum rtl_dash_type {
>  	RTL_DASH_25_BP,
>  };
>  
> +struct rtl8169_rx_ring {
> +	u32 index;					/* Rx queue index */
> +	u32 cur_rx;					/* Index of next Rx pkt. */
> +	u32 dirty_rx;					/* Index for recycling. */
> +	u32 num_rx_desc;				/* num of Rx desc */
> +	struct RxDesc *rx_desc_array;			/* array of Rx Desc*/
> +	u32 rx_desc_alloc_size;				/* memory size per descs of ring */
> +	dma_addr_t rx_desc_phy_addr[NUM_RX_DESC];	/* Rx data buffer physical dma address */
> +	dma_addr_t rx_phy_addr;				/* Rx desc physical address */
> +	struct page *rx_databuff[NUM_RX_DESC];		/* Rx data buffers */
> +	u16 rdsar_reg;					/* Receive Descriptor Start Address */
> +};
> +
>  struct rtl8169_napi {
>  	struct napi_struct napi;
>  	void *priv;
> @@ -744,6 +759,13 @@ struct rtl8169_irq {
>  	char		name[IFNAMSIZ + 10];
>  };
>  
> +enum rx_desc_ring_type {
> +	RX_DESC_RING_TYPE_UNKNOWN = 0,
> +	RX_DESC_RING_TYPE_DEFAULT,
> +	RX_DESC_RING_TYPE_RSS,
> +	RX_DESC_RING_TYPE_MAX

UNKNOWN and MAX seem to never be used in this series.
So do we need them?

> +};
> +
>  struct rtl8169_private {
>  	void __iomem *mmio_addr;	/* memory map physical address */
>  	struct pci_dev *pci_dev;
> @@ -752,28 +774,28 @@ struct rtl8169_private {
>  	struct napi_struct napi;
>  	enum mac_version mac_version;
>  	enum rtl_dash_type dash_type;
> -	u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
>  	u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
>  	u32 dirty_tx;
>  	struct TxDesc *TxDescArray;	/* 256-aligned Tx descriptor ring */
> -	struct RxDesc *RxDescArray;	/* 256-aligned Rx descriptor ring */
>  	dma_addr_t TxPhyAddr;
> -	dma_addr_t RxPhyAddr;
> -	struct page *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
>  	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
>  	struct rtl8169_irq irq_tbl[R8169_MAX_MSIX_VEC];
>  	struct rtl8169_napi r8169napi[R8169_MAX_MSIX_VEC];
> +	struct rtl8169_rx_ring rx_ring[R8169_MAX_RX_QUEUES];
>  	u16 isr_reg[R8169_MAX_MSIX_VEC];
>  	u16 imr_reg[R8169_MAX_MSIX_VEC];
>  	unsigned int num_rx_rings;
>  	u16 cp_cmd;
>  	u16 tx_lpi_timer;
>  	u32 irq_mask;
> +	u16 hw_supp_num_rx_queues;
>  	u8 min_irq_nvecs;
>  	u8 max_irq_nvecs;
>  	u8 hw_supp_isr_ver;
>  	u8 hw_curr_isr_ver;
>  	u8 irq_nvecs;
> +	u8 init_rx_desc_type;
> +	u8 recheck_desc_ownbit;

This seems to be a flag. Then why type u8?

>  	int irq;
>  	struct clk *clk;
>  
> @@ -2647,9 +2669,27 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
>  	}
>  }
>  
> +static void rtl8169_rx_desc_init(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->num_rx_rings; i++) {
> +		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
> +
> +		memset(ring->rx_desc_array, 0x0, ring->rx_desc_alloc_size);
> +	}
> +}
> +
>  static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
>  {
> -	tp->dirty_tx = tp->cur_tx = tp->cur_rx = 0;
> +	tp->dirty_tx = 0;
> +	tp->cur_tx = 0;
> +
> +	for (int i = 0; i < tp->hw_supp_num_rx_queues; i++) {
> +		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
> +
> +		ring->dirty_rx = 0;
> +		ring->cur_rx = 0;
> +		ring->index = i;
> +	}
>  }
>  
>  static void rtl_jumbo_config(struct rtl8169_private *tp)
> @@ -2708,8 +2748,18 @@ static void rtl_hw_reset(struct rtl8169_private *tp)
>  	rtl_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
>  }
>  
> +static void rtl_set_ring_size(struct rtl8169_private *tp, u32 rx_num)
> +{
> +	for (int i = 0; i < tp->hw_supp_num_rx_queues; i++)
> +		tp->rx_ring[i].num_rx_desc = rx_num;
> +}
> +
>  static void rtl_setup_mqs_reg(struct rtl8169_private *tp)
>  {
> +	tp->rx_ring[0].rdsar_reg = RxDescAddrLow;
> +	for (int i = 1; i < tp->hw_supp_num_rx_queues; i++)
> +		tp->rx_ring[i].rdsar_reg = (u16)(RDSAR_Q1_LOW + (i - 1) * 8);

This looks like array rx_ring[] isn't actually needed.

> +
>  	if (tp->mac_version <= RTL_GIGA_MAC_VER_52) {
>  		tp->isr_reg[0] = IntrStatus;
>  		tp->imr_reg[0] = IntrMask;
> @@ -2733,17 +2783,21 @@ static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
>  	case RTL_GIGA_MAC_VER_80:
>  		tp->min_irq_nvecs = 1;
>  		tp->max_irq_nvecs = 1;
> +		tp->hw_supp_num_rx_queues = R8127_MAX_RX_QUEUES;
>  		tp->hw_supp_isr_ver = 6;
>  		break;
>  	default:
>  		tp->min_irq_nvecs = 1;
>  		tp->max_irq_nvecs = 1;
> +		tp->hw_supp_num_rx_queues = 1;
>  		tp->hw_supp_isr_ver = 1;
>  		break;
>  	}
> +	tp->init_rx_desc_type = RX_DESC_RING_TYPE_DEFAULT;
>  	tp->hw_curr_isr_ver = tp->hw_supp_isr_ver;
>  
>  	rtl_setup_mqs_reg(tp);
> +	rtl_set_ring_size(tp, NUM_RX_DESC);
>  }
>  
>  static void rtl_request_firmware(struct rtl8169_private *tp)
> @@ -2877,8 +2931,13 @@ static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
>  	 */
>  	RTL_W32(tp, TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
>  	RTL_W32(tp, TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
> -	RTL_W32(tp, RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
> -	RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
> +
> +	for (int i = 0; i < tp->num_rx_rings; i++) {
> +		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
> +
> +		RTL_W32(tp, ring->rdsar_reg, ((u64)ring->rx_phy_addr) & DMA_BIT_MASK(32));
> +		RTL_W32(tp, ring->rdsar_reg + 4, ((u64)ring->rx_phy_addr >> 32));
> +	}
>  }
>  
>  static void rtl8169_set_magic_reg(struct rtl8169_private *tp)
> @@ -4214,7 +4273,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
>  	return 0;
>  }
>  
> -static void rtl8169_mark_to_asic(struct RxDesc *desc)
> +static void rtl8169_mark_to_asic_default(struct RxDesc *desc)
>  {
>  	u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
>  
> @@ -4224,13 +4283,19 @@ static void rtl8169_mark_to_asic(struct RxDesc *desc)
>  	WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
>  }
>  
> +static void rtl8169_mark_to_asic(struct rtl8169_private *tp, struct RxDesc *desc)
> +{
> +	rtl8169_mark_to_asic_default(desc);
> +}
> +
>  static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
> -					  struct RxDesc *desc)
> +					  struct rtl8169_rx_ring *ring, unsigned int index)
>  {
>  	struct device *d = tp_to_dev(tp);
>  	int node = dev_to_node(d);
>  	dma_addr_t mapping;
>  	struct page *data;
> +	struct RxDesc *desc = ring->rx_desc_array + index;
>  
>  	data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
>  	if (!data)
> @@ -4244,55 +4309,111 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
>  	}
>  
>  	desc->addr = cpu_to_le64(mapping);
> -	rtl8169_mark_to_asic(desc);
> +	ring->rx_desc_phy_addr[index] = mapping;
> +	rtl8169_mark_to_asic(tp, desc);
>  
>  	return data;
>  }
>  
> -static void rtl8169_rx_clear(struct rtl8169_private *tp)
> +static void rtl8169_rx_clear(struct rtl8169_private *tp, struct rtl8169_rx_ring *ring)
>  {
>  	int i;
>  
> -	for (i = 0; i < NUM_RX_DESC && tp->Rx_databuff[i]; i++) {
> +	for (i = 0; i < NUM_RX_DESC && ring->rx_databuff[i]; i++) {
>  		dma_unmap_page(tp_to_dev(tp),
> -			       le64_to_cpu(tp->RxDescArray[i].addr),
> +			       ring->rx_desc_phy_addr[i],
>  			       R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
> -		__free_pages(tp->Rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
> -		tp->Rx_databuff[i] = NULL;
> -		tp->RxDescArray[i].addr = 0;
> -		tp->RxDescArray[i].opts1 = 0;
> +		__free_pages(ring->rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
> +		ring->rx_databuff[i] = NULL;
> +		ring->rx_desc_phy_addr[i] = 0;
> +		ring->rx_desc_array[i].addr = 0;
> +		ring->rx_desc_array[i].opts1 = 0;
>  	}
>  }
>  
> -static int rtl8169_rx_fill(struct rtl8169_private *tp)
> +static void rtl8169_mark_as_last_descriptor_default(struct RxDesc *desc)
> +{
> +	desc->opts1 |= cpu_to_le32(RingEnd);
> +}
> +
> +static void rtl8169_mark_as_last_descriptor(struct rtl8169_private *tp, struct RxDesc *desc)
> +{
> +	rtl8169_mark_as_last_descriptor_default(desc);
> +}
> +

Do we actually need this in this patch?

> +static int rtl8169_rx_fill(struct rtl8169_private *tp, struct rtl8169_rx_ring *ring)
>  {
>  	int i;
>  
>  	for (i = 0; i < NUM_RX_DESC; i++) {
>  		struct page *data;
>  
> -		data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i);
> +		data = rtl8169_alloc_rx_data(tp, ring, i);
>  		if (!data) {
> -			rtl8169_rx_clear(tp);
> +			rtl8169_rx_clear(tp, ring);
>  			return -ENOMEM;
>  		}
> -		tp->Rx_databuff[i] = data;
> +		ring->rx_databuff[i] = data;
>  	}
>  
>  	/* mark as last descriptor in the ring */
> -	tp->RxDescArray[NUM_RX_DESC - 1].opts1 |= cpu_to_le32(RingEnd);
> +	rtl8169_mark_as_last_descriptor(tp, &ring->rx_desc_array[NUM_RX_DESC - 1]);
> +
> +	return 0;
> +}
> +
> +static int rtl8169_alloc_rx_desc(struct rtl8169_private *tp)
> +{
> +	struct rtl8169_rx_ring *ring;
> +	struct pci_dev *pdev = tp->pci_dev;
>  
> +	for (int i = 0; i < tp->num_rx_rings; i++) {
> +		ring = &tp->rx_ring[i];
> +		ring->rx_desc_alloc_size = (ring->num_rx_desc + 1) * sizeof(struct RxDesc);
> +		ring->rx_desc_array = dma_alloc_coherent(&pdev->dev,
> +							 ring->rx_desc_alloc_size,
> +							 &ring->rx_phy_addr,
> +							 GFP_KERNEL);
> +		if (!ring->rx_desc_array)
> +			return -1;
> +	}
>  	return 0;
>  }
>  
> +static void rtl8169_free_rx_desc(struct rtl8169_private *tp)
> +{
> +	struct rtl8169_rx_ring *ring;
> +	struct pci_dev *pdev = tp->pci_dev;
> +
> +	for (int i = 0; i < tp->num_rx_rings; i++) {
> +		ring = &tp->rx_ring[i];
> +		if (ring->rx_desc_array) {
> +			dma_free_coherent(&pdev->dev,
> +					  ring->rx_desc_alloc_size,
> +					  ring->rx_desc_array,
> +					  ring->rx_phy_addr);
> +			ring->rx_desc_array = NULL;
> +		}
> +	}
> +}
> +
>  static int rtl8169_init_ring(struct rtl8169_private *tp)
>  {
> +	int retval = 0;
> +
>  	rtl8169_init_ring_indexes(tp);
> +	rtl8169_rx_desc_init(tp);
>  
>  	memset(tp->tx_skb, 0, sizeof(tp->tx_skb));
> -	memset(tp->Rx_databuff, 0, sizeof(tp->Rx_databuff));
>  
> -	return rtl8169_rx_fill(tp);
> +	for (int i = 0; i < tp->num_rx_rings; i++) {
> +		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
> +
> +		memset(ring->rx_databuff, 0, sizeof(ring->rx_databuff));
> +		retval = rtl8169_rx_fill(tp, ring);
> +	}
> +
> +	return retval;
>  }
>  
>  static void rtl8169_unmap_tx_skb(struct rtl8169_private *tp, unsigned int entry)
> @@ -4381,16 +4502,24 @@ static void rtl8169_cleanup(struct rtl8169_private *tp)
>  	rtl8169_init_ring_indexes(tp);
>  }
>  
> -static void rtl_reset_work(struct rtl8169_private *tp)
> +static void rtl8169_rx_desc_reset(struct rtl8169_private *tp)
>  {
> -	int i;
> +	for (int i = 0; i < tp->num_rx_rings; i++) {
> +		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
>  
> +		for (int j = 0; j < ring->num_rx_desc; j++)
> +			rtl8169_mark_to_asic(tp, ring->rx_desc_array + j);
> +	}
> +}
> +
> +static void rtl_reset_work(struct rtl8169_private *tp)
> +{
>  	netif_stop_queue(tp->dev);
>  
>  	rtl8169_cleanup(tp);
>  
> -	for (i = 0; i < NUM_RX_DESC; i++)
> -		rtl8169_mark_to_asic(tp->RxDescArray + i);
> +	rtl8169_rx_desc_reset(tp);
> +
>  	rtl8169_napi_enable(tp);
>  
>  	rtl_hw_start(tp);
> @@ -4784,6 +4913,11 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev)
>  	rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
>  }
>  
> +static void rtl8169_desc_quirk(struct rtl8169_private *tp)
> +{
> +	RTL_R8(tp, tp->imr_reg[0]);
> +}
> +
>  static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,
>  		   int budget)
>  {
> @@ -4836,9 +4970,11 @@ static inline int rtl8169_fragmented_frame(u32 status)
>  	return (status & (FirstFrag | LastFrag)) != (FirstFrag | LastFrag);
>  }
>  
> -static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
> +static inline void rtl8169_rx_csum_default(struct rtl8169_private *tp,
> +					   struct sk_buff *skb,
> +					   struct RxDesc *desc)
>  {
> -	u32 status = opts1 & (RxProtoMask | RxCSFailMask);
> +	u32 status = le32_to_cpu(desc->opts1) & (RxProtoMask | RxCSFailMask);
>  
>  	if (status == RxProtoTCP || status == RxProtoUDP)
>  		skb->ip_summed = CHECKSUM_UNNECESSARY;
> @@ -4846,22 +4982,71 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
>  		skb_checksum_none_assert(skb);
>  }
>  
> -static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget)
> +static inline void rtl8169_rx_csum(struct rtl8169_private *tp,
> +				   struct sk_buff *skb,
> +				   struct RxDesc *desc)
> +{
> +	rtl8169_rx_csum_default(tp, skb, desc);
> +}
> +
> +static u32 rtl8169_rx_desc_opts1(struct rtl8169_private *tp, struct RxDesc *desc)
> +{
> +	return READ_ONCE(desc->opts1);
> +}

I don't see what benefit the helper provides. Instead it may cause side effects
due to the READ_ONCE().

> +
> +static bool rtl8169_check_rx_desc_error(struct net_device *dev,
> +					struct rtl8169_private *tp,
> +					u32 status)
> +{
> +	if (unlikely(status & RxRES)) {
> +		if (status & (RxRWT | RxRUNT))
> +			dev->stats.rx_length_errors++;
> +		if (status & RxCRC)
> +			dev->stats.rx_crc_errors++;
> +		return true;
> +	}
> +	return false;
> +}
> +
> +static inline void rtl8169_set_desc_dma_addr(struct rtl8169_private *tp,
> +					     struct RxDesc *desc,
> +					     dma_addr_t mapping)
> +{
> +	desc->addr = cpu_to_le64(mapping);
> +}

Argument tp isn't used. Why is it there?

> +
> +static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
> +		  struct rtl8169_rx_ring *ring, int budget)
>  {
>  	struct device *d = tp_to_dev(tp);
>  	int count;
>  
> -	for (count = 0; count < budget; count++, tp->cur_rx++) {
> -		unsigned int pkt_size, entry = tp->cur_rx % NUM_RX_DESC;
> -		struct RxDesc *desc = tp->RxDescArray + entry;
> +	for (count = 0; count < budget; count++, ring->cur_rx++) {
> +		unsigned int pkt_size, entry = ring->cur_rx % ring->num_rx_desc;
> +		struct RxDesc *desc = ring->rx_desc_array + entry;
>  		struct sk_buff *skb;
>  		const void *rx_buf;
>  		dma_addr_t addr;
>  		u32 status;
>  
> -		status = le32_to_cpu(READ_ONCE(desc->opts1));
> -		if (status & DescOwn)
> -			break;
> +		status = le32_to_cpu(rtl8169_rx_desc_opts1(tp, desc));
> +
> +		if (status & DescOwn) {
> +			if (!tp->recheck_desc_ownbit)
> +				break;
> +
> +			/* Workaround for a hardware issue:

Hardware issue on which chip version(s)?

> +			 * Hardware might trigger RX interrupt before the DMA
> +			 * engine fully updates RX desc ownbit in host memory.
> +			 * So we do a quirk and re-read to avoid missing RX
> +			 * packets.
> +			 */
> +			tp->recheck_desc_ownbit = false;
> +			rtl8169_desc_quirk(tp);
> +			status = le32_to_cpu(rtl8169_rx_desc_opts1(tp, desc));
> +			if (status & DescOwn)
> +				break;
> +		}
>  
>  		/* This barrier is needed to keep us from reading
>  		 * any other fields out of the Rx descriptor until
> @@ -4869,20 +5054,15 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		 */
>  		dma_rmb();
>  
> -		if (unlikely(status & RxRES)) {
> +		if (rtl8169_check_rx_desc_error(dev, tp, status)) {
>  			if (net_ratelimit())
>  				netdev_warn(dev, "Rx ERROR. status = %08x\n",
>  					    status);
> +
>  			dev->stats.rx_errors++;
> -			if (status & (RxRWT | RxRUNT))
> -				dev->stats.rx_length_errors++;
> -			if (status & RxCRC)
> -				dev->stats.rx_crc_errors++;
>  
>  			if (!(dev->features & NETIF_F_RXALL))
>  				goto release_descriptor;
> -			else if (status & RxRWT || !(status & (RxRUNT | RxCRC)))
> -				goto release_descriptor;
>  		}
>  
>  		pkt_size = status & GENMASK(13, 0);
> @@ -4898,14 +5078,14 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  			goto release_descriptor;
>  		}
>  
> -		skb = napi_alloc_skb(&tp->r8169napi[0].napi, pkt_size);
> +		skb = napi_alloc_skb(&tp->r8169napi[ring->index].napi, pkt_size);
>  		if (unlikely(!skb)) {
>  			dev->stats.rx_dropped++;
>  			goto release_descriptor;
>  		}
>  
> -		addr = le64_to_cpu(desc->addr);
> -		rx_buf = page_address(tp->Rx_databuff[entry]);
> +		addr = ring->rx_desc_phy_addr[entry];
> +		rx_buf = page_address(ring->rx_databuff[entry]);
>  
>  		dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
>  		prefetch(rx_buf);
> @@ -4914,7 +5094,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		skb->len = pkt_size;
>  		dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
>  
> -		rtl8169_rx_csum(skb, status);
> +		rtl8169_rx_csum(tp, skb, desc);
>  		skb->protocol = eth_type_trans(skb, dev);
>  
>  		rtl8169_rx_vlan_tag(desc, skb);
> @@ -4922,11 +5102,12 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		if (skb->pkt_type == PACKET_MULTICAST)
>  			dev->stats.multicast++;
>  
> -		napi_gro_receive(&tp->r8169napi[0].napi, skb);
> +		napi_gro_receive(&tp->r8169napi[ring->index].napi, skb);
>  
>  		dev_sw_netstats_rx_add(dev, pkt_size);
>  release_descriptor:
> -		rtl8169_mark_to_asic(desc);
> +		rtl8169_set_desc_dma_addr(tp, desc, ring->rx_desc_phy_addr[entry]);
> +		rtl8169_mark_to_asic(tp, desc);
>  	}
>  
>  	return count;
> @@ -4952,6 +5133,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  		phy_mac_interrupt(tp->phydev);
>  
>  	rtl_irq_disable(tp);
> +	tp->recheck_desc_ownbit = true;
>  	napi_schedule(&napi->napi);
>  out:
>  	rtl_ack_events(tp, status);
> @@ -5040,7 +5222,8 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
>  
>  	rtl_tx(dev, tp, budget);
>  
> -	work_done = rtl_rx(dev, tp, budget);
> +	for (int i = 0; i < tp->num_rx_rings; i++)
> +		work_done += rtl_rx(dev, tp, &tp->rx_ring[i], budget);
>  
>  	if (work_done < budget && napi_complete_done(napi, work_done))
>  		rtl_irq_enable(tp);
> @@ -5168,21 +5351,21 @@ static int rtl8169_close(struct net_device *dev)
>  	struct pci_dev *pdev = tp->pci_dev;
>  
>  	pm_runtime_get_sync(&pdev->dev);
> -
>  	netif_stop_queue(dev);
> +
>  	rtl8169_down(tp);
> -	rtl8169_rx_clear(tp);
> +	for (int i = 0; i < tp->num_rx_rings; i++)
> +		rtl8169_rx_clear(tp, &tp->rx_ring[i]);
>  
>  	rtl8169_free_irq(tp);
>  
>  	phy_disconnect(tp->phydev);
>  
> -	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
> -			  tp->RxPhyAddr);
>  	dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray,
>  			  tp->TxPhyAddr);
>  	tp->TxDescArray = NULL;
> -	tp->RxDescArray = NULL;
> +
> +	rtl8169_free_rx_desc(tp);
>  
>  	pm_runtime_put_sync(&pdev->dev);
>  
> @@ -5211,16 +5394,15 @@ static int rtl_open(struct net_device *dev)
>  	 * Rx and Tx descriptors needs 256 bytes alignment.
>  	 * dma_alloc_coherent provides more.
>  	 */
> +

It's not the only place with unmotivated changes. Please remove these changes.

>  	tp->TxDescArray = dma_alloc_coherent(&pdev->dev, R8169_TX_RING_BYTES,
>  					     &tp->TxPhyAddr, GFP_KERNEL);
>  	if (!tp->TxDescArray)
> -		goto out;
> -
> -	tp->RxDescArray = dma_alloc_coherent(&pdev->dev, R8169_RX_RING_BYTES,
> -					     &tp->RxPhyAddr, GFP_KERNEL);
> -	if (!tp->RxDescArray)
>  		goto err_free_tx_0;
>  
> +	if (rtl8169_alloc_rx_desc(tp) < 0)
> +		goto err_free_rx_1;
> +
>  	retval = rtl8169_init_ring(tp);
>  	if (retval < 0)
>  		goto err_free_rx_1;
> @@ -5249,11 +5431,10 @@ static int rtl_open(struct net_device *dev)
>  	rtl8169_free_irq(tp);
>  err_release_fw_2:
>  	rtl_release_firmware(tp);
> -	rtl8169_rx_clear(tp);
> +	for (int i = 0; i < tp->num_rx_rings; i++)
> +		rtl8169_rx_clear(tp, &tp->rx_ring[i]);
>  err_free_rx_1:
> -	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
> -			  tp->RxPhyAddr);
> -	tp->RxDescArray = NULL;
> +	rtl8169_free_rx_desc(tp);
>  err_free_tx_0:
>  	dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray,
>  			  tp->TxPhyAddr);
> @@ -5767,7 +5948,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	u32 txconfig;
>  	u32 xid;
>  
> -	dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp));
> +	dev = devm_alloc_etherdev_mqs(&pdev->dev, sizeof(*tp),
> +				      1,
> +				      R8169_MAX_RX_QUEUES);
> +
>  	if (!dev)
>  		return -ENOMEM;
>  


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch net-next v1 1/7] r8169: add support for multi irqs
  2026-05-06  8:13 ` [Patch net-next v1 1/7] r8169: add support for multi irqs javen
  2026-05-06 21:28   ` Heiner Kallweit
@ 2026-05-06 22:53   ` Jakub Kicinski
  1 sibling, 0 replies; 14+ messages in thread
From: Jakub Kicinski @ 2026-05-06 22:53 UTC (permalink / raw)
  To: javen
  Cc: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, pabeni,
	horms, netdev, linux-kernel

On Wed, 6 May 2026 16:13:19 +0800 javen wrote:
> RSS uses multi rx queues to receive packets, and each rx queue needs one
> irq and napi. So this patch adds support for multi irqs and napi here.

drivers/net/ethernet/realtek/r8169_main.c:5205:16: warning: variable 'irqflags' set but not used [-Wunused-but-set-variable]
 5205 |         unsigned long irqflags;
      |                       ^

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch net-next v1 2/7] r8169: add support for multi rx queues
  2026-05-06  8:13 ` [Patch net-next v1 2/7] r8169: add support for multi rx queues javen
  2026-05-06 21:45   ` Heiner Kallweit
@ 2026-05-06 22:54   ` Jakub Kicinski
  1 sibling, 0 replies; 14+ messages in thread
From: Jakub Kicinski @ 2026-05-06 22:54 UTC (permalink / raw)
  To: javen
  Cc: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, pabeni,
	horms, netdev, linux-kernel

On Wed, 6 May 2026 16:13:20 +0800 javen wrote:
> This patch adds support for multi rx queues. RSS requires multi rx
> queues to receive packets. So we need struct rtl8169_rx_ring for each
> queue.

../drivers/net/ethernet/realtek/r8169_main.c:4994:16:    expected unsigned int
../drivers/net/ethernet/realtek/r8169_main.c:4994:16:    got restricted __le32
../drivers/net/ethernet/realtek/r8169_main.c:5032:26: warning: cast to restricted __le32
../drivers/net/ethernet/realtek/r8169_main.c:5046:34: warning: cast to restricted __le32
-- 
pw-bot: cr

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [Patch net-next v1 0/7] r8169: add RSS support for RTL8127
  2026-05-06 21:02 ` [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 Heiner Kallweit
@ 2026-05-07  2:19   ` Javen
  0 siblings, 0 replies; 14+ messages in thread
From: Javen @ 2026-05-07  2:19 UTC (permalink / raw)
  To: Heiner Kallweit, nic_swsd@realtek.com, andrew+netdev@lunn.ch,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, horms@kernel.org
  Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org

>On 06.05.2026 10:13, javen wrote:
>> From: Javen Xu <javen_xu@realsil.com.cn>
>>
>> This patch series adds RSS (Receive Side Scaling) support for the
>> r8169 ethernet driver, specifically for RTL8127 (RTL_GIGA_MAC_VER_80).
>
>Series adds RSS support for RTL8127 only. Is this generic enough to retrofit RSS
>support for other chip versions like RTL8126 w/o bigger refactoring?
>
Yes. The current implementation is generic enough. Almost all the registers are shared and can be reused. If we want to enable RSS for 8125/8126 later, the only necessary changes would be setting tp->init_rx_desc_type to a new value and adding a new struct rx_desc for 8125 or 8126. For example, this is a desc type for 8125.
struct RxDescV3 {
        union {
                struct {
                        u32 rsv1;
                        u32 rsv2;
                } RxDescDDWord1;
        };
        union {
                struct {
                        u32 RSSResult;
                        u16 HeaderBufferLen;
                        u16 HeaderInfo;
                } RxDescNormalDDWord2;

                struct {
                        u32 rsv5;
                        u32 rsv6;
                } RxDescDDWord2;
        };
        union {
                u64   addr;

                struct {
                        u32 TimeStampLow;
                        u32 TimeStampHigh;
                } RxDescTimeStamp;

                struct {
                        u32 rsv8;
                        u32 rsv9;
                } RxDescDDWord3;
        };
        union {
                struct {
                        u32 opts2;
                        u32 opts1;
                } RxDescNormalDDWord4;

                struct {
                        u16 TimeStampHHigh;
                        u16 rsv11;
                        u32 opts1;
                } RxDescPTPDDWord4;
        };
};
This is complex because it reserves some words for PTP.

Thanks,
BRs,
Javen

>>
>> RSS enables packet distribution across multiple receive queues, which
>> can significantly improve network throughput on multi-core systems by
>> allowing parallel processing of incoming packets.
>>
>> Key features:
>> - Multi-queue RX support (up to 8 queues)
>> - MSI-X interrupt with vector mapping
>> - Dynamic queue configuration via ethtool (-L)
>> - RSS hash computation for flow classification
>>
>> Experiments:
>> Platform: AMD Ryzen Embedded R2514 with Radeon Graphics(4 Cores/8
>> Threads)
>> Arch: x86_64
>> Test command:
>>   Server: iperf3 -s
>>   Client: iperf3 -c 192.168.2.1 -P 20 -t 3600
>> Monitor: mpstat -P ALL 1
>>
>> Before this patch (Without RSS):
>>   Throughput: Unstable, fluctuating between 3.76 Gbits/sec and
>>   8.2 Gbits/sec.
>>   CPU Usage: A single CPU core is fully occupied with softirq reaching
>>   up to 96%.
>>
>> After this patch (With RSS enabled):
>>   Throughput: Stable at 9.42 Gbits/sec.
>>   CPU Usage: The traffic load is evenly distributed across multiple CPU
>>   cores. The maximum softirq on a single core dropped to 63%.
>>
>> Other Experiments:
>> Link:
>> https://lore.kernel.org/netdev/0A5279953D81BB9C+f50c9b49-3e5d-467f-
>b69
>> a-7e49ed223383@radxa.com/
>>
>> Javen Xu (7):
>>   r8169: add support for multi irqs
>>   r8169: add support for multi rx queues
>>   r8169: add support for new interrupt mapping
>>   r8169: enable new interrupt mapping
>>   r8169: add support and enable rss
>>   r8169: move struct ethtool_ops
>>   r8169: add support for ethtool
>>
>>  drivers/net/ethernet/realtek/r8169_main.c | 1202
>> ++++++++++++++++++---
>>  1 file changed, 1080 insertions(+), 122 deletions(-)
>>


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2026-05-07  2:19 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-06  8:13 [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 javen
2026-05-06  8:13 ` [Patch net-next v1 1/7] r8169: add support for multi irqs javen
2026-05-06 21:28   ` Heiner Kallweit
2026-05-06 22:53   ` Jakub Kicinski
2026-05-06  8:13 ` [Patch net-next v1 2/7] r8169: add support for multi rx queues javen
2026-05-06 21:45   ` Heiner Kallweit
2026-05-06 22:54   ` Jakub Kicinski
2026-05-06  8:13 ` [Patch net-next v1 3/7] r8169: add support for new interrupt mapping javen
2026-05-06  8:13 ` [Patch net-next v1 4/7] r8169: enable " javen
2026-05-06  8:13 ` [Patch net-next v1 5/7] r8169: add support and enable rss javen
2026-05-06  8:13 ` [Patch net-next v1 6/7] r8169: move struct ethtool_ops javen
2026-05-06  8:13 ` [Patch net-next v1 7/7] r8169: add support for ethtool javen
2026-05-06 21:02 ` [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 Heiner Kallweit
2026-05-07  2:19   ` Javen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox