Netdev List
 help / color / mirror / Atom feed
* [Patch net-next v3 0/7] r8169: add RSS support for RTL8127
@ 2026-05-13 11:55 javen
  2026-05-13 11:55 ` [Patch net-next v3 1/7] r8169: add support for multi irqs javen
                   ` (6 more replies)
  0 siblings, 7 replies; 17+ messages in thread
From: javen @ 2026-05-13 11:55 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

This patch series adds RSS (Receive Side Scaling) support for the r8169
ethernet driver, specifically for RTL8127 (RTL_GIGA_MAC_VER_80).

RSS enables packet distribution across multiple receive queues, which can
significantly improve network throughput on multi-core systems by allowing
parallel processing of incoming packets.

Key features:
- Multi-queue RX support (up to 8 queues)
- MSI-X interrupt with vector mapping
- Dynamic queue configuration via ethtool (-L)
- RSS hash computation for flow classification

Experiments:
Platform: AMD Ryzen Embedded R2514 with Radeon Graphics (4 Cores/8 Threads)
Arch: x86_64
Test command: 
  Server: iperf3 -s
  Client: iperf3 -c 192.168.2.1 -P 20 -t 3600
Monitor: mpstat -P ALL 1

Before this patch (Without RSS):
  Throughput: Unstable, fluctuating between 3.76 Gbits/sec and
  8.2 Gbits/sec.
  CPU Usage: A single CPU core is fully occupied with softirq reaching 
  up to 96%.

After this patch (With RSS enabled):
  Throughput: Stable at 9.42 Gbits/sec.
  CPU Usage: The traffic load is evenly distributed across multiple CPU
  cores. The maximum softirq on a single core dropped to 63%.
  
Other Experiments:
Link: https://lore.kernel.org/netdev/0A5279953D81BB9C+f50c9b49-3e5d-467f-b69a-7e49ed223383@radxa.com/

Javen Xu (7):
  r8169: add support for multi irqs
  r8169: add support for multi rx queues
  r8169: add support for new interrupt mapping
  r8169: enable new interrupt mapping
  r8169: add support and enable rss
  r8169: move struct ethtool_ops
  r8169: add support for ethtool

 drivers/net/ethernet/realtek/r8169_main.c | 1128 ++++++++++++++++++---
 1 file changed, 992 insertions(+), 136 deletions(-)

-- 
2.43.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Patch net-next v3 1/7] r8169: add support for multi irqs
  2026-05-13 11:55 [Patch net-next v3 0/7] r8169: add RSS support for RTL8127 javen
@ 2026-05-13 11:55 ` javen
  2026-05-16 22:07   ` Heiner Kallweit
  2026-05-13 11:55 ` [Patch net-next v3 2/7] r8169: add support for multi rx queues javen
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: javen @ 2026-05-13 11:55 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

RSS uses multiple RX queues to receive packets, and each RX queue needs
its own IRQ and NAPI instance. This patch therefore adds support for
multiple IRQs and NAPI instances.

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
Changes in v2:
 - remove some unused definitions, such as index, name in rtl8169_irq
 - remove array imr and isr
 - remove min_irq_nvecs and max_irq_nvecs; use the helper functions
   get_min_irq_nvecs and get_max_irq_nvecs instead
 - alloc irq by flags, instead of PCI_IRQ_ALL_TYPES

Changes in v3:
 - add enum rtl_isr_version to replace macro definition
 - remove struct rtl8169_napi, use napi_struct array instead and alloc
   memory for this array dynamically
 - remove struct rtl8169_irq
---
 drivers/net/ethernet/realtek/r8169_main.c | 146 +++++++++++++++++++---
 1 file changed, 127 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 791277e750ba..e4fc84c97c1e 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -77,6 +77,7 @@
 #define R8169_RX_RING_BYTES	(NUM_RX_DESC * sizeof(struct RxDesc))
 #define R8169_TX_STOP_THRS	(MAX_SKB_FRAGS + 1)
 #define R8169_TX_START_THRS	(2 * R8169_TX_STOP_THRS)
+#define R8169_MAX_MSIX_VEC	32
 
 #define OCP_STD_PHY_BASE	0xa400
 
@@ -435,6 +436,8 @@ enum rtl8125_registers {
 #define INT_CFG0_CLKREQEN		BIT(3)
 	IntrMask_8125		= 0x38,
 	IntrStatus_8125		= 0x3c,
+	INTR_VEC_MAP_MASK	= 0x800,
+	INTR_VEC_MAP_STATUS	= 0x802,
 	INT_CFG1_8125		= 0x7a,
 	LEDSEL2			= 0x84,
 	LEDSEL1			= 0x86,
@@ -578,6 +581,11 @@ enum rtl_register_content {
 	MagicPacket_v2	= (1 << 16),	/* Wake up when receives a Magic Packet */
 };
 
+enum rtl_isr_version {
+	RTL_ISR_VER_DEFAULT = 0,
+	RTL_ISR_VER_8127,
+};
+
 enum rtl_desc_bit {
 	/* First doubleword. */
 	DescOwn		= (1 << 31), /* Descriptor is owned by NIC */
@@ -733,7 +741,6 @@ struct rtl8169_private {
 	struct pci_dev *pci_dev;
 	struct net_device *dev;
 	struct phy_device *phydev;
-	struct napi_struct napi;
 	enum mac_version mac_version;
 	enum rtl_dash_type dash_type;
 	u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
@@ -745,9 +752,16 @@ struct rtl8169_private {
 	dma_addr_t RxPhyAddr;
 	struct page *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
 	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
+	struct napi_struct *rtl8169_napi;
+	unsigned int num_rx_rings;
 	u16 cp_cmd;
 	u16 tx_lpi_timer;
 	u32 irq_mask;
+	u16 hw_supp_num_rx_queues;
+	enum rtl_isr_version hw_supp_isr_ver;
+	enum rtl_isr_version hw_curr_isr_ver;
+	u8 irq_nvecs;
+	bool recheck_desc_ownbit;
 	int irq;
 	struct clk *clk;
 
@@ -763,6 +777,8 @@ struct rtl8169_private {
 	unsigned aspm_manageable:1;
 	unsigned dash_enabled:1;
 	bool sfp_mode:1;
+	bool rss_support:1;
+	bool rss_enable:1;
 	dma_addr_t counters_phys_addr;
 	struct rtl8169_counters *counters;
 	struct rtl8169_tc_offsets tc_offset;
@@ -2680,6 +2696,21 @@ static void rtl_hw_reset(struct rtl8169_private *tp)
 	rtl_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
 }
 
+static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
+{
+	tp->num_rx_rings = 1;
+
+	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_80:
+		tp->hw_supp_isr_ver = RTL_ISR_VER_8127;
+		break;
+	default:
+		tp->hw_supp_isr_ver = RTL_ISR_VER_DEFAULT;
+		break;
+	}
+	tp->hw_curr_isr_ver = tp->hw_supp_isr_ver;
+}
+
 static void rtl_request_firmware(struct rtl8169_private *tp)
 {
 	struct rtl_fw *rtl_fw;
@@ -4266,9 +4297,21 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp)
 	netdev_reset_queue(tp->dev);
 }
 
+static void rtl8169_napi_disable(struct rtl8169_private *tp)
+{
+	for (int i = 0; i < tp->irq_nvecs; i++)
+		napi_disable(&tp->rtl8169_napi[i]);
+}
+
+static void rtl8169_napi_enable(struct rtl8169_private *tp)
+{
+	for (int i = 0; i < tp->irq_nvecs; i++)
+		napi_enable(&tp->rtl8169_napi[i]);
+}
+
 static void rtl8169_cleanup(struct rtl8169_private *tp)
 {
-	napi_disable(&tp->napi);
+	rtl8169_napi_disable(tp);
 
 	/* Give a racing hard_start_xmit a few cycles to complete. */
 	synchronize_net();
@@ -4314,7 +4357,7 @@ static void rtl_reset_work(struct rtl8169_private *tp)
 	for (i = 0; i < NUM_RX_DESC; i++)
 		rtl8169_mark_to_asic(tp->RxDescArray + i);
 
-	napi_enable(&tp->napi);
+	rtl8169_napi_enable(tp);
 	rtl_hw_start(tp);
 }
 
@@ -4820,7 +4863,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 			goto release_descriptor;
 		}
 
-		skb = napi_alloc_skb(&tp->napi, pkt_size);
+		skb = napi_alloc_skb(&tp->rtl8169_napi[0], pkt_size);
 		if (unlikely(!skb)) {
 			dev->stats.rx_dropped++;
 			goto release_descriptor;
@@ -4844,7 +4887,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		if (skb->pkt_type == PACKET_MULTICAST)
 			dev->stats.multicast++;
 
-		napi_gro_receive(&tp->napi, skb);
+		napi_gro_receive(&tp->rtl8169_napi[0], skb);
 
 		dev_sw_netstats_rx_add(dev, pkt_size);
 release_descriptor:
@@ -4856,7 +4899,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 
 static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 {
-	struct rtl8169_private *tp = dev_instance;
+	struct napi_struct *napi = dev_instance;
+	struct rtl8169_private *tp = netdev_priv(napi->dev);
 	u32 status = rtl_get_events(tp);
 
 	if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
@@ -4873,13 +4917,41 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 		phy_mac_interrupt(tp->phydev);
 
 	rtl_irq_disable(tp);
-	napi_schedule(&tp->napi);
+	napi_schedule(napi);
 out:
 	rtl_ack_events(tp, status);
 
 	return IRQ_HANDLED;
 }
 
+static void rtl8169_free_irq(struct rtl8169_private *tp)
+{
+	for (int i = 0; i < tp->irq_nvecs; i++) {
+		struct napi_struct *napi = &tp->rtl8169_napi[i];
+
+		pci_free_irq(tp->pci_dev, i, napi);
+	}
+}
+
+static int rtl8169_request_irq(struct rtl8169_private *tp)
+{
+	struct net_device *dev = tp->dev;
+	struct napi_struct *napi;
+	int rc = 0;
+
+	for (int i = 0; i < tp->irq_nvecs; i++) {
+		napi = &tp->rtl8169_napi[i];
+		rc = pci_request_irq(tp->pci_dev, i, rtl8169_interrupt,
+				     NULL, napi, "%s-%d", dev->name, i);
+		if (rc)
+			break;
+	}
+
+	if (rc)
+		rtl8169_free_irq(tp);
+	return rc;
+}
+
 static void rtl_task(struct work_struct *work)
 {
 	struct rtl8169_private *tp =
@@ -4914,9 +4986,9 @@ static void rtl_task(struct work_struct *work)
 
 static int rtl8169_poll(struct napi_struct *napi, int budget)
 {
-	struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
-	struct net_device *dev = tp->dev;
-	int work_done;
+	struct net_device *dev = napi->dev;
+	struct rtl8169_private *tp = netdev_priv(dev);
+	int work_done = 0;
 
 	rtl_tx(dev, tp, budget);
 
@@ -5035,7 +5107,7 @@ static void rtl8169_up(struct rtl8169_private *tp)
 	phy_init_hw(tp->phydev);
 	phy_resume(tp->phydev);
 	rtl8169_init_phy(tp);
-	napi_enable(&tp->napi);
+	rtl8169_napi_enable(tp);
 	enable_work(&tp->wk.work);
 	rtl_reset_work(tp);
 
@@ -5053,7 +5125,7 @@ static int rtl8169_close(struct net_device *dev)
 	rtl8169_down(tp);
 	rtl8169_rx_clear(tp);
 
-	free_irq(tp->irq, tp);
+	rtl8169_free_irq(tp);
 
 	phy_disconnect(tp->phydev);
 
@@ -5082,7 +5154,6 @@ static int rtl_open(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	struct pci_dev *pdev = tp->pci_dev;
-	unsigned long irqflags;
 	int retval = -ENOMEM;
 
 	pm_runtime_get_sync(&pdev->dev);
@@ -5107,8 +5178,7 @@ static int rtl_open(struct net_device *dev)
 
 	rtl_request_firmware(tp);
 
-	irqflags = pci_dev_msi_enabled(pdev) ? IRQF_NO_THREAD : IRQF_SHARED;
-	retval = request_irq(tp->irq, rtl8169_interrupt, irqflags, dev->name, tp);
+	retval = rtl8169_request_irq(tp);
 	if (retval < 0)
 		goto err_release_fw_2;
 
@@ -5125,7 +5195,7 @@ static int rtl_open(struct net_device *dev)
 	return retval;
 
 err_free_irq:
-	free_irq(tp->irq, tp);
+	rtl8169_free_irq(tp);
 err_release_fw_2:
 	rtl_release_firmware(tp);
 	rtl8169_rx_clear(tp);
@@ -5328,7 +5398,9 @@ static void rtl_set_irq_mask(struct rtl8169_private *tp)
 
 static int rtl_alloc_irq(struct rtl8169_private *tp)
 {
+	struct pci_dev *pdev = tp->pci_dev;
 	unsigned int flags;
+	int nvecs;
 
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
@@ -5344,7 +5416,14 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
 		break;
 	}
 
-	return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
+	nvecs = pci_alloc_irq_vectors(pdev, 1, 1, flags);
+
+	if (nvecs < 0)
+		return nvecs;
+
+	tp->irq_nvecs = nvecs;
+
+	return 0;
 }
 
 static void rtl_read_mac_address(struct rtl8169_private *tp,
@@ -5539,6 +5618,17 @@ static void rtl_hw_initialize(struct rtl8169_private *tp)
 	}
 }
 
+static int rtl8169_set_real_num_queue(struct rtl8169_private *tp)
+{
+	int retval;
+
+	retval = netif_set_real_num_tx_queues(tp->dev, 1);
+	if (retval < 0)
+		return retval;
+
+	return netif_set_real_num_rx_queues(tp->dev, tp->num_rx_rings);
+}
+
 static int rtl_jumbo_max(struct rtl8169_private *tp)
 {
 	/* Non-GBit versions don't support jumbo frames */
@@ -5599,6 +5689,12 @@ static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
 	return false;
 }
 
+static void r8169_init_napi(struct rtl8169_private *tp)
+{
+	for (int i = 0; i < tp->irq_nvecs; i++)
+		netif_napi_add(tp->dev, &tp->rtl8169_napi[i], rtl8169_poll);
+}
+
 static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	const struct rtl_chip_info *chip;
@@ -5703,11 +5799,16 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	rtl_hw_reset(tp);
 
+	rtl_software_parameter_initialize(tp);
+
 	rc = rtl_alloc_irq(tp);
 	if (rc < 0)
 		return dev_err_probe(&pdev->dev, rc, "Can't allocate interrupt\n");
 
-	tp->irq = pci_irq_vector(pdev, 0);
+	tp->rtl8169_napi = devm_kcalloc(&pdev->dev, tp->irq_nvecs,
+					sizeof(struct napi_struct), GFP_KERNEL);
+	if (!tp->rtl8169_napi)
+		return -ENOMEM;
 
 	INIT_WORK(&tp->wk.work, rtl_task);
 	disable_work(&tp->wk.work);
@@ -5716,7 +5817,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	dev->ethtool_ops = &rtl8169_ethtool_ops;
 
-	netif_napi_add(dev, &tp->napi, rtl8169_poll);
+	if (!tp->rss_support)
+		netif_napi_add(dev, &tp->rtl8169_napi[0], rtl8169_poll);
+	else
+		r8169_init_napi(tp);
 
 	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 			   NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
@@ -5778,6 +5882,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (jumbo_max)
 		dev->max_mtu = jumbo_max;
 
+	rc = rtl8169_set_real_num_queue(tp);
+	if (rc < 0)
+		return dev_err_probe(&pdev->dev, rc, "set tx/rx num failure\n");
+
 	rtl_set_irq_mask(tp);
 
 	tp->counters = dmam_alloc_coherent (&pdev->dev, sizeof(*tp->counters),
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Patch net-next v3 2/7] r8169: add support for multi rx queues
  2026-05-13 11:55 [Patch net-next v3 0/7] r8169: add RSS support for RTL8127 javen
  2026-05-13 11:55 ` [Patch net-next v3 1/7] r8169: add support for multi irqs javen
@ 2026-05-13 11:55 ` javen
  2026-05-16 22:07   ` Heiner Kallweit
  2026-05-13 11:55 ` [Patch net-next v3 3/7] r8169: add support for new interrupt mapping javen
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: javen @ 2026-05-13 11:55 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

This patch adds support for multiple RX queues. RSS requires multiple RX
queues to receive packets, so each queue gets its own struct
rtl8169_rx_ring.

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
Changes in v2:
 - sort some registers by their numbers
 - remove some unused definitions, like RX_DESC_RING_TYPE_MAX
 - change recheck_desc_ownbit type
 - remove rdsar_reg in rx_ring struct
 - opts1 are different in rx_desc and rx_desc_rss, move the judgement
   to Patch 5/7

Changes in v3:
 - remove ring->rx_desc_alloc_size, use constant instead
---
 drivers/net/ethernet/realtek/r8169_main.c | 262 ++++++++++++++++------
 1 file changed, 199 insertions(+), 63 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index e4fc84c97c1e..9dab0fbcca61 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -74,10 +74,13 @@
 #define NUM_TX_DESC	256	/* Number of Tx descriptor registers */
 #define NUM_RX_DESC	256	/* Number of Rx descriptor registers */
 #define R8169_TX_RING_BYTES	(NUM_TX_DESC * sizeof(struct TxDesc))
-#define R8169_RX_RING_BYTES	(NUM_RX_DESC * sizeof(struct RxDesc))
+#define R8169_RX_RING_BYTES	((NUM_RX_DESC + 1) * sizeof(struct RxDesc))
 #define R8169_TX_STOP_THRS	(MAX_SKB_FRAGS + 1)
 #define R8169_TX_START_THRS	(2 * R8169_TX_STOP_THRS)
+#define R8169_MAX_RX_QUEUES	8
 #define R8169_MAX_MSIX_VEC	32
+#define R8127_MAX_RX_QUEUES	8
+#define R8169_DEFAULT_RX_QUEUES	1
 
 #define OCP_STD_PHY_BASE	0xa400
 
@@ -444,6 +447,7 @@ enum rtl8125_registers {
 	TxPoll_8125		= 0x90,
 	LEDSEL3			= 0x96,
 	MAC0_BKP		= 0x19e0,
+	RDSAR_Q1_LOW		= 0x4000,
 	RSS_CTRL_8125		= 0x4500,
 	Q_NUM_CTRL_8125		= 0x4800,
 	EEE_TXIDLE_TIMER_8125	= 0x6048,
@@ -736,6 +740,21 @@ enum rtl_dash_type {
 	RTL_DASH_25_BP,
 };
 
+enum rx_desc_ring_type {
+	RX_DESC_RING_TYPE_DEFAULT,
+	RX_DESC_RING_TYPE_RSS,
+};
+
+struct rtl8169_rx_ring {
+	u32 index;					/* Rx queue index */
+	u32 cur_rx;					/* Index of next Rx pkt. */
+	u32 dirty_rx;					/* Index for recycling. */
+	struct RxDesc *rx_desc_array;			/* array of Rx Desc*/
+	dma_addr_t rx_desc_phy_addr[NUM_RX_DESC];	/* Rx data buffer physical dma address */
+	dma_addr_t rx_phy_addr;				/* Rx desc physical address */
+	struct page *rx_databuff[NUM_RX_DESC];		/* Rx data buffers */
+};
+
 struct rtl8169_private {
 	void __iomem *mmio_addr;	/* memory map physical address */
 	struct pci_dev *pci_dev;
@@ -743,16 +762,13 @@ struct rtl8169_private {
 	struct phy_device *phydev;
 	enum mac_version mac_version;
 	enum rtl_dash_type dash_type;
-	u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
 	u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
 	u32 dirty_tx;
 	struct TxDesc *TxDescArray;	/* 256-aligned Tx descriptor ring */
-	struct RxDesc *RxDescArray;	/* 256-aligned Rx descriptor ring */
 	dma_addr_t TxPhyAddr;
-	dma_addr_t RxPhyAddr;
-	struct page *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
 	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
 	struct napi_struct *rtl8169_napi;
+	struct rtl8169_rx_ring rx_ring[R8169_MAX_RX_QUEUES];
 	unsigned int num_rx_rings;
 	u16 cp_cmd;
 	u16 tx_lpi_timer;
@@ -2635,9 +2651,27 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
 	}
 }
 
+static void rtl8169_rx_desc_init(struct rtl8169_private *tp)
+{
+	for (int i = 0; i < tp->num_rx_rings; i++) {
+		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
+
+		memset(ring->rx_desc_array, 0x0, R8169_RX_RING_BYTES);
+	}
+}
+
 static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
 {
-	tp->dirty_tx = tp->cur_tx = tp->cur_rx = 0;
+	tp->dirty_tx = 0;
+	tp->cur_tx = 0;
+
+	for (int i = 0; i < tp->hw_supp_num_rx_queues; i++) {
+		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
+
+		ring->dirty_rx = 0;
+		ring->cur_rx = 0;
+		ring->index = i;
+	}
 }
 
 static void rtl_jumbo_config(struct rtl8169_private *tp)
@@ -2702,9 +2736,11 @@ static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
 
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_80:
+		tp->hw_supp_num_rx_queues = R8127_MAX_RX_QUEUES;
 		tp->hw_supp_isr_ver = RTL_ISR_VER_8127;
 		break;
 	default:
+		tp->hw_supp_num_rx_queues = R8169_DEFAULT_RX_QUEUES;
 		tp->hw_supp_isr_ver = RTL_ISR_VER_DEFAULT;
 		break;
 	}
@@ -2835,6 +2871,8 @@ static void rtl_set_rx_max_size(struct rtl8169_private *tp)
 
 static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
 {
+	struct rtl8169_rx_ring *ring = &tp->rx_ring[0];
+
 	/*
 	 * Magic spell: some iop3xx ARM board needs the TxDescAddrHigh
 	 * register to be written before TxDescAddrLow to work.
@@ -2842,8 +2880,16 @@ static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
 	 */
 	RTL_W32(tp, TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
 	RTL_W32(tp, TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
-	RTL_W32(tp, RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
-	RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
+	RTL_W32(tp, RxDescAddrHigh, ((u64) ring->rx_phy_addr) >> 32);
+	RTL_W32(tp, RxDescAddrLow, ((u64) ring->rx_phy_addr) & DMA_BIT_MASK(32));
+
+	for (int i = 1; i < tp->num_rx_rings; i++) {
+		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
+		u16 rdsar_reg = (u16)(RDSAR_Q1_LOW + (i - 1) * 8);
+
+		RTL_W32(tp, rdsar_reg + 4, ((u64)ring->rx_phy_addr >> 32));
+		RTL_W32(tp, rdsar_reg, ((u64)ring->rx_phy_addr) & DMA_BIT_MASK(32));
+	}
 }
 
 static void rtl8169_set_magic_reg(struct rtl8169_private *tp)
@@ -4190,12 +4236,13 @@ static void rtl8169_mark_to_asic(struct RxDesc *desc)
 }
 
 static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
-					  struct RxDesc *desc)
+					  struct rtl8169_rx_ring *ring, unsigned int index)
 {
 	struct device *d = tp_to_dev(tp);
 	int node = dev_to_node(d);
 	dma_addr_t mapping;
 	struct page *data;
+	struct RxDesc *desc = ring->rx_desc_array + index;
 
 	data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
 	if (!data)
@@ -4209,55 +4256,100 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
 	}
 
 	desc->addr = cpu_to_le64(mapping);
+	ring->rx_desc_phy_addr[index] = mapping;
 	rtl8169_mark_to_asic(desc);
 
 	return data;
 }
 
-static void rtl8169_rx_clear(struct rtl8169_private *tp)
+static void rtl8169_rx_clear(struct rtl8169_private *tp, struct rtl8169_rx_ring *ring)
 {
 	int i;
 
-	for (i = 0; i < NUM_RX_DESC && tp->Rx_databuff[i]; i++) {
+	for (i = 0; i < NUM_RX_DESC && ring->rx_databuff[i]; i++) {
 		dma_unmap_page(tp_to_dev(tp),
-			       le64_to_cpu(tp->RxDescArray[i].addr),
+			       ring->rx_desc_phy_addr[i],
 			       R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
-		__free_pages(tp->Rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
-		tp->Rx_databuff[i] = NULL;
-		tp->RxDescArray[i].addr = 0;
-		tp->RxDescArray[i].opts1 = 0;
+		__free_pages(ring->rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
+		ring->rx_databuff[i] = NULL;
+		ring->rx_desc_phy_addr[i] = 0;
+		ring->rx_desc_array[i].addr = 0;
+		ring->rx_desc_array[i].opts1 = 0;
 	}
 }
 
-static int rtl8169_rx_fill(struct rtl8169_private *tp)
+static int rtl8169_rx_fill(struct rtl8169_private *tp, struct rtl8169_rx_ring *ring)
 {
 	int i;
 
 	for (i = 0; i < NUM_RX_DESC; i++) {
 		struct page *data;
 
-		data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i);
+		data = rtl8169_alloc_rx_data(tp, ring, i);
 		if (!data) {
-			rtl8169_rx_clear(tp);
+			rtl8169_rx_clear(tp, ring);
 			return -ENOMEM;
 		}
-		tp->Rx_databuff[i] = data;
+		ring->rx_databuff[i] = data;
 	}
 
 	/* mark as last descriptor in the ring */
-	tp->RxDescArray[NUM_RX_DESC - 1].opts1 |= cpu_to_le32(RingEnd);
+	ring->rx_desc_array[NUM_RX_DESC - 1].opts1 |= cpu_to_le32(RingEnd);
+
+	return 0;
+}
+
+static int rtl8169_alloc_rx_desc(struct rtl8169_private *tp)
+{
+	struct rtl8169_rx_ring *ring;
+	struct pci_dev *pdev = tp->pci_dev;
 
+	for (int i = 0; i < tp->num_rx_rings; i++) {
+		ring = &tp->rx_ring[i];
+		ring->rx_desc_array = dma_alloc_coherent(&pdev->dev,
+							 R8169_RX_RING_BYTES,
+							 &ring->rx_phy_addr,
+							 GFP_KERNEL);
+		if (!ring->rx_desc_array)
+			return -1;
+	}
 	return 0;
 }
 
+static void rtl8169_free_rx_desc(struct rtl8169_private *tp)
+{
+	struct rtl8169_rx_ring *ring;
+	struct pci_dev *pdev = tp->pci_dev;
+
+	for (int i = 0; i < tp->num_rx_rings; i++) {
+		ring = &tp->rx_ring[i];
+		if (ring->rx_desc_array) {
+			dma_free_coherent(&pdev->dev,
+					  R8169_RX_RING_BYTES,
+					  ring->rx_desc_array,
+					  ring->rx_phy_addr);
+			ring->rx_desc_array = NULL;
+		}
+	}
+}
+
 static int rtl8169_init_ring(struct rtl8169_private *tp)
 {
+	int retval = 0;
+
 	rtl8169_init_ring_indexes(tp);
+	rtl8169_rx_desc_init(tp);
 
 	memset(tp->tx_skb, 0, sizeof(tp->tx_skb));
-	memset(tp->Rx_databuff, 0, sizeof(tp->Rx_databuff));
 
-	return rtl8169_rx_fill(tp);
+	for (int i = 0; i < tp->num_rx_rings; i++) {
+		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
+
+		memset(ring->rx_databuff, 0, sizeof(ring->rx_databuff));
+		retval = rtl8169_rx_fill(tp, ring);
+	}
+
+	return retval;
 }
 
 static void rtl8169_unmap_tx_skb(struct rtl8169_private *tp, unsigned int entry)
@@ -4346,16 +4438,23 @@ static void rtl8169_cleanup(struct rtl8169_private *tp)
 	rtl8169_init_ring_indexes(tp);
 }
 
-static void rtl_reset_work(struct rtl8169_private *tp)
+static void rtl8169_rx_desc_reset(struct rtl8169_private *tp)
 {
-	int i;
+	for (int i = 0; i < tp->num_rx_rings; i++) {
+		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
 
+		for (int j = 0; j < NUM_RX_DESC; j++)
+			rtl8169_mark_to_asic(ring->rx_desc_array + j);
+	}
+}
+
+static void rtl_reset_work(struct rtl8169_private *tp)
+{
 	netif_stop_queue(tp->dev);
 
 	rtl8169_cleanup(tp);
 
-	for (i = 0; i < NUM_RX_DESC; i++)
-		rtl8169_mark_to_asic(tp->RxDescArray + i);
+	rtl8169_rx_desc_reset(tp);
 
 	rtl8169_napi_enable(tp);
 	rtl_hw_start(tp);
@@ -4749,6 +4848,11 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev)
 	rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
 }
 
+static void rtl8169_desc_quirk(struct rtl8169_private *tp)
+{
+	RTL_R8(tp, LED_CTRL);
+}
+
 static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,
 		   int budget)
 {
@@ -4801,9 +4905,10 @@ static inline int rtl8169_fragmented_frame(u32 status)
 	return (status & (FirstFrag | LastFrag)) != (FirstFrag | LastFrag);
 }
 
-static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
+static inline void rtl8169_rx_csum(struct sk_buff *skb,
+				   struct RxDesc *desc)
 {
-	u32 status = opts1 & (RxProtoMask | RxCSFailMask);
+	u32 status = le32_to_cpu(desc->opts1) & (RxProtoMask | RxCSFailMask);
 
 	if (status == RxProtoTCP || status == RxProtoUDP)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -4811,22 +4916,58 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
 		skb_checksum_none_assert(skb);
 }
 
-static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget)
+static bool rtl8169_check_rx_desc_error(struct net_device *dev,
+					struct rtl8169_private *tp,
+					u32 status)
+{
+	if (unlikely(status & RxRES)) {
+		if (status & (RxRWT | RxRUNT))
+			dev->stats.rx_length_errors++;
+		if (status & RxCRC)
+			dev->stats.rx_crc_errors++;
+		return true;
+	}
+	return false;
+}
+
+static void rtl8169_set_desc_dma_addr(struct RxDesc *desc,
+				      dma_addr_t mapping)
+{
+	desc->addr = cpu_to_le64(mapping);
+}
+
+static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
+		  struct rtl8169_rx_ring *ring, int budget)
 {
 	struct device *d = tp_to_dev(tp);
 	int count;
 
-	for (count = 0; count < budget; count++, tp->cur_rx++) {
-		unsigned int pkt_size, entry = tp->cur_rx % NUM_RX_DESC;
-		struct RxDesc *desc = tp->RxDescArray + entry;
+	for (count = 0; count < budget; count++, ring->cur_rx++) {
+		unsigned int pkt_size, entry = ring->cur_rx % NUM_RX_DESC;
+		struct RxDesc *desc = ring->rx_desc_array + entry;
 		struct sk_buff *skb;
 		const void *rx_buf;
 		dma_addr_t addr;
 		u32 status;
 
 		status = le32_to_cpu(READ_ONCE(desc->opts1));
-		if (status & DescOwn)
-			break;
+
+		if (status & DescOwn) {
+			if (!tp->recheck_desc_ownbit)
+				break;
+
+			/* Workaround for a hardware issue:
+			 * Hardware might trigger RX interrupt before the DMA
+			 * engine fully updates RX desc ownbit in host memory.
+			 * So we do a quirk and re-read to avoid missing RX
+			 * packets.
+			 */
+			tp->recheck_desc_ownbit = false;
+			rtl8169_desc_quirk(tp);
+			status = le32_to_cpu(READ_ONCE(desc->opts1));
+			if (status & DescOwn)
+				break;
+		}
 
 		/* This barrier is needed to keep us from reading
 		 * any other fields out of the Rx descriptor until
@@ -4834,20 +4975,14 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		 */
 		dma_rmb();
 
-		if (unlikely(status & RxRES)) {
+		if (rtl8169_check_rx_desc_error(dev, tp, status)) {
 			if (net_ratelimit())
 				netdev_warn(dev, "Rx ERROR. status = %08x\n",
 					    status);
 			dev->stats.rx_errors++;
-			if (status & (RxRWT | RxRUNT))
-				dev->stats.rx_length_errors++;
-			if (status & RxCRC)
-				dev->stats.rx_crc_errors++;
 
 			if (!(dev->features & NETIF_F_RXALL))
 				goto release_descriptor;
-			else if (status & RxRWT || !(status & (RxRUNT | RxCRC)))
-				goto release_descriptor;
 		}
 
 		pkt_size = status & GENMASK(13, 0);
@@ -4863,14 +4998,14 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 			goto release_descriptor;
 		}
 
-		skb = napi_alloc_skb(&tp->rtl8169_napi[0], pkt_size);
+		skb = napi_alloc_skb(&tp->rtl8169_napi[ring->index], pkt_size);
 		if (unlikely(!skb)) {
 			dev->stats.rx_dropped++;
 			goto release_descriptor;
 		}
 
-		addr = le64_to_cpu(desc->addr);
-		rx_buf = page_address(tp->Rx_databuff[entry]);
+		addr = ring->rx_desc_phy_addr[entry];
+		rx_buf = page_address(ring->rx_databuff[entry]);
 
 		dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
 		prefetch(rx_buf);
@@ -4879,7 +5014,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		skb->len = pkt_size;
 		dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
 
-		rtl8169_rx_csum(skb, status);
+		rtl8169_rx_csum(skb, desc);
 		skb->protocol = eth_type_trans(skb, dev);
 
 		rtl8169_rx_vlan_tag(desc, skb);
@@ -4887,10 +5022,11 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		if (skb->pkt_type == PACKET_MULTICAST)
 			dev->stats.multicast++;
 
-		napi_gro_receive(&tp->rtl8169_napi[0], skb);
+		napi_gro_receive(&tp->rtl8169_napi[ring->index], skb);
 
 		dev_sw_netstats_rx_add(dev, pkt_size);
 release_descriptor:
+		rtl8169_set_desc_dma_addr(desc, ring->rx_desc_phy_addr[entry]);
 		rtl8169_mark_to_asic(desc);
 	}
 
@@ -4917,6 +5053,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 		phy_mac_interrupt(tp->phydev);
 
 	rtl_irq_disable(tp);
+	tp->recheck_desc_ownbit = true;
 	napi_schedule(napi);
 out:
 	rtl_ack_events(tp, status);
@@ -4992,7 +5129,8 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
 
 	rtl_tx(dev, tp, budget);
 
-	work_done = rtl_rx(dev, tp, budget);
+	for (int i = 0; i < tp->num_rx_rings; i++)
+		work_done += rtl_rx(dev, tp, &tp->rx_ring[i], budget);
 
 	if (work_done < budget && napi_complete_done(napi, work_done))
 		rtl_irq_enable(tp);
@@ -5120,21 +5258,19 @@ static int rtl8169_close(struct net_device *dev)
 	struct pci_dev *pdev = tp->pci_dev;
 
 	pm_runtime_get_sync(&pdev->dev);
-
 	netif_stop_queue(dev);
 	rtl8169_down(tp);
-	rtl8169_rx_clear(tp);
+	for (int i = 0; i < tp->num_rx_rings; i++)
+		rtl8169_rx_clear(tp, &tp->rx_ring[i]);
 
 	rtl8169_free_irq(tp);
 
 	phy_disconnect(tp->phydev);
 
-	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
-			  tp->RxPhyAddr);
 	dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray,
 			  tp->TxPhyAddr);
 	tp->TxDescArray = NULL;
-	tp->RxDescArray = NULL;
+	rtl8169_free_rx_desc(tp);
 
 	pm_runtime_put_sync(&pdev->dev);
 
@@ -5165,13 +5301,11 @@ static int rtl_open(struct net_device *dev)
 	tp->TxDescArray = dma_alloc_coherent(&pdev->dev, R8169_TX_RING_BYTES,
 					     &tp->TxPhyAddr, GFP_KERNEL);
 	if (!tp->TxDescArray)
-		goto out;
-
-	tp->RxDescArray = dma_alloc_coherent(&pdev->dev, R8169_RX_RING_BYTES,
-					     &tp->RxPhyAddr, GFP_KERNEL);
-	if (!tp->RxDescArray)
 		goto err_free_tx_0;
 
+	if (rtl8169_alloc_rx_desc(tp) < 0)
+		goto err_free_rx_1;
+
 	retval = rtl8169_init_ring(tp);
 	if (retval < 0)
 		goto err_free_rx_1;
@@ -5198,11 +5332,10 @@ static int rtl_open(struct net_device *dev)
 	rtl8169_free_irq(tp);
 err_release_fw_2:
 	rtl_release_firmware(tp);
-	rtl8169_rx_clear(tp);
+	for (int i = 0; i < tp->num_rx_rings; i++)
+		rtl8169_rx_clear(tp, &tp->rx_ring[i]);
 err_free_rx_1:
-	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
-			  tp->RxPhyAddr);
-	tp->RxDescArray = NULL;
+	rtl8169_free_rx_desc(tp);
 err_free_tx_0:
 	dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray,
 			  tp->TxPhyAddr);
@@ -5705,7 +5838,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	u32 txconfig;
 	u32 xid;
 
-	dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp));
+	dev = devm_alloc_etherdev_mqs(&pdev->dev, sizeof(*tp),
+				      1,
+				      R8169_MAX_RX_QUEUES);
+
 	if (!dev)
 		return -ENOMEM;
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Patch net-next v3 3/7] r8169: add support for new interrupt mapping
  2026-05-13 11:55 [Patch net-next v3 0/7] r8169: add RSS support for RTL8127 javen
  2026-05-13 11:55 ` [Patch net-next v3 1/7] r8169: add support for multi irqs javen
  2026-05-13 11:55 ` [Patch net-next v3 2/7] r8169: add support for multi rx queues javen
@ 2026-05-13 11:55 ` javen
  2026-05-16 22:07   ` Heiner Kallweit
  2026-05-13 11:55 ` [Patch net-next v3 4/7] r8169: enable " javen
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: javen @ 2026-05-13 11:55 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

To support RSS, the hardware interrupt bits should match the software
interrupt vectors, so this patch adds support for the new interrupt mapping.
ISR_VEC_MAP_REG is the hardware register that indicates interrupt status.
IMR_SET_VEC_MAP_REG is the interrupt mask register that is written to
enable an irq.

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
Changes in v2:
 - no changes

Changes in v3:
 - init index in napi_struct and get message_id from index
 - move rtl8169_disable_hw_interrupt_msix directly before the call to
   napi_schedule()
 - change the condition in rtl8169_request_irq: when RTL_VEC_MAP_ENABLE is
   enabled, use rtl8169_interrupt_msix
---
 drivers/net/ethernet/realtek/r8169_main.c | 165 ++++++++++++++++++++--
 1 file changed, 151 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 9dab0fbcca61..f259cc0cee37 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -78,6 +78,7 @@
 #define R8169_TX_STOP_THRS	(MAX_SKB_FRAGS + 1)
 #define R8169_TX_START_THRS	(2 * R8169_TX_STOP_THRS)
 #define R8169_MAX_RX_QUEUES	8
+#define R8127_MAX_TX_QUEUES	8
 #define R8169_MAX_MSIX_VEC	32
 #define R8127_MAX_RX_QUEUES	8
 #define R8169_DEFAULT_RX_QUEUES	1
@@ -451,8 +452,13 @@ enum rtl8125_registers {
 	RSS_CTRL_8125		= 0x4500,
 	Q_NUM_CTRL_8125		= 0x4800,
 	EEE_TXIDLE_TIMER_8125	= 0x6048,
+	IMR_CLEAR_VEC_MAP_REG	= 0x0d00,
+	ISR_VEC_MAP_REG		= 0x0d04,
+	IMR_SET_VEC_MAP_REG	= 0x0d0c,
 };
 
+#define MSIX_ID_VEC_MAP_LINKCHG	29
+#define RTL_VEC_MAP_ENABLE	BIT(0)
 #define LEDSEL_MASK_8125	0x23f
 
 #define RX_VLAN_INNER_8125	BIT(22)
@@ -583,6 +589,9 @@ enum rtl_register_content {
 
 	/* magic enable v2 */
 	MagicPacket_v2	= (1 << 16),	/* Wake up when receives a Magic Packet */
+#define	ISRIMR_LINKCHG	BIT(29)
+#define	ISRIMR_TOK_Q0	BIT(8)
+#define	ISRIMR_ROK_Q0	BIT(0)
 };
 
 enum rtl_isr_version {
@@ -778,6 +787,7 @@ struct rtl8169_private {
 	enum rtl_isr_version hw_curr_isr_ver;
 	u8 irq_nvecs;
 	bool recheck_desc_ownbit;
+	unsigned int features;
 	int irq;
 	struct clk *clk;
 
@@ -1676,26 +1686,36 @@ static u32 rtl_get_events(struct rtl8169_private *tp)
 
 static void rtl_ack_events(struct rtl8169_private *tp, u32 bits)
 {
-	if (rtl_is_8125(tp))
+	if (rtl_is_8125(tp)) {
 		RTL_W32(tp, IntrStatus_8125, bits);
-	else
+		if (tp->features & RTL_VEC_MAP_ENABLE)
+			RTL_W32(tp, ISR_VEC_MAP_REG, 0xffffffff);
+	} else {
 		RTL_W16(tp, IntrStatus, bits);
+	}
 }
 
 static void rtl_irq_disable(struct rtl8169_private *tp)
 {
-	if (rtl_is_8125(tp))
+	if (rtl_is_8125(tp)) {
 		RTL_W32(tp, IntrMask_8125, 0);
-	else
+		if (tp->features & RTL_VEC_MAP_ENABLE)
+			RTL_W32(tp, IMR_CLEAR_VEC_MAP_REG, 0xffffffff);
+	} else {
 		RTL_W16(tp, IntrMask, 0);
+	}
 }
 
 static void rtl_irq_enable(struct rtl8169_private *tp)
 {
-	if (rtl_is_8125(tp))
-		RTL_W32(tp, IntrMask_8125, tp->irq_mask);
-	else
+	if (rtl_is_8125(tp)) {
+		if (tp->features & RTL_VEC_MAP_ENABLE)
+			RTL_W32(tp, IMR_SET_VEC_MAP_REG, tp->irq_mask);
+		else
+			RTL_W32(tp, IntrMask_8125, tp->irq_mask);
+	} else {
 		RTL_W16(tp, IntrMask, tp->irq_mask);
+	}
 }
 
 static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
@@ -5070,6 +5090,42 @@ static void rtl8169_free_irq(struct rtl8169_private *tp)
 	}
 }
 
+static void rtl8169_disable_hw_interrupt_msix(struct rtl8169_private *tp, int message_id)
+{
+	RTL_W32(tp, IMR_CLEAR_VEC_MAP_REG, BIT(message_id));
+}
+
+static void rtl8169_clear_hw_isr(struct rtl8169_private *tp, int message_id)
+{
+	RTL_W32(tp, ISR_VEC_MAP_REG, BIT(message_id));
+}
+
+static void rtl8169_enable_hw_interrupt_msix(struct rtl8169_private *tp, int message_id)
+{
+	RTL_W32(tp, IMR_SET_VEC_MAP_REG, BIT(message_id));
+}
+
+static irqreturn_t rtl8169_interrupt_msix(int irq, void *dev_instance)
+{
+	struct napi_struct *napi = dev_instance;
+	struct net_device *dev = napi->dev;
+	struct rtl8169_private *tp = netdev_priv(dev);
+	int message_id = napi->index;
+
+	rtl8169_clear_hw_isr(tp, message_id);
+
+	if (message_id == MSIX_ID_VEC_MAP_LINKCHG) {
+		phy_mac_interrupt(tp->phydev);
+		return IRQ_HANDLED;
+	}
+
+	tp->recheck_desc_ownbit = true;
+	rtl8169_disable_hw_interrupt_msix(tp, message_id);
+	napi_schedule(napi);
+
+	return IRQ_HANDLED;
+}
+
 static int rtl8169_request_irq(struct rtl8169_private *tp)
 {
 	struct net_device *dev = tp->dev;
@@ -5078,8 +5134,12 @@ static int rtl8169_request_irq(struct rtl8169_private *tp)
 
 	for (int i = 0; i < tp->irq_nvecs; i++) {
 		napi = &tp->rtl8169_napi[i];
-		rc = pci_request_irq(tp->pci_dev, i, rtl8169_interrupt,
-				     NULL, napi, "%s-%d", dev->name, i);
+		if (tp->features & RTL_VEC_MAP_ENABLE)
+			rc = pci_request_irq(tp->pci_dev, i, rtl8169_interrupt_msix,
+					     NULL, napi, "%s-%d", dev->name, i);
+		else
+			rc = pci_request_irq(tp->pci_dev, i, rtl8169_interrupt,
+					     NULL, napi, "%s-%d", dev->name, i);
 		if (rc)
 			break;
 	}
@@ -5523,10 +5583,16 @@ static const struct net_device_ops rtl_netdev_ops = {
 
 static void rtl_set_irq_mask(struct rtl8169_private *tp)
 {
-	tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
+	if (tp->features & RTL_VEC_MAP_ENABLE) {
+		tp->irq_mask = ISRIMR_LINKCHG | ISRIMR_TOK_Q0;
+		for (int i = 0; i < tp->num_rx_rings; i++)
+			tp->irq_mask |= ISRIMR_ROK_Q0 << i;
+	} else {
+		tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
 
-	if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
-		tp->irq_mask |= SYSErr | RxFIFOOver;
+		if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
+			tp->irq_mask |= SYSErr | RxFIFOOver;
+	}
 }
 
 static int rtl_alloc_irq(struct rtl8169_private *tp)
@@ -5555,6 +5621,8 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
 		return nvecs;
 
 	tp->irq_nvecs = nvecs;
+	if (nvecs > 1)
+		tp->features |= RTL_VEC_MAP_ENABLE;
 
 	return 0;
 }
@@ -5822,10 +5890,79 @@ static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
 	return false;
 }
 
+static int rtl8169_poll_msix_rx(struct napi_struct *napi, int budget)
+{
+	struct net_device *dev = napi->dev;
+	struct rtl8169_private *tp = netdev_priv(dev);
+	const int message_id = napi->index;
+	int work_done = 0;
+
+	if (message_id < tp->num_rx_rings)
+		work_done += rtl_rx(dev, tp, &tp->rx_ring[message_id], budget);
+
+	if (work_done < budget && napi_complete_done(napi, work_done))
+		rtl8169_enable_hw_interrupt_msix(tp, message_id);
+
+	return work_done;
+}
+
+static int rtl8169_poll_msix_tx(struct napi_struct *napi, int budget)
+{
+	struct net_device *dev = napi->dev;
+	struct rtl8169_private *tp = netdev_priv(dev);
+	const int message_id = napi->index;
+	int tx_ring_idx = message_id - 8;
+	unsigned int work_done = 0;
+
+	if (tx_ring_idx >= 0)
+		rtl_tx(dev, tp, budget);
+
+	if (work_done < budget && napi_complete_done(napi, work_done))
+		rtl8169_enable_hw_interrupt_msix(tp, message_id);
+
+	return work_done;
+}
+
+static int rtl8169_poll_msix_other(struct napi_struct *napi, int budget)
+{
+	struct net_device *dev = napi->dev;
+	struct rtl8169_private *tp = netdev_priv(dev);
+	const int message_id = napi - tp->rtl8169_napi;
+
+	napi_complete_done(napi, budget);
+	rtl8169_enable_hw_interrupt_msix(tp, message_id);
+
+	return 1;
+}
+
 static void r8169_init_napi(struct rtl8169_private *tp)
 {
-	for (int i = 0; i < tp->irq_nvecs; i++)
-		netif_napi_add(tp->dev, &tp->rtl8169_napi[i], rtl8169_poll);
+	for (int i = 0; i < tp->irq_nvecs; i++) {
+		if (tp->features & RTL_VEC_MAP_ENABLE) {
+			switch (tp->hw_curr_isr_ver) {
+			case RTL_ISR_VER_8127:
+				if (i < R8127_MAX_RX_QUEUES)
+					netif_napi_add(tp->dev, &tp->rtl8169_napi[i],
+						       rtl8169_poll_msix_rx);
+				else if (i >= R8127_MAX_RX_QUEUES &&
+					 i < (R8127_MAX_RX_QUEUES +
+					 R8127_MAX_TX_QUEUES))
+					netif_napi_add(tp->dev, &tp->rtl8169_napi[i],
+						       rtl8169_poll_msix_tx);
+				else
+					netif_napi_add(tp->dev, &tp->rtl8169_napi[i],
+						       rtl8169_poll_msix_other);
+				break;
+			default:
+				netif_napi_add(tp->dev, &tp->rtl8169_napi[i],
+					       rtl8169_poll);
+				break;
+			}
+		} else {
+			netif_napi_add(tp->dev, &tp->rtl8169_napi[i], rtl8169_poll);
+		}
+		tp->rtl8169_napi[i].index = i;
+	}
 }
 
 static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Patch net-next v3 4/7] r8169: enable new interrupt mapping
  2026-05-13 11:55 [Patch net-next v3 0/7] r8169: add RSS support for RTL8127 javen
                   ` (2 preceding siblings ...)
  2026-05-13 11:55 ` [Patch net-next v3 3/7] r8169: add support for new interrupt mapping javen
@ 2026-05-13 11:55 ` javen
  2026-05-13 11:55 ` [Patch net-next v3 5/7] r8169: add support and enable rss javen
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 17+ messages in thread
From: javen @ 2026-05-13 11:55 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

This patch enables new interrupt mapping for RTL8127.

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
Changes in v2:
 - no changes

Changes in v3:
 - no changes
---
 drivers/net/ethernet/realtek/r8169_main.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index f259cc0cee37..b9c505e4bc0a 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3955,6 +3955,15 @@ DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond)
 	return r8168_mac_ocp_read(tp, 0xe00e) & BIT(13);
 }
 
+static void rtl8169_hw_enable_vec_mapping(struct rtl8169_private *tp)
+{
+	u8 tmp;
+
+	tmp = RTL_R8(tp, INT_CFG0_8125);
+	tmp |= INT_CFG0_ENABLE_8125;
+	RTL_W8(tp, INT_CFG0_8125, tmp);
+}
+
 static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
 {
 	rtl_pcie_state_l2l3_disable(tp);
@@ -3963,6 +3972,9 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
 	RTL_W32(tp, RSS_CTRL_8125, 0);
 	RTL_W16(tp, Q_NUM_CTRL_8125, 0);
 
+	if (tp->features & RTL_VEC_MAP_ENABLE)
+		rtl8169_hw_enable_vec_mapping(tp);
+
 	/* disable UPS */
 	r8168_mac_ocp_modify(tp, 0xd40a, 0x0010, 0x0000);
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Patch net-next v3 5/7] r8169: add support and enable rss
  2026-05-13 11:55 [Patch net-next v3 0/7] r8169: add RSS support for RTL8127 javen
                   ` (3 preceding siblings ...)
  2026-05-13 11:55 ` [Patch net-next v3 4/7] r8169: enable " javen
@ 2026-05-13 11:55 ` javen
  2026-05-15  0:21   ` Jakub Kicinski
  2026-05-16 22:07   ` Heiner Kallweit
  2026-05-13 11:55 ` [Patch net-next v3 6/7] r8169: move struct ethtool_ops javen
  2026-05-13 11:55 ` [Patch net-next v3 7/7] r8169: add support for ethtool javen
  6 siblings, 2 replies; 17+ messages in thread
From: javen @ 2026-05-13 11:55 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

This patch adds RSS support for RTL8127 and enables it.

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
Changes in v2:
 - some changes moved from Patch 2/7
Changes in v3:
 - add struct rtl8169_rss_data. Allocate it dynamically when needed.
 - define rss_key as an u32 array
 - replace some magic bit numbers in rtl8169_set_rss_hash_opt() and
   rtl8125_set_rx_q_num()
 - use union to combine different rx descriptor, refactor struct RxDesc
 - remove dead code from rtl8169_double_check_rss_support()
---
 drivers/net/ethernet/realtek/r8169_main.c | 405 ++++++++++++++++++++--
 1 file changed, 371 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index b9c505e4bc0a..b90375cef724 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -82,6 +82,19 @@
 #define R8169_MAX_MSIX_VEC	32
 #define R8127_MAX_RX_QUEUES	8
 #define R8169_DEFAULT_RX_QUEUES	1
+#define R8127_MAX_IRQ		32
+#define R8127_MIN_IRQ		30
+#define R8169_IRQ_DEFAULT	1
+#define RTL_RSS_KEY_SIZE	40
+#define RSS_CPU_NUM_MASK	GENMASK(18, 16)
+#define RSS_HASH_MASK		GENMASK(10, 8)
+#define RTL_MAX_INDIRECTION_TABLE_ENTRIES 128
+#define RXS_RSS_UDP		BIT(27)
+#define RXS_RSS_IPV4		BIT(28)
+#define RXS_RSS_IPV6		BIT(29)
+#define RXS_RSS_TCP		BIT(30)
+#define RXS_RSS_L3_TYPE_MASK	(RXS_RSS_IPV4 | RXS_RSS_IPV6)
+#define RXS_RSS_L4_TYPE_MASK	(RXS_RSS_TCP | RXS_RSS_UDP)
 
 #define OCP_STD_PHY_BASE	0xa400
 
@@ -592,6 +605,25 @@ enum rtl_register_content {
 #define	ISRIMR_LINKCHG	BIT(29)
 #define	ISRIMR_TOK_Q0	BIT(8)
 #define	ISRIMR_ROK_Q0	BIT(0)
+#define RTL_DESC_TYPE_CTRL		0xd8
+#define RSS_KEY_REG			0x4600
+#define RSS_INDIRECTION_TBL_REG		0x4700
+#define RSS_CTRL_TCP_IPV4_SUPP		BIT(0)
+#define RTL_DESC_TYPE_RSS		BIT(1)
+#define RSS_CTRL_IPV4_SUPP		BIT(1)
+#define RSS_CTRL_TCP_IPV6_SUPP		BIT(2)
+#define RSS_CTRL_IPV6_SUPP		BIT(3)
+#define RSS_CTRL_IPV6_EXT_SUPP		BIT(4)
+#define RSS_CTRL_TCP_IPV6_EXT_SUPP	BIT(5)
+#define RSS_CTRL_UDP_IPV4_SUPP		BIT(6)
+#define RSS_CTRL_UDP_IPV6_SUPP		BIT(7)
+#define RSS_CTRL_UDP_IPV6_EXT_SUPP	BIT(8)
+#define RTL_RSS_FLAG_HASH_UDP_IPV4	BIT(0)
+#define RTL_RSS_FLAG_HASH_UDP_IPV6	BIT(1)
+#define	RX_RES_RSS			BIT(22)
+#define	RX_RUNT_RSS			BIT(21)
+#define	RX_CRC_RSS			BIT(20)
+#define RTL_RX_Q_NUM_MASK		GENMASK(4, 2)
 };
 
 enum rtl_isr_version {
@@ -654,6 +686,11 @@ enum rtl_rx_desc_bit {
 #define RxProtoIP	(PID1 | PID0)
 #define RxProtoMask	RxProtoIP
 
+#define	RX_UDPT_DESC_RSS	BIT(19)
+#define	RX_TCPT_DESC_RSS	BIT(18)
+#define	RX_UDPF_DESC_RSS	BIT(16) /* UDP/IP checksum failed */
+#define	RX_TCPF_DESC_RSS	BIT(15) /* TCP/IP checksum failed */
+
 	IPFail		= (1 << 16), /* IP checksum failed */
 	UDPFail		= (1 << 15), /* UDP/IP checksum failed */
 	TCPFail		= (1 << 14), /* TCP/IP checksum failed */
@@ -675,9 +712,27 @@ struct TxDesc {
 };
 
 struct RxDesc {
-	__le32 opts1;
-	__le32 opts2;
-	__le64 addr;
+	union {
+		/* RX_DESC_RING_TYPE_DEFAULT */
+		struct {
+			__le32 opts1;
+			__le32 opts2;
+			__le64 addr;
+		};
+
+		/* RX_DESC_RING_TYPE_RSS */
+		struct {
+			union {
+				__le64 rss_addr;
+				struct {
+					__le32 rss_info;
+					__le32 rss_result;
+				} rss_dword;
+			};
+			__le32 rss_opts2;
+			__le32 rss_opts1;
+		};
+	};
 };
 
 struct ring_info {
@@ -764,6 +819,13 @@ struct rtl8169_rx_ring {
 	struct page *rx_databuff[NUM_RX_DESC];		/* Rx data buffers */
 };
 
+struct rtl8169_rss_data {
+	u32 rss_flags;
+	u32 rss_key[RTL_RSS_KEY_SIZE / sizeof(u32)];
+	u8 rss_indir_tbl[RTL_MAX_INDIRECTION_TABLE_ENTRIES];
+	u8 hw_supp_indir_tbl_entries;
+};
+
 struct rtl8169_private {
 	void __iomem *mmio_addr;	/* memory map physical address */
 	struct pci_dev *pci_dev;
@@ -783,9 +845,11 @@ struct rtl8169_private {
 	u16 tx_lpi_timer;
 	u32 irq_mask;
 	u16 hw_supp_num_rx_queues;
+	struct rtl8169_rss_data *rss_data;
 	enum rtl_isr_version hw_supp_isr_ver;
 	enum rtl_isr_version hw_curr_isr_ver;
 	u8 irq_nvecs;
+	u8 init_rx_desc_type;
 	bool recheck_desc_ownbit;
 	unsigned int features;
 	int irq;
@@ -1620,6 +1684,13 @@ static bool rtl_dash_is_enabled(struct rtl8169_private *tp)
 	}
 }
 
+static bool rtl_check_rss_support(struct rtl8169_private *tp)
+{
+	if (tp->mac_version == RTL_GIGA_MAC_VER_80)
+		return true;
+	return false;
+}
+
 static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private *tp)
 {
 	switch (tp->mac_version) {
@@ -1919,9 +1990,20 @@ static inline u32 rtl8169_tx_vlan_tag(struct sk_buff *skb)
 		TxVlanTag | swab16(skb_vlan_tag_get(skb)) : 0x00;
 }
 
-static void rtl8169_rx_vlan_tag(struct RxDesc *desc, struct sk_buff *skb)
+static void rtl8169_rx_vlan_tag(struct rtl8169_private *tp,
+				struct RxDesc *desc,
+				struct sk_buff *skb)
 {
-	u32 opts2 = le32_to_cpu(desc->opts2);
+	u32 opts2;
+
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		opts2 = le32_to_cpu(desc->rss_opts2);
+		break;
+	default:
+		opts2 = le32_to_cpu(desc->opts2);
+		break;
+	}
 
 	if (opts2 & RxVlanTag)
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), swab16(opts2 & 0xffff));
@@ -2750,6 +2832,14 @@ static void rtl_hw_reset(struct rtl8169_private *tp)
 	rtl_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
 }
 
+static void rtl8169_init_rss(struct rtl8169_private *tp)
+{
+	for (int i = 0; i < tp->rss_data->hw_supp_indir_tbl_entries; i++)
+		tp->rss_data->rss_indir_tbl[i] = ethtool_rxfh_indir_default(i, tp->num_rx_rings);
+
+	netdev_rss_key_fill(tp->rss_data->rss_key, RTL_RSS_KEY_SIZE);
+}
+
 static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
 {
 	tp->num_rx_rings = 1;
@@ -2757,6 +2847,7 @@ static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_80:
 		tp->hw_supp_num_rx_queues = R8127_MAX_RX_QUEUES;
+		tp->rss_data->hw_supp_indir_tbl_entries = RTL_MAX_INDIRECTION_TABLE_ENTRIES;
 		tp->hw_supp_isr_ver = RTL_ISR_VER_8127;
 		break;
 	default:
@@ -2764,6 +2855,7 @@ static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
 		tp->hw_supp_isr_ver = RTL_ISR_VER_DEFAULT;
 		break;
 	}
+	tp->init_rx_desc_type = RX_DESC_RING_TYPE_DEFAULT;
 	tp->hw_curr_isr_ver = tp->hw_supp_isr_ver;
 }
 
@@ -2889,6 +2981,72 @@ static void rtl_set_rx_max_size(struct rtl8169_private *tp)
 	RTL_W16(tp, RxMaxSize, R8169_RX_BUF_SIZE + 1);
 }
 
+static void rtl8169_store_rss_key(struct rtl8169_private *tp)
+{
+	u32 *rss_key = tp->rss_data->rss_key;
+	const u16 rss_key_reg = RSS_KEY_REG;
+	u32 num_entries = RTL_RSS_KEY_SIZE / sizeof(u32);
+
+	/* Write RSS hash key to HW */
+	for (int i = 0; i < num_entries; i++)
+		RTL_W32(tp, rss_key_reg + (i * 4), rss_key[i]);
+}
+
+static void rtl8169_store_reta(struct rtl8169_private *tp)
+{
+	u16 indir_tbl_reg = RSS_INDIRECTION_TBL_REG;
+	u32 i, reta_entries = tp->rss_data->hw_supp_indir_tbl_entries;
+	u32 reta = 0;
+	u8 *indir_tbl = tp->rss_data->rss_indir_tbl;
+
+	/* Write redirection table to HW */
+	for (i = 0; i < reta_entries; i++) {
+		reta |= indir_tbl[i] << (i & 0x3) * 8;
+		if ((i & 3) == 3) {
+			RTL_W32(tp, indir_tbl_reg, reta);
+			indir_tbl_reg += 4;
+			reta = 0;
+		}
+	}
+}
+
+static int rtl8169_set_rss_hash_opt(struct rtl8169_private *tp)
+{
+	u32 rss_flags = tp->rss_data->rss_flags;
+	u32 rss_ctrl;
+
+	rss_ctrl = FIELD_PREP(RSS_CPU_NUM_MASK, ilog2(tp->num_rx_rings));
+
+	/* Perform hash on these packet types */
+	rss_ctrl |= RSS_CTRL_TCP_IPV4_SUPP
+		 | RSS_CTRL_IPV4_SUPP
+		 | RSS_CTRL_IPV6_SUPP
+		 | RSS_CTRL_IPV6_EXT_SUPP
+		 | RSS_CTRL_TCP_IPV6_SUPP
+		 | RSS_CTRL_TCP_IPV6_EXT_SUPP;
+
+	if (rss_flags & RTL_RSS_FLAG_HASH_UDP_IPV4)
+		rss_ctrl |= RSS_CTRL_UDP_IPV4_SUPP;
+
+	if (rss_flags & RTL_RSS_FLAG_HASH_UDP_IPV6)
+		rss_ctrl |= RSS_CTRL_UDP_IPV6_SUPP |
+			    RSS_CTRL_UDP_IPV6_EXT_SUPP;
+
+	rss_ctrl |= FIELD_PREP(RSS_HASH_MASK,
+			       ilog2(tp->rss_data->hw_supp_indir_tbl_entries));
+
+	RTL_W32(tp, RSS_CTRL_8125, rss_ctrl);
+
+	return 0;
+}
+
+static void rtl_set_rss_config(struct rtl8169_private *tp)
+{
+	rtl8169_set_rss_hash_opt(tp);
+	rtl8169_store_reta(tp);
+	rtl8169_store_rss_key(tp);
+}
+
 static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
 {
 	struct rtl8169_rx_ring *ring = &tp->rx_ring[0];
@@ -3955,6 +4113,18 @@ DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond)
 	return r8168_mac_ocp_read(tp, 0xe00e) & BIT(13);
 }
 
+static void rtl8125_set_rx_q_num(struct rtl8169_private *tp)
+{
+	u16 q_ctrl;
+	u16 rx_q_num;
+
+	rx_q_num = (u16)ilog2(tp->num_rx_rings);
+	q_ctrl = RTL_R16(tp, Q_NUM_CTRL_8125);
+	q_ctrl &= ~RTL_RX_Q_NUM_MASK;
+	q_ctrl |= FIELD_PREP(RTL_RX_Q_NUM_MASK, rx_q_num);
+	RTL_W16(tp, Q_NUM_CTRL_8125, q_ctrl);
+}
+
 static void rtl8169_hw_enable_vec_mapping(struct rtl8169_private *tp)
 {
 	u8 tmp;
@@ -3994,6 +4164,13 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
 	    tp->mac_version == RTL_GIGA_MAC_VER_80)
 		RTL_W8(tp, 0xD8, RTL_R8(tp, 0xD8) & ~0x02);
 
+	/* enable rx descriptor type v4 and set queue num for rss*/
+	if (tp->rss_enable) {
+		rtl8125_set_rx_q_num(tp);
+		RTL_W8(tp, RTL_DESC_TYPE_CTRL,
+		       RTL_R8(tp, RTL_DESC_TYPE_CTRL) | RTL_DESC_TYPE_RSS);
+	}
+
 	if (tp->mac_version == RTL_GIGA_MAC_VER_80)
 		r8168_mac_ocp_modify(tp, 0xe614, 0x0f00, 0x0f00);
 	else if (tp->mac_version == RTL_GIGA_MAC_VER_70)
@@ -4230,6 +4407,12 @@ static void rtl_hw_start(struct  rtl8169_private *tp)
 	rtl_hw_aspm_clkreq_enable(tp, true);
 	rtl_set_rx_max_size(tp);
 	rtl_set_rx_tx_desc_registers(tp);
+	if (rtl_is_8125(tp)) {
+		if (tp->rss_enable)
+			rtl_set_rss_config(tp);
+		else
+			RTL_W32(tp, RSS_CTRL_8125, 0x00);
+	}
 	rtl_lock_config_regs(tp);
 
 	rtl_jumbo_config(tp);
@@ -4257,14 +4440,26 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-static void rtl8169_mark_to_asic(struct RxDesc *desc)
+static void rtl8169_mark_to_asic(struct rtl8169_private *tp, struct RxDesc *desc)
 {
-	u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
+	u32 eor;
 
-	desc->opts2 = 0;
-	/* Force memory writes to complete before releasing descriptor */
-	dma_wmb();
-	WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		eor = le32_to_cpu(desc->rss_opts1) & RingEnd;
+		desc->rss_opts2 = 0;
+		/* Force memory writes to complete before releasing descriptor */
+		dma_wmb();
+		WRITE_ONCE(desc->rss_opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
+		break;
+	default:
+		eor = le32_to_cpu(desc->opts1) & RingEnd;
+		desc->opts2 = 0;
+		/* Force memory writes to complete before releasing descriptor */
+		dma_wmb();
+		WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
+		break;
+	}
 }
 
 static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
@@ -4287,9 +4482,12 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
 		return NULL;
 	}
 
-	desc->addr = cpu_to_le64(mapping);
 	ring->rx_desc_phy_addr[index] = mapping;
-	rtl8169_mark_to_asic(desc);
+	if (tp->init_rx_desc_type == RX_DESC_RING_TYPE_RSS)
+		desc->rss_addr = cpu_to_le64(mapping);
+	else
+		desc->addr = cpu_to_le64(mapping);
+	rtl8169_mark_to_asic(tp, desc);
 
 	return data;
 }
@@ -4310,6 +4508,18 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp, struct rtl8169_rx_ring
 	}
 }
 
+static void rtl8169_mark_as_last_descriptor(struct rtl8169_private *tp, struct RxDesc *desc)
+{
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		desc->rss_opts1 |= cpu_to_le32(RingEnd);
+		break;
+	default:
+		desc->opts1 |= cpu_to_le32(RingEnd);
+		break;
+	}
+}
+
 static int rtl8169_rx_fill(struct rtl8169_private *tp, struct rtl8169_rx_ring *ring)
 {
 	int i;
@@ -4326,7 +4536,7 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp, struct rtl8169_rx_ring *r
 	}
 
 	/* mark as last descriptor in the ring */
-	ring->rx_desc_array[NUM_RX_DESC - 1].opts1 |= cpu_to_le32(RingEnd);
+	rtl8169_mark_as_last_descriptor(tp, &ring->rx_desc_array[NUM_RX_DESC - 1]);
 
 	return 0;
 }
@@ -4476,7 +4686,7 @@ static void rtl8169_rx_desc_reset(struct rtl8169_private *tp)
 		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
 
 		for (int j = 0; j < NUM_RX_DESC; j++)
-			rtl8169_mark_to_asic(ring->rx_desc_array + j);
+			rtl8169_mark_to_asic(tp, ring->rx_desc_array + j);
 	}
 }
 
@@ -4937,35 +5147,104 @@ static inline int rtl8169_fragmented_frame(u32 status)
 	return (status & (FirstFrag | LastFrag)) != (FirstFrag | LastFrag);
 }
 
-static inline void rtl8169_rx_csum(struct sk_buff *skb,
+static inline void rtl8169_rx_hash(struct rtl8169_private *tp,
+				   struct RxDesc *desc,
+				   struct sk_buff *skb)
+{
+	u32 rss_header_info;
+	u32 hash_val;
+
+	if (!(tp->dev->features & NETIF_F_RXHASH))
+		return;
+
+	rss_header_info = le32_to_cpu(desc->rss_dword.rss_info);
+
+	if (!(rss_header_info & RXS_RSS_L3_TYPE_MASK))
+		return;
+
+	hash_val = le32_to_cpu(desc->rss_dword.rss_result);
+
+	skb_set_hash(skb, hash_val,
+		     (RXS_RSS_L4_TYPE_MASK & rss_header_info) ?
+		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
+}
+
+static inline void rtl8169_rx_csum(struct rtl8169_private *tp,
+				   struct sk_buff *skb,
 				   struct RxDesc *desc)
 {
-	u32 status = le32_to_cpu(desc->opts1) & (RxProtoMask | RxCSFailMask);
+	bool csum_ok = false;
+	u32 opts1;
 
-	if (status == RxProtoTCP || status == RxProtoUDP)
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		opts1 = le32_to_cpu(desc->rss_opts1);
+		if (((opts1 & RX_TCPT_DESC_RSS) && !(opts1 & RX_TCPF_DESC_RSS)) ||
+		    ((opts1 & RX_UDPT_DESC_RSS) && !(opts1 & RX_UDPF_DESC_RSS)))
+			csum_ok = true;
+		break;
+	default:
+		opts1 = le32_to_cpu(desc->opts1) & (RxProtoMask | RxCSFailMask);
+		if (opts1 == RxProtoTCP || opts1 == RxProtoUDP)
+			csum_ok = true;
+		break;
+	}
+
+	if (csum_ok)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	else
 		skb_checksum_none_assert(skb);
 }
 
+static u32 rtl8169_rx_desc_opts1(struct rtl8169_private *tp, struct RxDesc *desc)
+{
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		return READ_ONCE(desc->rss_opts1);
+	default:
+		return READ_ONCE(desc->opts1);
+	}
+}
+
 static bool rtl8169_check_rx_desc_error(struct net_device *dev,
 					struct rtl8169_private *tp,
 					u32 status)
 {
-	if (unlikely(status & RxRES)) {
-		if (status & (RxRWT | RxRUNT))
-			dev->stats.rx_length_errors++;
-		if (status & RxCRC)
-			dev->stats.rx_crc_errors++;
-		return true;
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		if (unlikely(status & RX_RES_RSS)) {
+			if (status & RX_RUNT_RSS)
+				dev->stats.rx_length_errors++;
+			if (status & RX_CRC_RSS)
+				dev->stats.rx_crc_errors++;
+			return true;
+		}
+		break;
+	default:
+		if (unlikely(status & RxRES)) {
+			if (status & (RxRWT | RxRUNT))
+				dev->stats.rx_length_errors++;
+			if (status & RxCRC)
+				dev->stats.rx_crc_errors++;
+			return true;
+		}
+		break;
 	}
 	return false;
 }
 
-static void rtl8169_set_desc_dma_addr(struct RxDesc *desc,
+static void rtl8169_set_desc_dma_addr(struct rtl8169_private *tp,
+				      struct RxDesc *desc,
 				      dma_addr_t mapping)
 {
-	desc->addr = cpu_to_le64(mapping);
+	switch (tp->init_rx_desc_type) {
+	case RX_DESC_RING_TYPE_RSS:
+		desc->rss_addr = cpu_to_le64(mapping);
+		break;
+	default:
+		desc->addr = cpu_to_le64(mapping);
+		break;
+	}
 }
 
 static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
@@ -4982,7 +5261,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
 		dma_addr_t addr;
 		u32 status;
 
-		status = le32_to_cpu(READ_ONCE(desc->opts1));
+		status = le32_to_cpu(rtl8169_rx_desc_opts1(tp, desc));
 
 		if (status & DescOwn) {
 			if (!tp->recheck_desc_ownbit)
@@ -4996,7 +5275,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
 			 */
 			tp->recheck_desc_ownbit = false;
 			rtl8169_desc_quirk(tp);
-			status = le32_to_cpu(READ_ONCE(desc->opts1));
+			status = le32_to_cpu(rtl8169_rx_desc_opts1(tp, desc));
 			if (status & DescOwn)
 				break;
 		}
@@ -5045,11 +5324,12 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
 		skb->tail += pkt_size;
 		skb->len = pkt_size;
 		dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
-
-		rtl8169_rx_csum(skb, desc);
+		if (tp->rss_enable)
+			rtl8169_rx_hash(tp, desc, skb);
+		rtl8169_rx_csum(tp, skb, desc);
 		skb->protocol = eth_type_trans(skb, dev);
 
-		rtl8169_rx_vlan_tag(desc, skb);
+		rtl8169_rx_vlan_tag(tp, desc, skb);
 
 		if (skb->pkt_type == PACKET_MULTICAST)
 			dev->stats.multicast++;
@@ -5058,8 +5338,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
 
 		dev_sw_netstats_rx_add(dev, pkt_size);
 release_descriptor:
-		rtl8169_set_desc_dma_addr(desc, ring->rx_desc_phy_addr[entry]);
-		rtl8169_mark_to_asic(desc);
+		rtl8169_set_desc_dma_addr(tp, desc, ring->rx_desc_phy_addr[entry]);
+		rtl8169_mark_to_asic(tp, desc);
 	}
 
 	return count;
@@ -5607,6 +5887,43 @@ static void rtl_set_irq_mask(struct rtl8169_private *tp)
 	}
 }
 
+static int get_max_irq_nvecs(struct rtl8169_private *tp)
+{
+	if (tp->mac_version == RTL_GIGA_MAC_VER_80)
+		return R8127_MAX_IRQ;
+	return R8169_IRQ_DEFAULT;
+}
+
+static int get_min_irq_nvecs(struct rtl8169_private *tp)
+{
+	if (tp->mac_version == RTL_GIGA_MAC_VER_80)
+		return R8127_MIN_IRQ;
+	return R8169_IRQ_DEFAULT;
+}
+
+static void rtl8169_double_check_rss_support(struct rtl8169_private *tp)
+{
+	if (tp->hw_curr_isr_ver > RTL_ISR_VER_DEFAULT) {
+		if (!(tp->features & RTL_VEC_MAP_ENABLE) || tp->irq_nvecs < get_min_irq_nvecs(tp))
+			tp->hw_curr_isr_ver = RTL_ISR_VER_8127;
+	}
+
+	if (tp->rss_support && tp->hw_curr_isr_ver > RTL_ISR_VER_DEFAULT) {
+		u8 rss_queue_num = netif_get_num_default_rss_queues();
+
+		tp->num_rx_rings = min(rss_queue_num, tp->hw_supp_num_rx_queues);
+		if (!(tp->num_rx_rings >= 2 && tp->irq_nvecs >= get_min_irq_nvecs(tp)))
+			tp->num_rx_rings = 1;
+	}
+
+	tp->rss_enable = 0;
+
+	if (tp->num_rx_rings >= 2) {
+		tp->rss_enable = 1;
+		tp->init_rx_desc_type = RX_DESC_RING_TYPE_RSS;
+	}
+}
+
 static int rtl_alloc_irq(struct rtl8169_private *tp)
 {
 	struct pci_dev *pdev = tp->pci_dev;
@@ -5627,7 +5944,10 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
 		break;
 	}
 
-	nvecs = pci_alloc_irq_vectors(pdev, 1, 1, flags);
+	nvecs = pci_alloc_irq_vectors(pdev, get_min_irq_nvecs(tp), get_max_irq_nvecs(tp), flags);
+
+	if (nvecs < 0)
+		nvecs = pci_alloc_irq_vectors(pdev, 1, 1, flags);
 
 	if (nvecs < 0)
 		return nvecs;
@@ -6069,6 +6389,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	tp->dash_type = rtl_get_dash_type(tp);
 	tp->dash_enabled = rtl_dash_is_enabled(tp);
+	tp->rss_support = rtl_check_rss_support(tp);
+
+	if (tp->rss_support) {
+		tp->rss_data = devm_kzalloc(&pdev->dev, sizeof(*tp->rss_data), GFP_KERNEL);
+		if (!tp->rss_data)
+			return -ENOMEM;
+	}
 
 	tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK;
 
@@ -6095,6 +6422,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!tp->rtl8169_napi)
 		return -ENOMEM;
 
+	rtl8169_double_check_rss_support(tp);
+
+	if (tp->rss_support)
+		rtl8169_init_rss(tp);
+
 	INIT_WORK(&tp->wk.work, rtl_task);
 	disable_work(&tp->wk.work);
 
@@ -6112,6 +6444,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
+	if (tp->rss_support) {
+		dev->hw_features |= NETIF_F_RXHASH;
+		dev->features |= NETIF_F_RXHASH;
+	}
+
 	/*
 	 * Pretend we are using VLANs; This bypasses a nasty bug where
 	 * Interrupts stop flowing on high load on 8110SCd controllers.
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Patch net-next v3 6/7] r8169: move struct ethtool_ops
  2026-05-13 11:55 [Patch net-next v3 0/7] r8169: add RSS support for RTL8127 javen
                   ` (4 preceding siblings ...)
  2026-05-13 11:55 ` [Patch net-next v3 5/7] r8169: add support and enable rss javen
@ 2026-05-13 11:55 ` javen
  2026-05-13 11:55 ` [Patch net-next v3 7/7] r8169: add support for ethtool javen
  6 siblings, 0 replies; 17+ messages in thread
From: javen @ 2026-05-13 11:55 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

This patch moves struct ethtool_ops further down in the file to support
the new functions rtl8169_get_channels and rtl8169_set_channels, which
would otherwise need forward declarations.

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
Changes in v2:
 - no changes

Changes in v3:
 - no changes
---
 drivers/net/ethernet/realtek/r8169_main.c | 56 +++++++++++------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index b90375cef724..f654e98e47be 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2550,34 +2550,6 @@ static int rtl8169_set_link_ksettings(struct net_device *ndev,
 	return 0;
 }
 
-static const struct ethtool_ops rtl8169_ethtool_ops = {
-	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
-				     ETHTOOL_COALESCE_MAX_FRAMES,
-	.get_drvinfo		= rtl8169_get_drvinfo,
-	.get_regs_len		= rtl8169_get_regs_len,
-	.get_link		= ethtool_op_get_link,
-	.get_coalesce		= rtl_get_coalesce,
-	.set_coalesce		= rtl_set_coalesce,
-	.get_regs		= rtl8169_get_regs,
-	.get_wol		= rtl8169_get_wol,
-	.set_wol		= rtl8169_set_wol,
-	.get_strings		= rtl8169_get_strings,
-	.get_sset_count		= rtl8169_get_sset_count,
-	.get_ethtool_stats	= rtl8169_get_ethtool_stats,
-	.get_ts_info		= ethtool_op_get_ts_info,
-	.nway_reset		= phy_ethtool_nway_reset,
-	.get_eee		= rtl8169_get_eee,
-	.set_eee		= rtl8169_set_eee,
-	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
-	.set_link_ksettings	= rtl8169_set_link_ksettings,
-	.get_ringparam		= rtl8169_get_ringparam,
-	.get_pause_stats	= rtl8169_get_pause_stats,
-	.get_pauseparam		= rtl8169_get_pauseparam,
-	.set_pauseparam		= rtl8169_set_pauseparam,
-	.get_eth_mac_stats	= rtl8169_get_eth_mac_stats,
-	.get_eth_ctrl_stats	= rtl8169_get_eth_ctrl_stats,
-};
-
 static const struct rtl_chip_info *rtl8169_get_chip_version(u32 xid, bool gmii)
 {
 	/* Chips combining a 1Gbps MAC with a 100Mbps PHY */
@@ -6297,6 +6269,34 @@ static void r8169_init_napi(struct rtl8169_private *tp)
 	}
 }
 
+static const struct ethtool_ops rtl8169_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+				     ETHTOOL_COALESCE_MAX_FRAMES,
+	.get_drvinfo		= rtl8169_get_drvinfo,
+	.get_regs_len		= rtl8169_get_regs_len,
+	.get_link		= ethtool_op_get_link,
+	.get_coalesce		= rtl_get_coalesce,
+	.set_coalesce		= rtl_set_coalesce,
+	.get_regs		= rtl8169_get_regs,
+	.get_wol		= rtl8169_get_wol,
+	.set_wol		= rtl8169_set_wol,
+	.get_strings		= rtl8169_get_strings,
+	.get_sset_count		= rtl8169_get_sset_count,
+	.get_ethtool_stats	= rtl8169_get_ethtool_stats,
+	.get_ts_info		= ethtool_op_get_ts_info,
+	.nway_reset		= phy_ethtool_nway_reset,
+	.get_eee		= rtl8169_get_eee,
+	.set_eee		= rtl8169_set_eee,
+	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
+	.set_link_ksettings	= rtl8169_set_link_ksettings,
+	.get_ringparam		= rtl8169_get_ringparam,
+	.get_pause_stats	= rtl8169_get_pause_stats,
+	.get_pauseparam		= rtl8169_get_pauseparam,
+	.set_pauseparam		= rtl8169_set_pauseparam,
+	.get_eth_mac_stats	= rtl8169_get_eth_mac_stats,
+	.get_eth_ctrl_stats	= rtl8169_get_eth_ctrl_stats,
+};
+
 static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	const struct rtl_chip_info *chip;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Patch net-next v3 7/7] r8169: add support for ethtool
  2026-05-13 11:55 [Patch net-next v3 0/7] r8169: add RSS support for RTL8127 javen
                   ` (5 preceding siblings ...)
  2026-05-13 11:55 ` [Patch net-next v3 6/7] r8169: move struct ethtool_ops javen
@ 2026-05-13 11:55 ` javen
  2026-05-16 22:07   ` Heiner Kallweit
  6 siblings, 1 reply; 17+ messages in thread
From: javen @ 2026-05-13 11:55 UTC (permalink / raw)
  To: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, horms
  Cc: netdev, linux-kernel, Javen Xu

From: Javen Xu <javen_xu@realsil.com.cn>

This patch adds support for changing the number of rx queues via ethtool.
The rx queue count can be set to 1, 2, 4 or 8 with
'ethtool -L eth1 rx <num>'.

Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
---
Changes in v2:
 - no changes

Changes in v3:
 - no changes
---
 drivers/net/ethernet/realtek/r8169_main.c | 126 ++++++++++++++++++++++
 1 file changed, 126 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index f654e98e47be..ae64955e47d4 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -6269,6 +6269,130 @@ static void r8169_init_napi(struct rtl8169_private *tp)
 	}
 }
 
+static void rtl8169_get_channels(struct net_device *dev,
+				 struct ethtool_channels *ch)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+
+	ch->max_rx = tp->hw_supp_num_rx_queues;
+	ch->max_tx = 1;
+	ch->max_other = 0;
+	ch->max_combined = 0;
+
+	ch->rx_count = tp->num_rx_rings;
+	ch->tx_count = 1;
+	ch->other_count = 0;
+	ch->combined_count = 0;
+}
+
+static int rtl8169_realloc_rx(struct rtl8169_private *tp,
+			      struct rtl8169_rx_ring *new_rx,
+			      int new_count)
+{
+	int i, ret;
+
+	for (i = 0; i < new_count; i++) {
+		struct rtl8169_rx_ring *ring = &new_rx[i];
+
+		ring->rx_desc_array = dma_alloc_coherent(&tp->pci_dev->dev,
+							 R8169_RX_RING_BYTES,
+							 &ring->rx_phy_addr,
+							 GFP_KERNEL);
+		if (!ring->rx_desc_array) {
+			ret = -ENOMEM;
+			goto err_free;
+		}
+
+		memset(ring->rx_databuff, 0, sizeof(ring->rx_databuff));
+		ret = rtl8169_rx_fill(tp, ring);
+		if (ret) {
+			dma_free_coherent(&tp->pci_dev->dev, R8169_RX_RING_BYTES,
+					  ring->rx_desc_array, ring->rx_phy_addr);
+			goto err_free;
+		}
+	}
+	return 0;
+
+err_free:
+	while (--i >= 0) {
+		rtl8169_rx_clear(tp, &new_rx[i]);
+		dma_free_coherent(&tp->pci_dev->dev, R8169_RX_RING_BYTES,
+				  new_rx[i].rx_desc_array, new_rx[i].rx_phy_addr);
+	}
+	return ret;
+}
+
+static int rtl8169_set_channels(struct net_device *dev,
+				struct ethtool_channels *ch)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+	bool if_running = netif_running(dev);
+	struct rtl8169_rx_ring *new_rx;
+	u8 old_tx_desc_type = tp->init_rx_desc_type;
+	u8 new_desc_type;
+	bool new_rss_enable;
+	int i, ret;
+
+	if (!tp->rss_support && (ch->rx_count > 1 || ch->tx_count > 1)) {
+		netdev_warn(dev, "This chip does not support multiple channels/RSS.\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!(tp->features & RTL_VEC_MAP_ENABLE))
+		return -EINVAL;
+
+	new_rss_enable = (ch->rx_count > 1 && tp->rss_support);
+	new_desc_type = new_rss_enable ? RX_DESC_RING_TYPE_RSS : RX_DESC_RING_TYPE_DEFAULT;
+	tp->init_rx_desc_type = new_desc_type;
+
+	if (!if_running) {
+		tp->num_rx_rings = ch->rx_count;
+		tp->rss_enable = new_rss_enable;
+		return 0;
+	}
+
+	new_rx = kcalloc(R8169_MAX_RX_QUEUES, sizeof(*new_rx), GFP_KERNEL);
+	if (!new_rx)
+		return -ENOMEM;
+
+	ret = rtl8169_realloc_rx(tp, new_rx, ch->rx_count);
+	if (ret) {
+		kfree(new_rx);
+		tp->init_rx_desc_type = old_tx_desc_type;
+		return ret;
+	}
+
+	netif_stop_queue(dev);
+	rtl8169_down(tp);
+
+	for (i = 0; i < tp->num_rx_rings; i++)
+		rtl8169_rx_clear(tp, &tp->rx_ring[i]);
+	rtl8169_free_rx_desc(tp);
+
+	tp->num_rx_rings = ch->rx_count;
+	tp->rss_enable = new_rss_enable;
+
+	memset(tp->rx_ring, 0, sizeof(tp->rx_ring));
+	memcpy(tp->rx_ring, new_rx, sizeof(*new_rx) * ch->rx_count);
+
+	for (i = 0; i < tp->rss_data->hw_supp_indir_tbl_entries; i++) {
+		if (tp->rss_enable)
+			tp->rss_data->rss_indir_tbl[i] =
+				ethtool_rxfh_indir_default(i, tp->num_rx_rings);
+		else
+			tp->rss_data->rss_indir_tbl[i] = 0;
+	}
+
+	rtl_set_irq_mask(tp);
+
+	rtl8169_up(tp);
+	netif_start_queue(dev);
+
+	kfree(new_rx);
+
+	return 0;
+}
+
 static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
 				     ETHTOOL_COALESCE_MAX_FRAMES,
@@ -6287,6 +6411,8 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.nway_reset		= phy_ethtool_nway_reset,
 	.get_eee		= rtl8169_get_eee,
 	.set_eee		= rtl8169_set_eee,
+	.get_channels		= rtl8169_get_channels,
+	.set_channels		= rtl8169_set_channels,
 	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
 	.set_link_ksettings	= rtl8169_set_link_ksettings,
 	.get_ringparam		= rtl8169_get_ringparam,
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [Patch net-next v3 5/7] r8169: add support and enable rss
  2026-05-13 11:55 ` [Patch net-next v3 5/7] r8169: add support and enable rss javen
@ 2026-05-15  0:21   ` Jakub Kicinski
  2026-05-16 22:07   ` Heiner Kallweit
  1 sibling, 0 replies; 17+ messages in thread
From: Jakub Kicinski @ 2026-05-15  0:21 UTC (permalink / raw)
  To: javen
  Cc: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, pabeni,
	horms, netdev, linux-kernel

On Wed, 13 May 2026 19:55:41 +0800 javen wrote:
> From: Javen Xu <javen_xu@realsil.com.cn>
> 
> This patch adds support and enable rss for RTL8127.

Still doesn't build cleanly:

../drivers/net/ethernet/realtek/r8169_main.c:5203:24: warning: incorrect type in return expression (different base types)
../drivers/net/ethernet/realtek/r8169_main.c:5203:24:    expected unsigned int
../drivers/net/ethernet/realtek/r8169_main.c:5203:24:    got restricted __le32
../drivers/net/ethernet/realtek/r8169_main.c:5205:24: warning: incorrect type in return expression (different base types)
../drivers/net/ethernet/realtek/r8169_main.c:5205:24:    expected unsigned int
../drivers/net/ethernet/realtek/r8169_main.c:5205:24:    got restricted __le32
../drivers/net/ethernet/realtek/r8169_main.c:5264:26: warning: cast to restricted __le32
../drivers/net/ethernet/realtek/r8169_main.c:5278:34: warning: cast to restricted __le32

Install sparse and build with C=1, please.
-- 
pw-bot: cr

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch net-next v3 1/7] r8169: add support for multi irqs
  2026-05-13 11:55 ` [Patch net-next v3 1/7] r8169: add support for multi irqs javen
@ 2026-05-16 22:07   ` Heiner Kallweit
  0 siblings, 0 replies; 17+ messages in thread
From: Heiner Kallweit @ 2026-05-16 22:07 UTC (permalink / raw)
  To: javen, nic_swsd, andrew+netdev, davem, edumazet, kuba, pabeni,
	horms
  Cc: netdev, linux-kernel

On 13.05.2026 13:55, javen wrote:
> From: Javen Xu <javen_xu@realsil.com.cn>
> 
> RSS uses multi rx queues to receive packets, and each rx queue needs one
> irq and napi. So this patch adds support for multi irqs and napi here.
> 
> Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
> ---
> Changes in v2:
>  - remove some unused definitions, such as index, name in rtl8169_irq
>  - remove array imr and isr
>  - remove min_irq_nvecs and max_irq_nvecs, replaced with help function
>    get_min_irq_nvecs and get_max_irq_nvecs
>  - alloc irq by flags, instead of PCI_IRQ_ALL_TYPES
> 
> Changes in v3:
>  - add enum rtl_isr_version to replace macro definition
>  - remove struct rtl8169_napi, use napi_struct array instead and alloc
>    memory for this array dynamically
>  - remove struct rtl8169_irq
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 146 +++++++++++++++++++---
>  1 file changed, 127 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index 791277e750ba..e4fc84c97c1e 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -77,6 +77,7 @@
>  #define R8169_RX_RING_BYTES	(NUM_RX_DESC * sizeof(struct RxDesc))
>  #define R8169_TX_STOP_THRS	(MAX_SKB_FRAGS + 1)
>  #define R8169_TX_START_THRS	(2 * R8169_TX_STOP_THRS)
> +#define R8169_MAX_MSIX_VEC	32
>  
>  #define OCP_STD_PHY_BASE	0xa400
>  
> @@ -435,6 +436,8 @@ enum rtl8125_registers {
>  #define INT_CFG0_CLKREQEN		BIT(3)
>  	IntrMask_8125		= 0x38,
>  	IntrStatus_8125		= 0x3c,
> +	INTR_VEC_MAP_MASK	= 0x800,
> +	INTR_VEC_MAP_STATUS	= 0x802,
>  	INT_CFG1_8125		= 0x7a,
>  	LEDSEL2			= 0x84,
>  	LEDSEL1			= 0x86,
> @@ -578,6 +581,11 @@ enum rtl_register_content {
>  	MagicPacket_v2	= (1 << 16),	/* Wake up when receives a Magic Packet */
>  };
>  
> +enum rtl_isr_version {
> +	RTL_ISR_VER_DEFAULT = 0,
> +	RTL_ISR_VER_8127,
> +};
> +
>  enum rtl_desc_bit {
>  	/* First doubleword. */
>  	DescOwn		= (1 << 31), /* Descriptor is owned by NIC */
> @@ -733,7 +741,6 @@ struct rtl8169_private {
>  	struct pci_dev *pci_dev;
>  	struct net_device *dev;
>  	struct phy_device *phydev;
> -	struct napi_struct napi;
>  	enum mac_version mac_version;
>  	enum rtl_dash_type dash_type;
>  	u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
> @@ -745,9 +752,16 @@ struct rtl8169_private {
>  	dma_addr_t RxPhyAddr;
>  	struct page *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
>  	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
> +	struct napi_struct *rtl8169_napi;
> +	unsigned int num_rx_rings;
>  	u16 cp_cmd;
>  	u16 tx_lpi_timer;
>  	u32 irq_mask;
> +	u16 hw_supp_num_rx_queues;

Sized types should only be used if the value directly represents
a register value. Why not simply use unsigned int here?

> +	enum rtl_isr_version hw_supp_isr_ver;
> +	enum rtl_isr_version hw_curr_isr_ver;
> +	u8 irq_nvecs;
> +	bool recheck_desc_ownbit;

Can't this flag be moved to, and handled the same as,
the following bool xxx:1 flags?

>  	int irq;
>  	struct clk *clk;
>  
> @@ -763,6 +777,8 @@ struct rtl8169_private {
>  	unsigned aspm_manageable:1;
>  	unsigned dash_enabled:1;
>  	bool sfp_mode:1;
> +	bool rss_support:1;
> +	bool rss_enable:1;
>  	dma_addr_t counters_phys_addr;
>  	struct rtl8169_counters *counters;
>  	struct rtl8169_tc_offsets tc_offset;
> @@ -2680,6 +2696,21 @@ static void rtl_hw_reset(struct rtl8169_private *tp)
>  	rtl_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
>  }
>  
> +static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
> +{
> +	tp->num_rx_rings = 1;
> +
> +	switch (tp->mac_version) {
> +	case RTL_GIGA_MAC_VER_80:
> +		tp->hw_supp_isr_ver = RTL_ISR_VER_8127;
> +		break;
> +	default:
> +		tp->hw_supp_isr_ver = RTL_ISR_VER_DEFAULT;
> +		break;
> +	}
> +	tp->hw_curr_isr_ver = tp->hw_supp_isr_ver;

I don't see hw_supp_isr_ver used anywhere else, so do we need to store it?

> +}
> +
>  static void rtl_request_firmware(struct rtl8169_private *tp)
>  {
>  	struct rtl_fw *rtl_fw;
> @@ -4266,9 +4297,21 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp)
>  	netdev_reset_queue(tp->dev);
>  }
>  
> +static void rtl8169_napi_disable(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->irq_nvecs; i++)
> +		napi_disable(&tp->rtl8169_napi[i]);
> +}
> +
> +static void rtl8169_napi_enable(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->irq_nvecs; i++)
> +		napi_enable(&tp->rtl8169_napi[i]);
> +}
> +
>  static void rtl8169_cleanup(struct rtl8169_private *tp)
>  {
> -	napi_disable(&tp->napi);
> +	rtl8169_napi_disable(tp);
>  
>  	/* Give a racing hard_start_xmit a few cycles to complete. */
>  	synchronize_net();
> @@ -4314,7 +4357,7 @@ static void rtl_reset_work(struct rtl8169_private *tp)
>  	for (i = 0; i < NUM_RX_DESC; i++)
>  		rtl8169_mark_to_asic(tp->RxDescArray + i);
>  
> -	napi_enable(&tp->napi);
> +	rtl8169_napi_enable(tp);
>  	rtl_hw_start(tp);
>  }
>  
> @@ -4820,7 +4863,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  			goto release_descriptor;
>  		}
>  
> -		skb = napi_alloc_skb(&tp->napi, pkt_size);
> +		skb = napi_alloc_skb(&tp->rtl8169_napi[0], pkt_size);
>  		if (unlikely(!skb)) {
>  			dev->stats.rx_dropped++;
>  			goto release_descriptor;
> @@ -4844,7 +4887,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		if (skb->pkt_type == PACKET_MULTICAST)
>  			dev->stats.multicast++;
>  
> -		napi_gro_receive(&tp->napi, skb);
> +		napi_gro_receive(&tp->rtl8169_napi[0], skb);
>  
>  		dev_sw_netstats_rx_add(dev, pkt_size);
>  release_descriptor:
> @@ -4856,7 +4899,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  
>  static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  {
> -	struct rtl8169_private *tp = dev_instance;
> +	struct napi_struct *napi = dev_instance;
> +	struct rtl8169_private *tp = netdev_priv(napi->dev);

Reverse xmas tree

>  	u32 status = rtl_get_events(tp);
>  
>  	if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
> @@ -4873,13 +4917,41 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  		phy_mac_interrupt(tp->phydev);
>  
>  	rtl_irq_disable(tp);
> -	napi_schedule(&tp->napi);
> +	napi_schedule(napi);
>  out:
>  	rtl_ack_events(tp, status);
>  
>  	return IRQ_HANDLED;
>  }
>  
> +static void rtl8169_free_irq(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->irq_nvecs; i++) {
> +		struct napi_struct *napi = &tp->rtl8169_napi[i];
> +
> +		pci_free_irq(tp->pci_dev, i, napi);
> +	}
> +}
> +
> +static int rtl8169_request_irq(struct rtl8169_private *tp)
> +{
> +	struct net_device *dev = tp->dev;
> +	struct napi_struct *napi;
> +	int rc = 0;
> +
> +	for (int i = 0; i < tp->irq_nvecs; i++) {
> +		napi = &tp->rtl8169_napi[i];
> +		rc = pci_request_irq(tp->pci_dev, i, rtl8169_interrupt,
> +				     NULL, napi, "%s-%d", dev->name, i);
> +		if (rc)
> +			break;
> +	}
> +
> +	if (rc)
> +		rtl8169_free_irq(tp);
> +	return rc;
> +}
> +
>  static void rtl_task(struct work_struct *work)
>  {
>  	struct rtl8169_private *tp =
> @@ -4914,9 +4986,9 @@ static void rtl_task(struct work_struct *work)
>  
>  static int rtl8169_poll(struct napi_struct *napi, int budget)
>  {
> -	struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
> -	struct net_device *dev = tp->dev;
> -	int work_done;
> +	struct net_device *dev = napi->dev;
> +	struct rtl8169_private *tp = netdev_priv(dev);

Reverse xmas tree

> +	int work_done = 0;
>  
>  	rtl_tx(dev, tp, budget);
>  
> @@ -5035,7 +5107,7 @@ static void rtl8169_up(struct rtl8169_private *tp)
>  	phy_init_hw(tp->phydev);
>  	phy_resume(tp->phydev);
>  	rtl8169_init_phy(tp);
> -	napi_enable(&tp->napi);
> +	rtl8169_napi_enable(tp);
>  	enable_work(&tp->wk.work);
>  	rtl_reset_work(tp);
>  
> @@ -5053,7 +5125,7 @@ static int rtl8169_close(struct net_device *dev)
>  	rtl8169_down(tp);
>  	rtl8169_rx_clear(tp);
>  
> -	free_irq(tp->irq, tp);
> +	rtl8169_free_irq(tp);
>  
>  	phy_disconnect(tp->phydev);
>  
> @@ -5082,7 +5154,6 @@ static int rtl_open(struct net_device *dev)
>  {
>  	struct rtl8169_private *tp = netdev_priv(dev);
>  	struct pci_dev *pdev = tp->pci_dev;
> -	unsigned long irqflags;
>  	int retval = -ENOMEM;
>  
>  	pm_runtime_get_sync(&pdev->dev);
> @@ -5107,8 +5178,7 @@ static int rtl_open(struct net_device *dev)
>  
>  	rtl_request_firmware(tp);
>  
> -	irqflags = pci_dev_msi_enabled(pdev) ? IRQF_NO_THREAD : IRQF_SHARED;
> -	retval = request_irq(tp->irq, rtl8169_interrupt, irqflags, dev->name, tp);
> +	retval = rtl8169_request_irq(tp);
>  	if (retval < 0)
>  		goto err_release_fw_2;
>  
> @@ -5125,7 +5195,7 @@ static int rtl_open(struct net_device *dev)
>  	return retval;
>  
>  err_free_irq:
> -	free_irq(tp->irq, tp);
> +	rtl8169_free_irq(tp);
>  err_release_fw_2:
>  	rtl_release_firmware(tp);
>  	rtl8169_rx_clear(tp);
> @@ -5328,7 +5398,9 @@ static void rtl_set_irq_mask(struct rtl8169_private *tp)
>  
>  static int rtl_alloc_irq(struct rtl8169_private *tp)
>  {
> +	struct pci_dev *pdev = tp->pci_dev;
>  	unsigned int flags;
> +	int nvecs;
>  
>  	switch (tp->mac_version) {
>  	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
> @@ -5344,7 +5416,14 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
>  		break;
>  	}
>  
> -	return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
> +	nvecs = pci_alloc_irq_vectors(pdev, 1, 1, flags);
> +
> +	if (nvecs < 0)
> +		return nvecs;
> +
> +	tp->irq_nvecs = nvecs;
> +
> +	return 0;
>  }
>  
>  static void rtl_read_mac_address(struct rtl8169_private *tp,
> @@ -5539,6 +5618,17 @@ static void rtl_hw_initialize(struct rtl8169_private *tp)
>  	}
>  }
>  
> +static int rtl8169_set_real_num_queue(struct rtl8169_private *tp)
> +{
> +	int retval;

Sometimes you use retval, sometimes rc. This driver typically uses ret.

> +
> +	retval = netif_set_real_num_tx_queues(tp->dev, 1);
> +	if (retval < 0)
> +		return retval;
> +
> +	return netif_set_real_num_rx_queues(tp->dev, tp->num_rx_rings);
> +}
> +
>  static int rtl_jumbo_max(struct rtl8169_private *tp)
>  {
>  	/* Non-GBit versions don't support jumbo frames */
> @@ -5599,6 +5689,12 @@ static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
>  	return false;
>  }
>  
> +static void r8169_init_napi(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->irq_nvecs; i++)
> +		netif_napi_add(tp->dev, &tp->rtl8169_napi[i], rtl8169_poll);
> +}
> +
>  static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  {
>  	const struct rtl_chip_info *chip;
> @@ -5703,11 +5799,16 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  
>  	rtl_hw_reset(tp);
>  
> +	rtl_software_parameter_initialize(tp);
> +
>  	rc = rtl_alloc_irq(tp);
>  	if (rc < 0)
>  		return dev_err_probe(&pdev->dev, rc, "Can't allocate interrupt\n");
>  
> -	tp->irq = pci_irq_vector(pdev, 0);
> +	tp->rtl8169_napi = devm_kcalloc(&pdev->dev, tp->irq_nvecs,
> +					sizeof(struct napi_struct), GFP_KERNEL);
> +	if (!tp->rtl8169_napi)
> +		return -ENOMEM;
>  
>  	INIT_WORK(&tp->wk.work, rtl_task);
>  	disable_work(&tp->wk.work);
> @@ -5716,7 +5817,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  
>  	dev->ethtool_ops = &rtl8169_ethtool_ops;
>  
> -	netif_napi_add(dev, &tp->napi, rtl8169_poll);
> +	if (!tp->rss_support)
> +		netif_napi_add(dev, &tp->rtl8169_napi[0], rtl8169_poll);
> +	else
> +		r8169_init_napi(tp);
>  
>  	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
>  			   NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
> @@ -5778,6 +5882,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	if (jumbo_max)
>  		dev->max_mtu = jumbo_max;
>  
> +	rc = rtl8169_set_real_num_queue(tp);
> +	if (rc < 0)
> +		return dev_err_probe(&pdev->dev, rc, "set tx/rx num failure\n");
> +
>  	rtl_set_irq_mask(tp);
>  
>  	tp->counters = dmam_alloc_coherent (&pdev->dev, sizeof(*tp->counters),


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch net-next v3 2/7] r8169: add support for multi rx queues
  2026-05-13 11:55 ` [Patch net-next v3 2/7] r8169: add support for multi rx queues javen
@ 2026-05-16 22:07   ` Heiner Kallweit
  2026-05-18  7:47     ` Javen
  0 siblings, 1 reply; 17+ messages in thread
From: Heiner Kallweit @ 2026-05-16 22:07 UTC (permalink / raw)
  To: javen, nic_swsd, andrew+netdev, davem, edumazet, kuba, pabeni,
	horms
  Cc: netdev, linux-kernel

On 13.05.2026 13:55, javen wrote:
> From: Javen Xu <javen_xu@realsil.com.cn>
> 
> This patch adds support for multi rx queues. RSS requires multi rx
> queues to receive packets. So we need struct rtl8169_rx_ring for each
> queue.
> 
> Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
> ---
> Changes in v2:
>  - sort some registers by its number
>  - remove some unused definitions, like RX_DESC_RING_TYPE_MAX
>  - change recheck_desc_ownbit type
>  - remove rdsar_reg in rx_ring struct
>  - opts1 are different in rx_desc and rx_desc_rss, move the judgement
>    to Patch 5/7
> 
> Changes in v3:
>  - remove ring->rx_desc_alloc_size, use constant instead
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 262 ++++++++++++++++------
>  1 file changed, 199 insertions(+), 63 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index e4fc84c97c1e..9dab0fbcca61 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -74,10 +74,13 @@
>  #define NUM_TX_DESC	256	/* Number of Tx descriptor registers */
>  #define NUM_RX_DESC	256	/* Number of Rx descriptor registers */
>  #define R8169_TX_RING_BYTES	(NUM_TX_DESC * sizeof(struct TxDesc))
> -#define R8169_RX_RING_BYTES	(NUM_RX_DESC * sizeof(struct RxDesc))
> +#define R8169_RX_RING_BYTES	((NUM_RX_DESC + 1) * sizeof(struct RxDesc))
>  #define R8169_TX_STOP_THRS	(MAX_SKB_FRAGS + 1)
>  #define R8169_TX_START_THRS	(2 * R8169_TX_STOP_THRS)
> +#define R8169_MAX_RX_QUEUES	8
>  #define R8169_MAX_MSIX_VEC	32
> +#define R8127_MAX_RX_QUEUES	8
> +#define R8169_DEFAULT_RX_QUEUES	1
>  
>  #define OCP_STD_PHY_BASE	0xa400
>  
> @@ -444,6 +447,7 @@ enum rtl8125_registers {
>  	TxPoll_8125		= 0x90,
>  	LEDSEL3			= 0x96,
>  	MAC0_BKP		= 0x19e0,
> +	RDSAR_Q1_LOW		= 0x4000,
>  	RSS_CTRL_8125		= 0x4500,
>  	Q_NUM_CTRL_8125		= 0x4800,
>  	EEE_TXIDLE_TIMER_8125	= 0x6048,
> @@ -736,6 +740,21 @@ enum rtl_dash_type {
>  	RTL_DASH_25_BP,
>  };
>  
> +enum rx_desc_ring_type {
> +	RX_DESC_RING_TYPE_DEFAULT,
> +	RX_DESC_RING_TYPE_RSS,
> +};
> +
> +struct rtl8169_rx_ring {
> +	u32 index;					/* Rx queue index */
> +	u32 cur_rx;					/* Index of next Rx pkt. */
> +	u32 dirty_rx;					/* Index for recycling. */
> +	struct RxDesc *rx_desc_array;			/* array of Rx Desc*/
> +	dma_addr_t rx_desc_phy_addr[NUM_RX_DESC];	/* Rx data buffer physical dma address */
> +	dma_addr_t rx_phy_addr;				/* Rx desc physical address */
> +	struct page *rx_databuff[NUM_RX_DESC];		/* Rx data buffers */
> +};
> +
>  struct rtl8169_private {
>  	void __iomem *mmio_addr;	/* memory map physical address */
>  	struct pci_dev *pci_dev;
> @@ -743,16 +762,13 @@ struct rtl8169_private {
>  	struct phy_device *phydev;
>  	enum mac_version mac_version;
>  	enum rtl_dash_type dash_type;
> -	u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
>  	u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
>  	u32 dirty_tx;
>  	struct TxDesc *TxDescArray;	/* 256-aligned Tx descriptor ring */
> -	struct RxDesc *RxDescArray;	/* 256-aligned Rx descriptor ring */
>  	dma_addr_t TxPhyAddr;
> -	dma_addr_t RxPhyAddr;
> -	struct page *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
>  	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
>  	struct napi_struct *rtl8169_napi;
> +	struct rtl8169_rx_ring rx_ring[R8169_MAX_RX_QUEUES];
>  	unsigned int num_rx_rings;
>  	u16 cp_cmd;
>  	u16 tx_lpi_timer;
> @@ -2635,9 +2651,27 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
>  	}
>  }
>  
> +static void rtl8169_rx_desc_init(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->num_rx_rings; i++) {
> +		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
> +
> +		memset(ring->rx_desc_array, 0x0, R8169_RX_RING_BYTES);
> +	}
> +}
> +
>  static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
>  {
> -	tp->dirty_tx = tp->cur_tx = tp->cur_rx = 0;
> +	tp->dirty_tx = 0;
> +	tp->cur_tx = 0;
> +
> +	for (int i = 0; i < tp->hw_supp_num_rx_queues; i++) {
> +		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
> +
> +		ring->dirty_rx = 0;
> +		ring->cur_rx = 0;
> +		ring->index = i;
> +	}
>  }
>  
>  static void rtl_jumbo_config(struct rtl8169_private *tp)
> @@ -2702,9 +2736,11 @@ static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
>  
>  	switch (tp->mac_version) {
>  	case RTL_GIGA_MAC_VER_80:
> +		tp->hw_supp_num_rx_queues = R8127_MAX_RX_QUEUES;
>  		tp->hw_supp_isr_ver = RTL_ISR_VER_8127;
>  		break;
>  	default:
> +		tp->hw_supp_num_rx_queues = R8169_DEFAULT_RX_QUEUES;
>  		tp->hw_supp_isr_ver = RTL_ISR_VER_DEFAULT;
>  		break;
>  	}
> @@ -2835,6 +2871,8 @@ static void rtl_set_rx_max_size(struct rtl8169_private *tp)
>  
>  static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
>  {
> +	struct rtl8169_rx_ring *ring = &tp->rx_ring[0];
> +
>  	/*
>  	 * Magic spell: some iop3xx ARM board needs the TxDescAddrHigh
>  	 * register to be written before TxDescAddrLow to work.
> @@ -2842,8 +2880,16 @@ static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
>  	 */
>  	RTL_W32(tp, TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
>  	RTL_W32(tp, TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
> -	RTL_W32(tp, RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
> -	RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
> +	RTL_W32(tp, RxDescAddrHigh, ((u64) ring->rx_phy_addr) >> 32);
> +	RTL_W32(tp, RxDescAddrLow, ((u64) ring->rx_phy_addr) & DMA_BIT_MASK(32));
> +
> +	for (int i = 1; i < tp->num_rx_rings; i++) {
> +		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
> +		u16 rdsar_reg = (u16)(RDSAR_Q1_LOW + (i - 1) * 8);

Not clear why u16 instead of unsigned int?

> +
> +		RTL_W32(tp, rdsar_reg + 4, ((u64)ring->rx_phy_addr >> 32));
> +		RTL_W32(tp, rdsar_reg, ((u64)ring->rx_phy_addr) & DMA_BIT_MASK(32));
> +	}
>  }
>  
>  static void rtl8169_set_magic_reg(struct rtl8169_private *tp)
> @@ -4190,12 +4236,13 @@ static void rtl8169_mark_to_asic(struct RxDesc *desc)
>  }
>  
>  static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
> -					  struct RxDesc *desc)
> +					  struct rtl8169_rx_ring *ring, unsigned int index)
>  {
>  	struct device *d = tp_to_dev(tp);
>  	int node = dev_to_node(d);
>  	dma_addr_t mapping;
>  	struct page *data;
> +	struct RxDesc *desc = ring->rx_desc_array + index;

Reverse xmas tree

>  
>  	data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
>  	if (!data)
> @@ -4209,55 +4256,100 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
>  	}
>  
>  	desc->addr = cpu_to_le64(mapping);
> +	ring->rx_desc_phy_addr[index] = mapping;
>  	rtl8169_mark_to_asic(desc);
>  
>  	return data;
>  }
>  
> -static void rtl8169_rx_clear(struct rtl8169_private *tp)
> +static void rtl8169_rx_clear(struct rtl8169_private *tp, struct rtl8169_rx_ring *ring)
>  {
>  	int i;
>  
> -	for (i = 0; i < NUM_RX_DESC && tp->Rx_databuff[i]; i++) {
> +	for (i = 0; i < NUM_RX_DESC && ring->rx_databuff[i]; i++) {
>  		dma_unmap_page(tp_to_dev(tp),
> -			       le64_to_cpu(tp->RxDescArray[i].addr),
> +			       ring->rx_desc_phy_addr[i],
>  			       R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
> -		__free_pages(tp->Rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
> -		tp->Rx_databuff[i] = NULL;
> -		tp->RxDescArray[i].addr = 0;
> -		tp->RxDescArray[i].opts1 = 0;
> +		__free_pages(ring->rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
> +		ring->rx_databuff[i] = NULL;
> +		ring->rx_desc_phy_addr[i] = 0;
> +		ring->rx_desc_array[i].addr = 0;
> +		ring->rx_desc_array[i].opts1 = 0;
>  	}
>  }
>  
> -static int rtl8169_rx_fill(struct rtl8169_private *tp)
> +static int rtl8169_rx_fill(struct rtl8169_private *tp, struct rtl8169_rx_ring *ring)
>  {
>  	int i;
>  
>  	for (i = 0; i < NUM_RX_DESC; i++) {
>  		struct page *data;
>  
> -		data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i);
> +		data = rtl8169_alloc_rx_data(tp, ring, i);
>  		if (!data) {
> -			rtl8169_rx_clear(tp);
> +			rtl8169_rx_clear(tp, ring);
>  			return -ENOMEM;
>  		}
> -		tp->Rx_databuff[i] = data;
> +		ring->rx_databuff[i] = data;
>  	}
>  
>  	/* mark as last descriptor in the ring */
> -	tp->RxDescArray[NUM_RX_DESC - 1].opts1 |= cpu_to_le32(RingEnd);
> +	ring->rx_desc_array[NUM_RX_DESC - 1].opts1 |= cpu_to_le32(RingEnd);
> +
> +	return 0;
> +}
> +
> +static int rtl8169_alloc_rx_desc(struct rtl8169_private *tp)
> +{
> +	struct rtl8169_rx_ring *ring;
> +	struct pci_dev *pdev = tp->pci_dev;

Reverse xmas tree. I won't check this any further, please check
yourself and fix in all affected places.

>  
> +	for (int i = 0; i < tp->num_rx_rings; i++) {
> +		ring = &tp->rx_ring[i];
> +		ring->rx_desc_array = dma_alloc_coherent(&pdev->dev,
> +							 R8169_RX_RING_BYTES,
> +							 &ring->rx_phy_addr,
> +							 GFP_KERNEL);
> +		if (!ring->rx_desc_array)
> +			return -1;
> +	}
>  	return 0;
>  }
>  
> +static void rtl8169_free_rx_desc(struct rtl8169_private *tp)
> +{
> +	struct rtl8169_rx_ring *ring;
> +	struct pci_dev *pdev = tp->pci_dev;
> +
> +	for (int i = 0; i < tp->num_rx_rings; i++) {
> +		ring = &tp->rx_ring[i];
> +		if (ring->rx_desc_array) {
> +			dma_free_coherent(&pdev->dev,
> +					  R8169_RX_RING_BYTES,
> +					  ring->rx_desc_array,
> +					  ring->rx_phy_addr);
> +			ring->rx_desc_array = NULL;
> +		}
> +	}
> +}
> +
>  static int rtl8169_init_ring(struct rtl8169_private *tp)
>  {
> +	int retval = 0;
> +
>  	rtl8169_init_ring_indexes(tp);
> +	rtl8169_rx_desc_init(tp);
>  
>  	memset(tp->tx_skb, 0, sizeof(tp->tx_skb));
> -	memset(tp->Rx_databuff, 0, sizeof(tp->Rx_databuff));
>  
> -	return rtl8169_rx_fill(tp);
> +	for (int i = 0; i < tp->num_rx_rings; i++) {
> +		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
> +
> +		memset(ring->rx_databuff, 0, sizeof(ring->rx_databuff));
> +		retval = rtl8169_rx_fill(tp, ring);
> +	}
> +
> +	return retval;
>  }
>  
>  static void rtl8169_unmap_tx_skb(struct rtl8169_private *tp, unsigned int entry)
> @@ -4346,16 +4438,23 @@ static void rtl8169_cleanup(struct rtl8169_private *tp)
>  	rtl8169_init_ring_indexes(tp);
>  }
>  
> -static void rtl_reset_work(struct rtl8169_private *tp)
> +static void rtl8169_rx_desc_reset(struct rtl8169_private *tp)
>  {
> -	int i;
> +	for (int i = 0; i < tp->num_rx_rings; i++) {
> +		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
>  
> +		for (int j = 0; j < NUM_RX_DESC; j++)
> +			rtl8169_mark_to_asic(ring->rx_desc_array + j);
> +	}
> +}
> +
> +static void rtl_reset_work(struct rtl8169_private *tp)
> +{
>  	netif_stop_queue(tp->dev);
>  
>  	rtl8169_cleanup(tp);
>  
> -	for (i = 0; i < NUM_RX_DESC; i++)
> -		rtl8169_mark_to_asic(tp->RxDescArray + i);
> +	rtl8169_rx_desc_reset(tp);
>  
>  	rtl8169_napi_enable(tp);
>  	rtl_hw_start(tp);
> @@ -4749,6 +4848,11 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev)
>  	rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
>  }
>  
> +static void rtl8169_desc_quirk(struct rtl8169_private *tp)
> +{
> +	RTL_R8(tp, LED_CTRL);
> +}
> +
>  static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,
>  		   int budget)
>  {
> @@ -4801,9 +4905,10 @@ static inline int rtl8169_fragmented_frame(u32 status)
>  	return (status & (FirstFrag | LastFrag)) != (FirstFrag | LastFrag);
>  }
>  
> -static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
> +static inline void rtl8169_rx_csum(struct sk_buff *skb,
> +				   struct RxDesc *desc)
>  {
> -	u32 status = opts1 & (RxProtoMask | RxCSFailMask);
> +	u32 status = le32_to_cpu(desc->opts1) & (RxProtoMask | RxCSFailMask);
>  
>  	if (status == RxProtoTCP || status == RxProtoUDP)
>  		skb->ip_summed = CHECKSUM_UNNECESSARY;
> @@ -4811,22 +4916,58 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
>  		skb_checksum_none_assert(skb);
>  }
>  
> -static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget)
> +static bool rtl8169_check_rx_desc_error(struct net_device *dev,
> +					struct rtl8169_private *tp,
> +					u32 status)
> +{
> +	if (unlikely(status & RxRES)) {
> +		if (status & (RxRWT | RxRUNT))
> +			dev->stats.rx_length_errors++;
> +		if (status & RxCRC)
> +			dev->stats.rx_crc_errors++;
> +		return true;
> +	}
> +	return false;
> +}
> +
> +static void rtl8169_set_desc_dma_addr(struct RxDesc *desc,
> +				      dma_addr_t mapping)
> +{
> +	desc->addr = cpu_to_le64(mapping);
> +}
> +
> +static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
> +		  struct rtl8169_rx_ring *ring, int budget)
>  {
>  	struct device *d = tp_to_dev(tp);
>  	int count;
>  
> -	for (count = 0; count < budget; count++, tp->cur_rx++) {
> -		unsigned int pkt_size, entry = tp->cur_rx % NUM_RX_DESC;
> -		struct RxDesc *desc = tp->RxDescArray + entry;
> +	for (count = 0; count < budget; count++, ring->cur_rx++) {
> +		unsigned int pkt_size, entry = ring->cur_rx % NUM_RX_DESC;
> +		struct RxDesc *desc = ring->rx_desc_array + entry;
>  		struct sk_buff *skb;
>  		const void *rx_buf;
>  		dma_addr_t addr;
>  		u32 status;
>  
>  		status = le32_to_cpu(READ_ONCE(desc->opts1));
> -		if (status & DescOwn)
> -			break;
> +
> +		if (status & DescOwn) {
> +			if (!tp->recheck_desc_ownbit)
> +				break;
> +
> +			/* Workaround for a hardware issue:
> +			 * Hardware might trigger RX interrupt before the DMA
> +			 * engine fully updates RX desc ownbit in host memory.
> +			 * So we do a quirk and re-read to avoid missing RX
> +			 * packets.
> +			 */
> +			tp->recheck_desc_ownbit = false;
> +			rtl8169_desc_quirk(tp);

Do we need this helper for one register read?
Description says "re-read to avoid missing RX", but you read the LED_CTRL
register. This needs an explanation, best extend the comment accordingly.
Would reading any arbitrary register do, or is there a specific reason
why you read exactly this register?

> +			status = le32_to_cpu(READ_ONCE(desc->opts1));
> +			if (status & DescOwn)
> +				break;
> +		}
>  
>  		/* This barrier is needed to keep us from reading
>  		 * any other fields out of the Rx descriptor until
> @@ -4834,20 +4975,14 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		 */
>  		dma_rmb();
>  
> -		if (unlikely(status & RxRES)) {
> +		if (rtl8169_check_rx_desc_error(dev, tp, status)) {
>  			if (net_ratelimit())
>  				netdev_warn(dev, "Rx ERROR. status = %08x\n",
>  					    status);
>  			dev->stats.rx_errors++;
> -			if (status & (RxRWT | RxRUNT))
> -				dev->stats.rx_length_errors++;
> -			if (status & RxCRC)
> -				dev->stats.rx_crc_errors++;
>  
>  			if (!(dev->features & NETIF_F_RXALL))
>  				goto release_descriptor;
> -			else if (status & RxRWT || !(status & (RxRUNT | RxCRC)))
> -				goto release_descriptor;
>  		}
>  
>  		pkt_size = status & GENMASK(13, 0);
> @@ -4863,14 +4998,14 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  			goto release_descriptor;
>  		}
>  
> -		skb = napi_alloc_skb(&tp->rtl8169_napi[0], pkt_size);
> +		skb = napi_alloc_skb(&tp->rtl8169_napi[ring->index], pkt_size);
>  		if (unlikely(!skb)) {
>  			dev->stats.rx_dropped++;
>  			goto release_descriptor;
>  		}
>  
> -		addr = le64_to_cpu(desc->addr);
> -		rx_buf = page_address(tp->Rx_databuff[entry]);
> +		addr = ring->rx_desc_phy_addr[entry];
> +		rx_buf = page_address(ring->rx_databuff[entry]);
>  
>  		dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
>  		prefetch(rx_buf);
> @@ -4879,7 +5014,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		skb->len = pkt_size;
>  		dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
>  
> -		rtl8169_rx_csum(skb, status);
> +		rtl8169_rx_csum(skb, desc);
>  		skb->protocol = eth_type_trans(skb, dev);
>  
>  		rtl8169_rx_vlan_tag(desc, skb);
> @@ -4887,10 +5022,11 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		if (skb->pkt_type == PACKET_MULTICAST)
>  			dev->stats.multicast++;
>  
> -		napi_gro_receive(&tp->rtl8169_napi[0], skb);
> +		napi_gro_receive(&tp->rtl8169_napi[ring->index], skb);
>  
>  		dev_sw_netstats_rx_add(dev, pkt_size);
>  release_descriptor:
> +		rtl8169_set_desc_dma_addr(desc, ring->rx_desc_phy_addr[entry]);
>  		rtl8169_mark_to_asic(desc);
>  	}
>  
> @@ -4917,6 +5053,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  		phy_mac_interrupt(tp->phydev);
>  
>  	rtl_irq_disable(tp);
> +	tp->recheck_desc_ownbit = true;
>  	napi_schedule(napi);
>  out:
>  	rtl_ack_events(tp, status);
> @@ -4992,7 +5129,8 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
>  
>  	rtl_tx(dev, tp, budget);
>  
> -	work_done = rtl_rx(dev, tp, budget);
> +	for (int i = 0; i < tp->num_rx_rings; i++)
> +		work_done += rtl_rx(dev, tp, &tp->rx_ring[i], budget);
>  
>  	if (work_done < budget && napi_complete_done(napi, work_done))
>  		rtl_irq_enable(tp);
> @@ -5120,21 +5258,19 @@ static int rtl8169_close(struct net_device *dev)
>  	struct pci_dev *pdev = tp->pci_dev;
>  
>  	pm_runtime_get_sync(&pdev->dev);
> -
>  	netif_stop_queue(dev);
>  	rtl8169_down(tp);
> -	rtl8169_rx_clear(tp);
> +	for (int i = 0; i < tp->num_rx_rings; i++)
> +		rtl8169_rx_clear(tp, &tp->rx_ring[i]);
>  
>  	rtl8169_free_irq(tp);
>  
>  	phy_disconnect(tp->phydev);
>  
> -	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
> -			  tp->RxPhyAddr);
>  	dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray,
>  			  tp->TxPhyAddr);
>  	tp->TxDescArray = NULL;
> -	tp->RxDescArray = NULL;
> +	rtl8169_free_rx_desc(tp);
>  
>  	pm_runtime_put_sync(&pdev->dev);
>  
> @@ -5165,13 +5301,11 @@ static int rtl_open(struct net_device *dev)
>  	tp->TxDescArray = dma_alloc_coherent(&pdev->dev, R8169_TX_RING_BYTES,
>  					     &tp->TxPhyAddr, GFP_KERNEL);
>  	if (!tp->TxDescArray)
> -		goto out;
> -
> -	tp->RxDescArray = dma_alloc_coherent(&pdev->dev, R8169_RX_RING_BYTES,
> -					     &tp->RxPhyAddr, GFP_KERNEL);
> -	if (!tp->RxDescArray)
>  		goto err_free_tx_0;
>  
> +	if (rtl8169_alloc_rx_desc(tp) < 0)
> +		goto err_free_rx_1;
> +
>  	retval = rtl8169_init_ring(tp);
>  	if (retval < 0)
>  		goto err_free_rx_1;
> @@ -5198,11 +5332,10 @@ static int rtl_open(struct net_device *dev)
>  	rtl8169_free_irq(tp);
>  err_release_fw_2:
>  	rtl_release_firmware(tp);
> -	rtl8169_rx_clear(tp);
> +	for (int i = 0; i < tp->num_rx_rings; i++)
> +		rtl8169_rx_clear(tp, &tp->rx_ring[i]);
>  err_free_rx_1:
> -	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
> -			  tp->RxPhyAddr);
> -	tp->RxDescArray = NULL;
> +	rtl8169_free_rx_desc(tp);
>  err_free_tx_0:
>  	dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray,
>  			  tp->TxPhyAddr);
> @@ -5705,7 +5838,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	u32 txconfig;
>  	u32 xid;
>  
> -	dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp));
> +	dev = devm_alloc_etherdev_mqs(&pdev->dev, sizeof(*tp),
> +				      1,
> +				      R8169_MAX_RX_QUEUES);
> +
>  	if (!dev)
>  		return -ENOMEM;
>  


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch net-next v3 3/7] r8169: add support for new interrupt mapping
  2026-05-13 11:55 ` [Patch net-next v3 3/7] r8169: add support for new interrupt mapping javen
@ 2026-05-16 22:07   ` Heiner Kallweit
  2026-05-18  8:21     ` Javen
  0 siblings, 1 reply; 17+ messages in thread
From: Heiner Kallweit @ 2026-05-16 22:07 UTC (permalink / raw)
  To: javen, nic_swsd, andrew+netdev, davem, edumazet, kuba, pabeni,
	horms
  Cc: netdev, linux-kernel

On 13.05.2026 13:55, javen wrote:
> From: Javen Xu <javen_xu@realsil.com.cn>
> 
> To support RSS, the number of hardware interrupt bits should match the
> interrupt of software. So we add support for new interrupt mapping here.
> ISR_VEC_MAP_REG is the hardware register to indicate interrupt status.
> IMR_SET_VEC_MAP_REG is interrupt mask which is set to enable irq.
> 
> Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
> ---
> Changes in v2:
>  - no changes
> 
> Changes in v3:
>  - init index in napi_struct and get message_id from index
>  - move rtl8169_disable_hw_interrupt_msix directly before the call to
>    napi_schedule()
>  - change the condition in rtl8169_request_irq when RTL_VEC_MAP_ENABLE
>    enabled, use rtl8169_interrupt_msix
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 165 ++++++++++++++++++++--
>  1 file changed, 151 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index 9dab0fbcca61..f259cc0cee37 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -78,6 +78,7 @@
>  #define R8169_TX_STOP_THRS	(MAX_SKB_FRAGS + 1)
>  #define R8169_TX_START_THRS	(2 * R8169_TX_STOP_THRS)
>  #define R8169_MAX_RX_QUEUES	8
> +#define R8127_MAX_TX_QUEUES	8
>  #define R8169_MAX_MSIX_VEC	32
>  #define R8127_MAX_RX_QUEUES	8
>  #define R8169_DEFAULT_RX_QUEUES	1
> @@ -451,8 +452,13 @@ enum rtl8125_registers {
>  	RSS_CTRL_8125		= 0x4500,
>  	Q_NUM_CTRL_8125		= 0x4800,
>  	EEE_TXIDLE_TIMER_8125	= 0x6048,
> +	IMR_CLEAR_VEC_MAP_REG	= 0x0d00,
> +	ISR_VEC_MAP_REG		= 0x0d04,
> +	IMR_SET_VEC_MAP_REG	= 0x0d0c,
>  };
>  
> +#define MSIX_ID_VEC_MAP_LINKCHG	29
> +#define RTL_VEC_MAP_ENABLE	BIT(0)
>  #define LEDSEL_MASK_8125	0x23f
>  
>  #define RX_VLAN_INNER_8125	BIT(22)
> @@ -583,6 +589,9 @@ enum rtl_register_content {
>  
>  	/* magic enable v2 */
>  	MagicPacket_v2	= (1 << 16),	/* Wake up when receives a Magic Packet */
> +#define	ISRIMR_LINKCHG	BIT(29)
> +#define	ISRIMR_TOK_Q0	BIT(8)
> +#define	ISRIMR_ROK_Q0	BIT(0)
>  };
>  
>  enum rtl_isr_version {
> @@ -778,6 +787,7 @@ struct rtl8169_private {
>  	enum rtl_isr_version hw_curr_isr_ver;
>  	u8 irq_nvecs;
>  	bool recheck_desc_ownbit;
> +	unsigned int features;

Why do you add an extra bitmap here? Why not use bool xxx:1 like for other flags?

>  	int irq;
>  	struct clk *clk;
>  
> @@ -1676,26 +1686,36 @@ static u32 rtl_get_events(struct rtl8169_private *tp)
>  
>  static void rtl_ack_events(struct rtl8169_private *tp, u32 bits)
>  {
> -	if (rtl_is_8125(tp))
> +	if (rtl_is_8125(tp)) {
>  		RTL_W32(tp, IntrStatus_8125, bits);
> -	else
> +		if (tp->features & RTL_VEC_MAP_ENABLE)

Looks to me like this check is equivalent to checking
hw_curr_isr_ver > RTL_ISR_VER_DEFAULT, isn't it?
If yes, then this additional flag doesn't seem to be needed.

> +			RTL_W32(tp, ISR_VEC_MAP_REG, 0xffffffff);
> +	} else {
>  		RTL_W16(tp, IntrStatus, bits);
> +	}
>  }
>  
>  static void rtl_irq_disable(struct rtl8169_private *tp)
>  {
> -	if (rtl_is_8125(tp))
> +	if (rtl_is_8125(tp)) {
>  		RTL_W32(tp, IntrMask_8125, 0);
> -	else
> +		if (tp->features & RTL_VEC_MAP_ENABLE)
> +			RTL_W32(tp, IMR_CLEAR_VEC_MAP_REG, 0xffffffff);
> +	} else {
>  		RTL_W16(tp, IntrMask, 0);
> +	}
>  }
>  
>  static void rtl_irq_enable(struct rtl8169_private *tp)
>  {
> -	if (rtl_is_8125(tp))
> -		RTL_W32(tp, IntrMask_8125, tp->irq_mask);
> -	else
> +	if (rtl_is_8125(tp)) {
> +		if (tp->features & RTL_VEC_MAP_ENABLE)
> +			RTL_W32(tp, IMR_SET_VEC_MAP_REG, tp->irq_mask);
> +		else
> +			RTL_W32(tp, IntrMask_8125, tp->irq_mask);
> +	} else {
>  		RTL_W16(tp, IntrMask, tp->irq_mask);
> +	}
>  }
>  
>  static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
> @@ -5070,6 +5090,42 @@ static void rtl8169_free_irq(struct rtl8169_private *tp)
>  	}
>  }
>  
> +static void rtl8169_disable_hw_interrupt_msix(struct rtl8169_private *tp, int message_id)
> +{
> +	RTL_W32(tp, IMR_CLEAR_VEC_MAP_REG, BIT(message_id));
> +}
> +
> +static void rtl8169_clear_hw_isr(struct rtl8169_private *tp, int message_id)
> +{
> +	RTL_W32(tp, ISR_VEC_MAP_REG, BIT(message_id));
> +}
> +
> +static void rtl8169_enable_hw_interrupt_msix(struct rtl8169_private *tp, int message_id)
> +{
> +	RTL_W32(tp, IMR_SET_VEC_MAP_REG, BIT(message_id));
> +}
> +
> +static irqreturn_t rtl8169_interrupt_msix(int irq, void *dev_instance)
> +{
> +	struct napi_struct *napi = dev_instance;
> +	struct net_device *dev = napi->dev;
> +	struct rtl8169_private *tp = netdev_priv(dev);
> +	int message_id = napi->index;
> +
> +	rtl8169_clear_hw_isr(tp, message_id);
> +
> +	if (message_id == MSIX_ID_VEC_MAP_LINKCHG) {
> +		phy_mac_interrupt(tp->phydev);
> +		return IRQ_HANDLED;
> +	}
> +
> +	tp->recheck_desc_ownbit = true;
> +	rtl8169_disable_hw_interrupt_msix(tp, message_id);
> +	napi_schedule(napi);
> +
> +	return IRQ_HANDLED;
> +}
> +
>  static int rtl8169_request_irq(struct rtl8169_private *tp)
>  {
>  	struct net_device *dev = tp->dev;
> @@ -5078,8 +5134,12 @@ static int rtl8169_request_irq(struct rtl8169_private *tp)
>  
>  	for (int i = 0; i < tp->irq_nvecs; i++) {
>  		napi = &tp->rtl8169_napi[i];
> -		rc = pci_request_irq(tp->pci_dev, i, rtl8169_interrupt,
> -				     NULL, napi, "%s-%d", dev->name, i);
> +		if (tp->features & RTL_VEC_MAP_ENABLE)
> +			rc = pci_request_irq(tp->pci_dev, i, rtl8169_interrupt_msix,
> +					     NULL, napi, "%s-%d", dev->name, i);
> +		else
> +			rc = pci_request_irq(tp->pci_dev, i, rtl8169_interrupt,
> +					     NULL, napi, "%s-%d", dev->name, i);
>  		if (rc)
>  			break;
>  	}
> @@ -5523,10 +5583,16 @@ static const struct net_device_ops rtl_netdev_ops = {
>  
>  static void rtl_set_irq_mask(struct rtl8169_private *tp)
>  {
> -	tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
> +	if (tp->features & RTL_VEC_MAP_ENABLE) {
> +		tp->irq_mask = ISRIMR_LINKCHG | ISRIMR_TOK_Q0;
> +		for (int i = 0; i < tp->num_rx_rings; i++)
> +			tp->irq_mask |= ISRIMR_ROK_Q0 << i;
> +	} else {
> +		tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
>  
> -	if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
> -		tp->irq_mask |= SYSErr | RxFIFOOver;
> +		if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
> +			tp->irq_mask |= SYSErr | RxFIFOOver;
> +	}
>  }
>  
>  static int rtl_alloc_irq(struct rtl8169_private *tp)
> @@ -5555,6 +5621,8 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
>  		return nvecs;
>  
>  	tp->irq_nvecs = nvecs;
> +	if (nvecs > 1)
> +		tp->features |= RTL_VEC_MAP_ENABLE;
>  
>  	return 0;
>  }
> @@ -5822,10 +5890,79 @@ static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
>  	return false;
>  }
>  
> +static int rtl8169_poll_msix_rx(struct napi_struct *napi, int budget)
> +{
> +	struct net_device *dev = napi->dev;
> +	struct rtl8169_private *tp = netdev_priv(dev);
> +	const int message_id = napi->index;
> +	int work_done = 0;
> +
> +	if (message_id < tp->num_rx_rings)
> +		work_done += rtl_rx(dev, tp, &tp->rx_ring[message_id], budget);
> +
> +	if (work_done < budget && napi_complete_done(napi, work_done))
> +		rtl8169_enable_hw_interrupt_msix(tp, message_id);
> +
> +	return work_done;
> +}
> +
> +static int rtl8169_poll_msix_tx(struct napi_struct *napi, int budget)
> +{
> +	struct net_device *dev = napi->dev;
> +	struct rtl8169_private *tp = netdev_priv(dev);
> +	const int message_id = napi->index;
> +	int tx_ring_idx = message_id - 8;
> +	unsigned int work_done = 0;
> +
> +	if (tx_ring_idx >= 0)
> +		rtl_tx(dev, tp, budget);
> +
> +	if (work_done < budget && napi_complete_done(napi, work_done))
> +		rtl8169_enable_hw_interrupt_msix(tp, message_id);
> +
> +	return work_done;
> +}
> +
> +static int rtl8169_poll_msix_other(struct napi_struct *napi, int budget)
> +{
> +	struct net_device *dev = napi->dev;
> +	struct rtl8169_private *tp = netdev_priv(dev);
> +	const int message_id = napi - tp->rtl8169_napi;

Why not use napi->index here too?

> +
> +	napi_complete_done(napi, budget);
> +	rtl8169_enable_hw_interrupt_msix(tp, message_id);
> +
> +	return 1;
> +}
> +
>  static void r8169_init_napi(struct rtl8169_private *tp)
>  {
> -	for (int i = 0; i < tp->irq_nvecs; i++)
> -		netif_napi_add(tp->dev, &tp->rtl8169_napi[i], rtl8169_poll);
> +	for (int i = 0; i < tp->irq_nvecs; i++) {
> +		if (tp->features & RTL_VEC_MAP_ENABLE) {
> +			switch (tp->hw_curr_isr_ver) {
> +			case RTL_ISR_VER_8127:

A comment describing the RTL8127 MSI-X vector layout would be helpful here.
Otherwise the following is hard to understand.

> +				if (i < R8127_MAX_RX_QUEUES)
> +					netif_napi_add(tp->dev, &tp->rtl8169_napi[i],
> +						       rtl8169_poll_msix_rx);
> +				else if (i >= R8127_MAX_RX_QUEUES &&
> +					 i < (R8127_MAX_RX_QUEUES +
> +					 R8127_MAX_TX_QUEUES))
> +					netif_napi_add(tp->dev, &tp->rtl8169_napi[i],
> +						       rtl8169_poll_msix_tx);
> +				else
> +					netif_napi_add(tp->dev, &tp->rtl8169_napi[i],
> +						       rtl8169_poll_msix_other);
> +				break;
> +			default:
> +				netif_napi_add(tp->dev, &tp->rtl8169_napi[i],
> +					       rtl8169_poll);
> +				break;
> +			}
> +		} else {
> +			netif_napi_add(tp->dev, &tp->rtl8169_napi[i], rtl8169_poll);
> +		}

This seems to be unnecessarily complex and can be simplified.

> +		tp->rtl8169_napi[i].index = i;
> +	}
>  }
>  
>  static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch net-next v3 5/7] r8169: add support and enable rss
  2026-05-13 11:55 ` [Patch net-next v3 5/7] r8169: add support and enable rss javen
  2026-05-15  0:21   ` Jakub Kicinski
@ 2026-05-16 22:07   ` Heiner Kallweit
  2026-05-18 11:17     ` Javen
  1 sibling, 1 reply; 17+ messages in thread
From: Heiner Kallweit @ 2026-05-16 22:07 UTC (permalink / raw)
  To: javen, nic_swsd, andrew+netdev, davem, edumazet, kuba, pabeni,
	horms
  Cc: netdev, linux-kernel

On 13.05.2026 13:55, javen wrote:
> From: Javen Xu <javen_xu@realsil.com.cn>
> 
> This patch adds RSS support for RTL8127 and enables it.
> 
> Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
> ---
> Changes in v2:
>  - some changes moved from Patch 2/7
> Changes in v3:
>  - add struct rtl8169_rss_data. Allocate it dynamically when needed.
>  - define rss_key as an u32 array
>  - replace some magic bit numbers in rtl8169_set_rss_hash_opt() and
>    rtl8125_set_rx_q_num()
>  - use union to combine different rx descriptor, refactor struct RxDesc
>  - remove dead code from rtl8169_double_check_rss_support()
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 405 ++++++++++++++++++++--
>  1 file changed, 371 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index b9c505e4bc0a..b90375cef724 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -82,6 +82,19 @@
>  #define R8169_MAX_MSIX_VEC	32
>  #define R8127_MAX_RX_QUEUES	8
>  #define R8169_DEFAULT_RX_QUEUES	1
> +#define R8127_MAX_IRQ		32
> +#define R8127_MIN_IRQ		30
> +#define R8169_IRQ_DEFAULT	1

These names are somewhat misleading. Would something like
R8127_MAX_NUM_IRQVEC better describe the usage?

> +#define RTL_RSS_KEY_SIZE	40
> +#define RSS_CPU_NUM_MASK	GENMASK(18, 16)
> +#define RSS_HASH_MASK		GENMASK(10, 8)
> +#define RTL_MAX_INDIRECTION_TABLE_ENTRIES 128
> +#define RXS_RSS_UDP		BIT(27)
> +#define RXS_RSS_IPV4		BIT(28)
> +#define RXS_RSS_IPV6		BIT(29)
> +#define RXS_RSS_TCP		BIT(30)
> +#define RXS_RSS_L3_TYPE_MASK	(RXS_RSS_IPV4 | RXS_RSS_IPV6)
> +#define RXS_RSS_L4_TYPE_MASK	(RXS_RSS_TCP | RXS_RSS_UDP)
>  
>  #define OCP_STD_PHY_BASE	0xa400
>  
> @@ -592,6 +605,25 @@ enum rtl_register_content {
>  #define	ISRIMR_LINKCHG	BIT(29)
>  #define	ISRIMR_TOK_Q0	BIT(8)
>  #define	ISRIMR_ROK_Q0	BIT(0)
> +#define RTL_DESC_TYPE_CTRL		0xd8
> +#define RSS_KEY_REG			0x4600
> +#define RSS_INDIRECTION_TBL_REG		0x4700
> +#define RSS_CTRL_TCP_IPV4_SUPP		BIT(0)
> +#define RTL_DESC_TYPE_RSS		BIT(1)
> +#define RSS_CTRL_IPV4_SUPP		BIT(1)
> +#define RSS_CTRL_TCP_IPV6_SUPP		BIT(2)
> +#define RSS_CTRL_IPV6_SUPP		BIT(3)
> +#define RSS_CTRL_IPV6_EXT_SUPP		BIT(4)
> +#define RSS_CTRL_TCP_IPV6_EXT_SUPP	BIT(5)
> +#define RSS_CTRL_UDP_IPV4_SUPP		BIT(6)
> +#define RSS_CTRL_UDP_IPV6_SUPP		BIT(7)
> +#define RSS_CTRL_UDP_IPV6_EXT_SUPP	BIT(8)
> +#define RTL_RSS_FLAG_HASH_UDP_IPV4	BIT(0)
> +#define RTL_RSS_FLAG_HASH_UDP_IPV6	BIT(1)
> +#define	RX_RES_RSS			BIT(22)
> +#define	RX_RUNT_RSS			BIT(21)
> +#define	RX_CRC_RSS			BIT(20)
> +#define RTL_RX_Q_NUM_MASK		GENMASK(4, 2)
>  };
>  
>  enum rtl_isr_version {
> @@ -654,6 +686,11 @@ enum rtl_rx_desc_bit {
>  #define RxProtoIP	(PID1 | PID0)
>  #define RxProtoMask	RxProtoIP
>  
> +#define	RX_UDPT_DESC_RSS	BIT(19)
> +#define	RX_TCPT_DESC_RSS	BIT(18)
> +#define	RX_UDPF_DESC_RSS	BIT(16) /* UDP/IP checksum failed */
> +#define	RX_TCPF_DESC_RSS	BIT(15) /* TCP/IP checksum failed */
> +
>  	IPFail		= (1 << 16), /* IP checksum failed */
>  	UDPFail		= (1 << 15), /* UDP/IP checksum failed */
>  	TCPFail		= (1 << 14), /* TCP/IP checksum failed */
> @@ -675,9 +712,27 @@ struct TxDesc {
>  };
>  
>  struct RxDesc {
> -	__le32 opts1;
> -	__le32 opts2;
> -	__le64 addr;
> +	union {
> +		/* RX_DESC_RING_TYPE_DEFAULT */
> +		struct {
> +			__le32 opts1;
> +			__le32 opts2;
> +			__le64 addr;
> +		};
> +
> +		/* RX_DESC_RING_TYPE_RSS */
> +		struct {
> +			union {
> +				__le64 rss_addr;
> +				struct {
> +					__le32 rss_info;
> +					__le32 rss_result;
> +				} rss_dword;
> +			};
> +			__le32 rss_opts2;
> +			__le32 rss_opts1;
> +		};
> +	};
>  };
>  
>  struct ring_info {
> @@ -764,6 +819,13 @@ struct rtl8169_rx_ring {
>  	struct page *rx_databuff[NUM_RX_DESC];		/* Rx data buffers */
>  };
>  
> +struct rtl8169_rss_data {
> +	u32 rss_flags;
> +	u32 rss_key[RTL_RSS_KEY_SIZE / sizeof(u32)];
> +	u8 rss_indir_tbl[RTL_MAX_INDIRECTION_TABLE_ENTRIES];
> +	u8 hw_supp_indir_tbl_entries;

Like in other places: Why not use unsigned int here?

> +};
> +
>  struct rtl8169_private {
>  	void __iomem *mmio_addr;	/* memory map physical address */
>  	struct pci_dev *pci_dev;
> @@ -783,9 +845,11 @@ struct rtl8169_private {
>  	u16 tx_lpi_timer;
>  	u32 irq_mask;
>  	u16 hw_supp_num_rx_queues;
> +	struct rtl8169_rss_data *rss_data;
>  	enum rtl_isr_version hw_supp_isr_ver;
>  	enum rtl_isr_version hw_curr_isr_ver;
>  	u8 irq_nvecs;
> +	u8 init_rx_desc_type;

Type should be the respective enum, not u8.

>  	bool recheck_desc_ownbit;
>  	unsigned int features;
>  	int irq;
> @@ -1620,6 +1684,13 @@ static bool rtl_dash_is_enabled(struct rtl8169_private *tp)
>  	}
>  }
>  
> +static bool rtl_check_rss_support(struct rtl8169_private *tp)
> +{
> +	if (tp->mac_version == RTL_GIGA_MAC_VER_80)
> +		return true;

Typically an empty line is inserted before the return statement.

> +	return false;
> +}
> +
>  static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private *tp)
>  {
>  	switch (tp->mac_version) {
> @@ -1919,9 +1990,20 @@ static inline u32 rtl8169_tx_vlan_tag(struct sk_buff *skb)
>  		TxVlanTag | swab16(skb_vlan_tag_get(skb)) : 0x00;
>  }
>  
> -static void rtl8169_rx_vlan_tag(struct RxDesc *desc, struct sk_buff *skb)
> +static void rtl8169_rx_vlan_tag(struct rtl8169_private *tp,
> +				struct RxDesc *desc,
> +				struct sk_buff *skb)
>  {
> -	u32 opts2 = le32_to_cpu(desc->opts2);
> +	u32 opts2;
> +
> +	switch (tp->init_rx_desc_type) {
> +	case RX_DESC_RING_TYPE_RSS:
> +		opts2 = le32_to_cpu(desc->rss_opts2);
> +		break;
> +	default:
> +		opts2 = le32_to_cpu(desc->opts2);
> +		break;
> +	}
>  
>  	if (opts2 & RxVlanTag)
>  		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), swab16(opts2 & 0xffff));
> @@ -2750,6 +2832,14 @@ static void rtl_hw_reset(struct rtl8169_private *tp)
>  	rtl_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
>  }
>  
> +static void rtl8169_init_rss(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->rss_data->hw_supp_indir_tbl_entries; i++)
> +		tp->rss_data->rss_indir_tbl[i] = ethtool_rxfh_indir_default(i, tp->num_rx_rings);
> +
> +	netdev_rss_key_fill(tp->rss_data->rss_key, RTL_RSS_KEY_SIZE);
> +}
> +
>  static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
>  {
>  	tp->num_rx_rings = 1;
> @@ -2757,6 +2847,7 @@ static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
>  	switch (tp->mac_version) {
>  	case RTL_GIGA_MAC_VER_80:
>  		tp->hw_supp_num_rx_queues = R8127_MAX_RX_QUEUES;
> +		tp->rss_data->hw_supp_indir_tbl_entries = RTL_MAX_INDIRECTION_TABLE_ENTRIES;
>  		tp->hw_supp_isr_ver = RTL_ISR_VER_8127;
>  		break;
>  	default:
> @@ -2764,6 +2855,7 @@ static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
>  		tp->hw_supp_isr_ver = RTL_ISR_VER_DEFAULT;
>  		break;
>  	}
> +	tp->init_rx_desc_type = RX_DESC_RING_TYPE_DEFAULT;
>  	tp->hw_curr_isr_ver = tp->hw_supp_isr_ver;
>  }
>  
> @@ -2889,6 +2981,72 @@ static void rtl_set_rx_max_size(struct rtl8169_private *tp)
>  	RTL_W16(tp, RxMaxSize, R8169_RX_BUF_SIZE + 1);
>  }
>  
> +static void rtl8169_store_rss_key(struct rtl8169_private *tp)
> +{
> +	u32 *rss_key = tp->rss_data->rss_key;
> +	const u16 rss_key_reg = RSS_KEY_REG;
> +	u32 num_entries = RTL_RSS_KEY_SIZE / sizeof(u32);
> +
> +	/* Write redirection table to HW */
> +	for (int i = 0; i < num_entries; i++)
> +		RTL_W32(tp, rss_key_reg + (i * 4), rss_key[i]);
> +}
> +
> +static void rtl8169_store_reta(struct rtl8169_private *tp)
> +{
> +	u16 indir_tbl_reg = RSS_INDIRECTION_TBL_REG;
> +	u32 i, reta_entries = tp->rss_data->hw_supp_indir_tbl_entries;
> +	u32 reta = 0;
> +	u8 *indir_tbl = tp->rss_data->rss_indir_tbl;
> +
> +	/* Write redirection table to HW */
> +	for (i = 0; i < reta_entries; i++) {
> +		reta |= indir_tbl[i] << (i & 0x3) * 8;
> +		if ((i & 3) == 3) {
> +			RTL_W32(tp, indir_tbl_reg, reta);
> +			indir_tbl_reg += 4;
> +			reta = 0;
> +		}
> +	}
> +}
> +
> +static int rtl8169_set_rss_hash_opt(struct rtl8169_private *tp)
> +{
> +	u32 rss_flags = tp->rss_data->rss_flags;
> +	u32 rss_ctrl;
> +
> +	rss_ctrl = FIELD_PREP(RSS_CPU_NUM_MASK, ilog2(tp->num_rx_rings));
> +
> +	/* Perform hash on these packet types */
> +	rss_ctrl |= RSS_CTRL_TCP_IPV4_SUPP
> +		 | RSS_CTRL_IPV4_SUPP
> +		 | RSS_CTRL_IPV6_SUPP
> +		 | RSS_CTRL_IPV6_EXT_SUPP
> +		 | RSS_CTRL_TCP_IPV6_SUPP
> +		 | RSS_CTRL_TCP_IPV6_EXT_SUPP;
> +
> +	if (rss_flags & RTL_RSS_FLAG_HASH_UDP_IPV4)
> +		rss_ctrl |= RSS_CTRL_UDP_IPV4_SUPP;
> +
> +	if (rss_flags & RTL_RSS_FLAG_HASH_UDP_IPV6)
> +		rss_ctrl |= RSS_CTRL_UDP_IPV6_SUPP |
> +			    RSS_CTRL_UDP_IPV6_EXT_SUPP;
> +
> +	rss_ctrl |= FIELD_PREP(RSS_HASH_MASK,
> +			       ilog2(tp->rss_data->hw_supp_indir_tbl_entries));
> +
> +	RTL_W32(tp, RSS_CTRL_8125, rss_ctrl);
> +
> +	return 0;
> +}
> +
> +static void rtl_set_rss_config(struct rtl8169_private *tp)
> +{
> +	rtl8169_set_rss_hash_opt(tp);
> +	rtl8169_store_reta(tp);
> +	rtl8169_store_rss_key(tp);
> +}
> +
>  static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
>  {
>  	struct rtl8169_rx_ring *ring = &tp->rx_ring[0];
> @@ -3955,6 +4113,18 @@ DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond)
>  	return r8168_mac_ocp_read(tp, 0xe00e) & BIT(13);
>  }
>  
> +static void rtl8125_set_rx_q_num(struct rtl8169_private *tp)
> +{
> +	u16 q_ctrl;
> +	u16 rx_q_num;
> +
> +	rx_q_num = (u16)ilog2(tp->num_rx_rings);
> +	q_ctrl = RTL_R16(tp, Q_NUM_CTRL_8125);
> +	q_ctrl &= ~RTL_RX_Q_NUM_MASK;
> +	q_ctrl |= FIELD_PREP(RTL_RX_Q_NUM_MASK, rx_q_num);
> +	RTL_W16(tp, Q_NUM_CTRL_8125, q_ctrl);
> +}
> +
>  static void rtl8169_hw_enable_vec_mapping(struct rtl8169_private *tp)
>  {
>  	u8 tmp;
> @@ -3994,6 +4164,13 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
>  	    tp->mac_version == RTL_GIGA_MAC_VER_80)
>  		RTL_W8(tp, 0xD8, RTL_R8(tp, 0xD8) & ~0x02);
>  
> +	/* enable rx descriptor type v4 and set queue num for rss*/
> +	if (tp->rss_enable) {
> +		rtl8125_set_rx_q_num(tp);
> +		RTL_W8(tp, RTL_DESC_TYPE_CTRL,
> +		       RTL_R8(tp, RTL_DESC_TYPE_CTRL) | RTL_DESC_TYPE_RSS);
> +	}
> +
>  	if (tp->mac_version == RTL_GIGA_MAC_VER_80)
>  		r8168_mac_ocp_modify(tp, 0xe614, 0x0f00, 0x0f00);
>  	else if (tp->mac_version == RTL_GIGA_MAC_VER_70)
> @@ -4230,6 +4407,12 @@ static void rtl_hw_start(struct  rtl8169_private *tp)
>  	rtl_hw_aspm_clkreq_enable(tp, true);
>  	rtl_set_rx_max_size(tp);
>  	rtl_set_rx_tx_desc_registers(tp);
> +	if (rtl_is_8125(tp)) {
> +		if (tp->rss_enable)
> +			rtl_set_rss_config(tp);
> +		else
> +			RTL_W32(tp, RSS_CTRL_8125, 0x00);
> +	}
>  	rtl_lock_config_regs(tp);
>  
>  	rtl_jumbo_config(tp);
> @@ -4257,14 +4440,26 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
>  	return 0;
>  }
>  
> -static void rtl8169_mark_to_asic(struct RxDesc *desc)
> +static void rtl8169_mark_to_asic(struct rtl8169_private *tp, struct RxDesc *desc)
>  {
> -	u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
> +	u32 eor;
>  
> -	desc->opts2 = 0;
> -	/* Force memory writes to complete before releasing descriptor */
> -	dma_wmb();
> -	WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
> +	switch (tp->init_rx_desc_type) {
> +	case RX_DESC_RING_TYPE_RSS:
> +		eor = le32_to_cpu(desc->rss_opts1) & RingEnd;
> +		desc->rss_opts2 = 0;
> +		/* Force memory writes to complete before releasing descriptor */
> +		dma_wmb();
> +		WRITE_ONCE(desc->rss_opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
> +		break;
> +	default:
> +		eor = le32_to_cpu(desc->opts1) & RingEnd;
> +		desc->opts2 = 0;
> +		/* Force memory writes to complete before releasing descriptor */
> +		dma_wmb();
> +		WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
> +		break;
> +	}
>  }
>  
>  static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
> @@ -4287,9 +4482,12 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
>  		return NULL;
>  	}
>  
> -	desc->addr = cpu_to_le64(mapping);
>  	ring->rx_desc_phy_addr[index] = mapping;
> -	rtl8169_mark_to_asic(desc);
> +	if (tp->init_rx_desc_type == RX_DESC_RING_TYPE_RSS)
> +		desc->rss_addr = cpu_to_le64(mapping);
> +	else
> +		desc->addr = cpu_to_le64(mapping);
> +	rtl8169_mark_to_asic(tp, desc);
>  
>  	return data;
>  }
> @@ -4310,6 +4508,18 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp, struct rtl8169_rx_ring
>  	}
>  }
>  
> +static void rtl8169_mark_as_last_descriptor(struct rtl8169_private *tp, struct RxDesc *desc)
> +{
> +	switch (tp->init_rx_desc_type) {
> +	case RX_DESC_RING_TYPE_RSS:
> +		desc->rss_opts1 |= cpu_to_le32(RingEnd);
> +		break;
> +	default:
> +		desc->opts1 |= cpu_to_le32(RingEnd);
> +		break;
> +	}
> +}
> +
>  static int rtl8169_rx_fill(struct rtl8169_private *tp, struct rtl8169_rx_ring *ring)
>  {
>  	int i;
> @@ -4326,7 +4536,7 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp, struct rtl8169_rx_ring *r
>  	}
>  
>  	/* mark as last descriptor in the ring */
> -	ring->rx_desc_array[NUM_RX_DESC - 1].opts1 |= cpu_to_le32(RingEnd);
> +	rtl8169_mark_as_last_descriptor(tp, &ring->rx_desc_array[NUM_RX_DESC - 1]);
>  
>  	return 0;
>  }
> @@ -4476,7 +4686,7 @@ static void rtl8169_rx_desc_reset(struct rtl8169_private *tp)
>  		struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
>  
>  		for (int j = 0; j < NUM_RX_DESC; j++)
> -			rtl8169_mark_to_asic(ring->rx_desc_array + j);
> +			rtl8169_mark_to_asic(tp, ring->rx_desc_array + j);
>  	}
>  }
>  
> @@ -4937,35 +5147,104 @@ static inline int rtl8169_fragmented_frame(u32 status)
>  	return (status & (FirstFrag | LastFrag)) != (FirstFrag | LastFrag);
>  }
>  
> -static inline void rtl8169_rx_csum(struct sk_buff *skb,
> +static inline void rtl8169_rx_hash(struct rtl8169_private *tp,
> +				   struct RxDesc *desc,
> +				   struct sk_buff *skb)
> +{
> +	u32 rss_header_info;
> +	u32 hash_val;
> +
> +	if (!(tp->dev->features & NETIF_F_RXHASH))
> +		return;
> +
> +	rss_header_info = le32_to_cpu(desc->rss_dword.rss_info);
> +
> +	if (!(rss_header_info & RXS_RSS_L3_TYPE_MASK))
> +		return;
> +
> +	hash_val = le32_to_cpu(desc->rss_dword.rss_result);
> +
> +	skb_set_hash(skb, hash_val,
> +		     (RXS_RSS_L4_TYPE_MASK & rss_header_info) ?
> +		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
> +}
> +
> +static inline void rtl8169_rx_csum(struct rtl8169_private *tp,
> +				   struct sk_buff *skb,
>  				   struct RxDesc *desc)
>  {
> -	u32 status = le32_to_cpu(desc->opts1) & (RxProtoMask | RxCSFailMask);
> +	bool csum_ok = false;
> +	u32 opts1;
>  
> -	if (status == RxProtoTCP || status == RxProtoUDP)
> +	switch (tp->init_rx_desc_type) {
> +	case RX_DESC_RING_TYPE_RSS:
> +		opts1 = le32_to_cpu(desc->rss_opts1);
> +		if (((opts1 & RX_TCPT_DESC_RSS) && !(opts1 & RX_TCPF_DESC_RSS)) ||
> +		    ((opts1 & RX_UDPT_DESC_RSS) && !(opts1 & RX_UDPF_DESC_RSS)))
> +			csum_ok = true;
> +		break;
> +	default:
> +		opts1 = le32_to_cpu(desc->opts1) & (RxProtoMask | RxCSFailMask);
> +		if (opts1 == RxProtoTCP || opts1 == RxProtoUDP)
> +			csum_ok = true;
> +		break;
> +	}
> +
> +	if (csum_ok)
>  		skb->ip_summed = CHECKSUM_UNNECESSARY;
>  	else
>  		skb_checksum_none_assert(skb);
>  }
>  
> +static u32 rtl8169_rx_desc_opts1(struct rtl8169_private *tp, struct RxDesc *desc)
> +{
> +	switch (tp->init_rx_desc_type) {
> +	case RX_DESC_RING_TYPE_RSS:
> +		return READ_ONCE(desc->rss_opts1);
> +	default:
> +		return READ_ONCE(desc->opts1);
> +	}
> +}
> +
>  static bool rtl8169_check_rx_desc_error(struct net_device *dev,
>  					struct rtl8169_private *tp,
>  					u32 status)
>  {
> -	if (unlikely(status & RxRES)) {
> -		if (status & (RxRWT | RxRUNT))
> -			dev->stats.rx_length_errors++;
> -		if (status & RxCRC)
> -			dev->stats.rx_crc_errors++;
> -		return true;
> +	switch (tp->init_rx_desc_type) {
> +	case RX_DESC_RING_TYPE_RSS:
> +		if (unlikely(status & RX_RES_RSS)) {
> +			if (status & RX_RUNT_RSS)
> +				dev->stats.rx_length_errors++;
> +			if (status & RX_CRC_RSS)
> +				dev->stats.rx_crc_errors++;
> +			return true;
> +		}
> +		break;
> +	default:
> +		if (unlikely(status & RxRES)) {
> +			if (status & (RxRWT | RxRUNT))
> +				dev->stats.rx_length_errors++;
> +			if (status & RxCRC)
> +				dev->stats.rx_crc_errors++;
> +			return true;
> +		}
> +		break;
>  	}
>  	return false;
>  }
>  
> -static void rtl8169_set_desc_dma_addr(struct RxDesc *desc,
> +static void rtl8169_set_desc_dma_addr(struct rtl8169_private *tp,
> +				      struct RxDesc *desc,
>  				      dma_addr_t mapping)
>  {
> -	desc->addr = cpu_to_le64(mapping);
> +	switch (tp->init_rx_desc_type) {
> +	case RX_DESC_RING_TYPE_RSS:
> +		desc->rss_addr = cpu_to_le64(mapping);
> +		break;
> +	default:
> +		desc->addr = cpu_to_le64(mapping);
> +		break;
> +	}
>  }
>  
>  static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
> @@ -4982,7 +5261,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
>  		dma_addr_t addr;
>  		u32 status;
>  
> -		status = le32_to_cpu(READ_ONCE(desc->opts1));
> +		status = le32_to_cpu(rtl8169_rx_desc_opts1(tp, desc));
>  
>  		if (status & DescOwn) {
>  			if (!tp->recheck_desc_ownbit)
> @@ -4996,7 +5275,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
>  			 */
>  			tp->recheck_desc_ownbit = false;
>  			rtl8169_desc_quirk(tp);
> -			status = le32_to_cpu(READ_ONCE(desc->opts1));
> +			status = le32_to_cpu(rtl8169_rx_desc_opts1(tp, desc));
>  			if (status & DescOwn)
>  				break;
>  		}
> @@ -5045,11 +5324,12 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
>  		skb->tail += pkt_size;
>  		skb->len = pkt_size;
>  		dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
> -
> -		rtl8169_rx_csum(skb, desc);
> +		if (tp->rss_enable)
> +			rtl8169_rx_hash(tp, desc, skb);
> +		rtl8169_rx_csum(tp, skb, desc);
>  		skb->protocol = eth_type_trans(skb, dev);
>  
> -		rtl8169_rx_vlan_tag(desc, skb);
> +		rtl8169_rx_vlan_tag(tp, desc, skb);
>  
>  		if (skb->pkt_type == PACKET_MULTICAST)
>  			dev->stats.multicast++;
> @@ -5058,8 +5338,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
>  
>  		dev_sw_netstats_rx_add(dev, pkt_size);
>  release_descriptor:
> -		rtl8169_set_desc_dma_addr(desc, ring->rx_desc_phy_addr[entry]);
> -		rtl8169_mark_to_asic(desc);
> +		rtl8169_set_desc_dma_addr(tp, desc, ring->rx_desc_phy_addr[entry]);
> +		rtl8169_mark_to_asic(tp, desc);
>  	}
>  
>  	return count;
> @@ -5607,6 +5887,43 @@ static void rtl_set_irq_mask(struct rtl8169_private *tp)
>  	}
>  }
>  
> +static int get_max_irq_nvecs(struct rtl8169_private *tp)
> +{
> +	if (tp->mac_version == RTL_GIGA_MAC_VER_80)
> +		return R8127_MAX_IRQ;
> +	return R8169_IRQ_DEFAULT;
> +}
> +
> +static int get_min_irq_nvecs(struct rtl8169_private *tp)
> +{
> +	if (tp->mac_version == RTL_GIGA_MAC_VER_80)
> +		return R8127_MIN_IRQ;
> +	return R8169_IRQ_DEFAULT;
> +}
> +
> +static void rtl8169_double_check_rss_support(struct rtl8169_private *tp)

Needing such a function indicates that checks in other places are not sufficient.

> +{
> +	if (tp->hw_curr_isr_ver > RTL_ISR_VER_DEFAULT) {
> +		if (!(tp->features & RTL_VEC_MAP_ENABLE) || tp->irq_nvecs < get_min_irq_nvecs(tp))

Can this happen? If yes, then why not adjust hw_curr_isr_ver in the first place?

> +			tp->hw_curr_isr_ver = RTL_ISR_VER_8127;

This looks wrong.

> +	}
> +
> +	if (tp->rss_support && tp->hw_curr_isr_ver > RTL_ISR_VER_DEFAULT) {
> +		u8 rss_queue_num = netif_get_num_default_rss_queues();
> +
> +		tp->num_rx_rings = min(rss_queue_num, tp->hw_supp_num_rx_queues);
> +		if (!(tp->num_rx_rings >= 2 && tp->irq_nvecs >= get_min_irq_nvecs(tp)))
> +			tp->num_rx_rings = 1;
> +	}
> +
> +	tp->rss_enable = 0;
> +
> +	if (tp->num_rx_rings >= 2) {
> +		tp->rss_enable = 1;

Function name just mentions a check, but you set/change values here.


> +		tp->init_rx_desc_type = RX_DESC_RING_TYPE_RSS;
> +	}
> +}
> +
>  static int rtl_alloc_irq(struct rtl8169_private *tp)
>  {
>  	struct pci_dev *pdev = tp->pci_dev;
> @@ -5627,7 +5944,10 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
>  		break;
>  	}
>  
> -	nvecs = pci_alloc_irq_vectors(pdev, 1, 1, flags);
> +	nvecs = pci_alloc_irq_vectors(pdev, get_min_irq_nvecs(tp), get_max_irq_nvecs(tp), flags);
> +
> +	if (nvecs < 0)
> +		nvecs = pci_alloc_irq_vectors(pdev, 1, 1, flags);
>  
>  	if (nvecs < 0)
>  		return nvecs;
> @@ -6069,6 +6389,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  
>  	tp->dash_type = rtl_get_dash_type(tp);
>  	tp->dash_enabled = rtl_dash_is_enabled(tp);
> +	tp->rss_support = rtl_check_rss_support(tp);
> +
> +	if (tp->rss_support) {
> +		tp->rss_data = devm_kzalloc(&pdev->dev, sizeof(*tp->rss_data), GFP_KERNEL);
> +		if (!tp->rss_data)
> +			return -ENOMEM;
> +	}
>  
>  	tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK;
>  
> @@ -6095,6 +6422,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	if (!tp->rtl8169_napi)
>  		return -ENOMEM;
>  
> +	rtl8169_double_check_rss_support(tp);
> +
> +	if (tp->rss_support)
> +		rtl8169_init_rss(tp);
> +
>  	INIT_WORK(&tp->wk.work, rtl_task);
>  	disable_work(&tp->wk.work);
>  
> @@ -6112,6 +6444,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
>  	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
>  
> +	if (tp->rss_support) {
> +		dev->hw_features |= NETIF_F_RXHASH;
> +		dev->features |= NETIF_F_RXHASH;

Seems to me like you have different flags, all with the same meaning:
RSS active, MSI-X vec mapping used, hw_curr_isr_ver > DEFAULT, ..
Can't this be consolidated?

> +	}
> +
>  	/*
>  	 * Pretend we are using VLANs; This bypasses a nasty bug where
>  	 * Interrupts stop flowing on high load on 8110SCd controllers.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch net-next v3 7/7] r8169: add support for ethtool
  2026-05-13 11:55 ` [Patch net-next v3 7/7] r8169: add support for ethtool javen
@ 2026-05-16 22:07   ` Heiner Kallweit
  0 siblings, 0 replies; 17+ messages in thread
From: Heiner Kallweit @ 2026-05-16 22:07 UTC (permalink / raw)
  To: javen, nic_swsd, andrew+netdev, davem, edumazet, kuba, pabeni,
	horms
  Cc: netdev, linux-kernel

On 13.05.2026 13:55, javen wrote:
> From: Javen Xu <javen_xu@realsil.com.cn>
> 
> This patch adds support for changing rx queues by ethtool. We can set rx
> 1, 2, 4, 8 by ethtool -L eth1 rx num.
> 
> Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
> ---
> Changes in v2:
>  - no changes
> 
> Changes in v3:
>  - no changes
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 126 ++++++++++++++++++++++
>  1 file changed, 126 insertions(+)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index f654e98e47be..ae64955e47d4 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -6269,6 +6269,130 @@ static void r8169_init_napi(struct rtl8169_private *tp)
>  	}
>  }
>  
> +static void rtl8169_get_channels(struct net_device *dev,
> +				 struct ethtool_channels *ch)
> +{
> +	struct rtl8169_private *tp = netdev_priv(dev);
> +
> +	ch->max_rx = tp->hw_supp_num_rx_queues;
> +	ch->max_tx = 1;
> +	ch->max_other = 0;
> +	ch->max_combined = 0;

The struct comes zero-initialized, so explicitly setting zero
values shouldn't be needed.

> +
> +	ch->rx_count = tp->num_rx_rings;
> +	ch->tx_count = 1;
> +	ch->other_count = 0;
> +	ch->combined_count = 0;
> +}
> +
> +static int rtl8169_realloc_rx(struct rtl8169_private *tp,
> +			      struct rtl8169_rx_ring *new_rx,
> +			      int new_count)
> +{
> +	int i, ret;
> +
> +	for (i = 0; i < new_count; i++) {
> +		struct rtl8169_rx_ring *ring = &new_rx[i];
> +
> +		ring->rx_desc_array = dma_alloc_coherent(&tp->pci_dev->dev,
> +							 R8169_RX_RING_BYTES,
> +							 &ring->rx_phy_addr,
> +							 GFP_KERNEL);
> +		if (!ring->rx_desc_array) {
> +			ret = -ENOMEM;
> +			goto err_free;
> +		}
> +
> +		memset(ring->rx_databuff, 0, sizeof(ring->rx_databuff));
> +		ret = rtl8169_rx_fill(tp, ring);
> +		if (ret) {
> +			dma_free_coherent(&tp->pci_dev->dev, R8169_RX_RING_BYTES,
> +					  ring->rx_desc_array, ring->rx_phy_addr);
> +			goto err_free;
> +		}
> +	}
> +	return 0;
> +
> +err_free:
> +	while (--i >= 0) {
> +		rtl8169_rx_clear(tp, &new_rx[i]);
> +		dma_free_coherent(&tp->pci_dev->dev, R8169_RX_RING_BYTES,
> +				  new_rx[i].rx_desc_array, new_rx[i].rx_phy_addr);
> +	}
> +	return ret;
> +}
> +
> +static int rtl8169_set_channels(struct net_device *dev,
> +				struct ethtool_channels *ch)
> +{
> +	struct rtl8169_private *tp = netdev_priv(dev);
> +	bool if_running = netif_running(dev);
> +	struct rtl8169_rx_ring *new_rx;
> +	u8 old_tx_desc_type = tp->init_rx_desc_type;
> +	u8 new_desc_type;
> +	bool new_rss_enable;
> +	int i, ret;
> +
> +	if (!tp->rss_support && (ch->rx_count > 1 || ch->tx_count > 1)) {
> +		netdev_warn(dev, "This chip does not support multiple channels/RSS.\n");
> +		return -EOPNOTSUPP;
> +	}
> +
> +	if (!(tp->features & RTL_VEC_MAP_ENABLE))
> +		return -EINVAL;
> +
> +	new_rss_enable = (ch->rx_count > 1 && tp->rss_support);
> +	new_desc_type = new_rss_enable ? RX_DESC_RING_TYPE_RSS : RX_DESC_RING_TYPE_DEFAULT;
> +	tp->init_rx_desc_type = new_desc_type;
> +
> +	if (!if_running) {
> +		tp->num_rx_rings = ch->rx_count;
> +		tp->rss_enable = new_rss_enable;
> +		return 0;
> +	}
> +
> +	new_rx = kcalloc(R8169_MAX_RX_QUEUES, sizeof(*new_rx), GFP_KERNEL);

Better use the new _obj allocators, like kzalloc_objs here.

> +	if (!new_rx)
> +		return -ENOMEM;
> +
> +	ret = rtl8169_realloc_rx(tp, new_rx, ch->rx_count);
> +	if (ret) {
> +		kfree(new_rx);
> +		tp->init_rx_desc_type = old_tx_desc_type;
> +		return ret;
> +	}
> +
> +	netif_stop_queue(dev);
> +	rtl8169_down(tp);
> +
> +	for (i = 0; i < tp->num_rx_rings; i++)
> +		rtl8169_rx_clear(tp, &tp->rx_ring[i]);
> +	rtl8169_free_rx_desc(tp);
> +
> +	tp->num_rx_rings = ch->rx_count;
> +	tp->rss_enable = new_rss_enable;
> +
> +	memset(tp->rx_ring, 0, sizeof(tp->rx_ring));
> +	memcpy(tp->rx_ring, new_rx, sizeof(*new_rx) * ch->rx_count);
> +
> +	for (i = 0; i < tp->rss_data->hw_supp_indir_tbl_entries; i++) {
> +		if (tp->rss_enable)
> +			tp->rss_data->rss_indir_tbl[i] =
> +				ethtool_rxfh_indir_default(i, tp->num_rx_rings);
> +		else
> +			tp->rss_data->rss_indir_tbl[i] = 0;
> +	}
> +
> +	rtl_set_irq_mask(tp);
> +
> +	rtl8169_up(tp);
> +	netif_start_queue(dev);
> +
> +	kfree(new_rx);
> +
> +	return 0;
> +}
> +
>  static const struct ethtool_ops rtl8169_ethtool_ops = {
>  	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
>  				     ETHTOOL_COALESCE_MAX_FRAMES,
> @@ -6287,6 +6411,8 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
>  	.nway_reset		= phy_ethtool_nway_reset,
>  	.get_eee		= rtl8169_get_eee,
>  	.set_eee		= rtl8169_set_eee,
> +	.get_channels		= rtl8169_get_channels,
> +	.set_channels		= rtl8169_set_channels,
>  	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
>  	.set_link_ksettings	= rtl8169_set_link_ksettings,
>  	.get_ringparam		= rtl8169_get_ringparam,


^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [Patch net-next v3 2/7] r8169: add support for multi rx queues
  2026-05-16 22:07   ` Heiner Kallweit
@ 2026-05-18  7:47     ` Javen
  0 siblings, 0 replies; 17+ messages in thread
From: Javen @ 2026-05-18  7:47 UTC (permalink / raw)
  To: Heiner Kallweit, nic_swsd@realtek.com, andrew+netdev@lunn.ch,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, horms@kernel.org
  Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org

>On 13.05.2026 13:55, javen wrote:
>> From: Javen Xu <javen_xu@realsil.com.cn>
>>
>> This patch adds support for multi rx queues. RSS requires multi rx
>> queues to receive packets. So we need struct rtl8169_rx_ring for each
>> queue.
>>
>> Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
>> ---
>> Changes in v2:
>>  - sort some registers by its number
>>  - remove some unused definitions, like RX_DESC_RING_TYPE_MAX
>>  - change recheck_desc_ownbit type
>>  - remove rdsar_reg in rx_ring struct
>>  - opts1 are different in rx_desc and rx_desc_rss, move the judgement
>>    to Patch 5/7
>>
>> Changes in v3:
>>  - remove ring->rx_desc_alloc_size, use constant instead
>> ---
>>  drivers/net/ethernet/realtek/r8169_main.c | 262
>> ++++++++++++++++------
>>  1 file changed, 199 insertions(+), 63 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/realtek/r8169_main.c
>> b/drivers/net/ethernet/realtek/r8169_main.c
>> index e4fc84c97c1e..9dab0fbcca61 100644
>> --- a/drivers/net/ethernet/realtek/r8169_main.c
>> +++ b/drivers/net/ethernet/realtek/r8169_main.c
>> @@ -74,10 +74,13 @@
>>  #define NUM_TX_DESC  256     /* Number of Tx descriptor registers */
>>  #define NUM_RX_DESC  256     /* Number of Rx descriptor registers */
>>  #define R8169_TX_RING_BYTES  (NUM_TX_DESC * sizeof(struct TxDesc))
>> -#define R8169_RX_RING_BYTES  (NUM_RX_DESC * sizeof(struct RxDesc))
>> +#define R8169_RX_RING_BYTES  ((NUM_RX_DESC + 1) * sizeof(struct
>> +RxDesc))
>>  #define R8169_TX_STOP_THRS   (MAX_SKB_FRAGS + 1)
>>  #define R8169_TX_START_THRS  (2 * R8169_TX_STOP_THRS)
>> +#define R8169_MAX_RX_QUEUES  8
>>  #define R8169_MAX_MSIX_VEC   32
>> +#define R8127_MAX_RX_QUEUES  8
>> +#define R8169_DEFAULT_RX_QUEUES      1
>>
>>  #define OCP_STD_PHY_BASE     0xa400
>>
>> @@ -444,6 +447,7 @@ enum rtl8125_registers {
>>       TxPoll_8125             = 0x90,
>>       LEDSEL3                 = 0x96,
>>       MAC0_BKP                = 0x19e0,
>> +     RDSAR_Q1_LOW            = 0x4000,
>>       RSS_CTRL_8125           = 0x4500,
>>       Q_NUM_CTRL_8125         = 0x4800,
>>       EEE_TXIDLE_TIMER_8125   = 0x6048,
>> @@ -736,6 +740,21 @@ enum rtl_dash_type {
>>       RTL_DASH_25_BP,
>>  };
>>
>> +enum rx_desc_ring_type {
>> +     RX_DESC_RING_TYPE_DEFAULT,
>> +     RX_DESC_RING_TYPE_RSS,
>> +};
>> +
>> +struct rtl8169_rx_ring {
>> +     u32 index;                                      /* Rx queue index */
>> +     u32 cur_rx;                                     /* Index of next Rx pkt. */
>> +     u32 dirty_rx;                                   /* Index for recycling. */
>> +     struct RxDesc *rx_desc_array;                   /* array of Rx Desc*/
>> +     dma_addr_t rx_desc_phy_addr[NUM_RX_DESC];       /* Rx data buffer
>physical dma address */
>> +     dma_addr_t rx_phy_addr;                         /* Rx desc physical address */
>> +     struct page *rx_databuff[NUM_RX_DESC];          /* Rx data buffers */
>> +};
>> +
>>  struct rtl8169_private {
>>       void __iomem *mmio_addr;        /* memory map physical address */
>>       struct pci_dev *pci_dev;
>> @@ -743,16 +762,13 @@ struct rtl8169_private {
>>       struct phy_device *phydev;
>>       enum mac_version mac_version;
>>       enum rtl_dash_type dash_type;
>> -     u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
>>       u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
>>       u32 dirty_tx;
>>       struct TxDesc *TxDescArray;     /* 256-aligned Tx descriptor ring */
>> -     struct RxDesc *RxDescArray;     /* 256-aligned Rx descriptor ring */
>>       dma_addr_t TxPhyAddr;
>> -     dma_addr_t RxPhyAddr;
>> -     struct page *Rx_databuff[NUM_RX_DESC];  /* Rx data buffers */
>>       struct ring_info tx_skb[NUM_TX_DESC];   /* Tx data buffers */
>>       struct napi_struct *rtl8169_napi;
>> +     struct rtl8169_rx_ring rx_ring[R8169_MAX_RX_QUEUES];
>>       unsigned int num_rx_rings;
>>       u16 cp_cmd;
>>       u16 tx_lpi_timer;
>> @@ -2635,9 +2651,27 @@ static void rtl_init_rxcfg(struct rtl8169_private
>*tp)
>>       }
>>  }
>>
>> +static void rtl8169_rx_desc_init(struct rtl8169_private *tp) {
>> +     for (int i = 0; i < tp->num_rx_rings; i++) {
>> +             struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
>> +
>> +             memset(ring->rx_desc_array, 0x0, R8169_RX_RING_BYTES);
>> +     }
>> +}
>> +
>>  static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)  {
>> -     tp->dirty_tx = tp->cur_tx = tp->cur_rx = 0;
>> +     tp->dirty_tx = 0;
>> +     tp->cur_tx = 0;
>> +
>> +     for (int i = 0; i < tp->hw_supp_num_rx_queues; i++) {
>> +             struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
>> +
>> +             ring->dirty_rx = 0;
>> +             ring->cur_rx = 0;
>> +             ring->index = i;
>> +     }
>>  }
>>
>>  static void rtl_jumbo_config(struct rtl8169_private *tp) @@ -2702,9
>> +2736,11 @@ static void rtl_software_parameter_initialize(struct
>> rtl8169_private *tp)
>>
>>       switch (tp->mac_version) {
>>       case RTL_GIGA_MAC_VER_80:
>> +             tp->hw_supp_num_rx_queues = R8127_MAX_RX_QUEUES;
>>               tp->hw_supp_isr_ver = RTL_ISR_VER_8127;
>>               break;
>>       default:
>> +             tp->hw_supp_num_rx_queues = R8169_DEFAULT_RX_QUEUES;
>>               tp->hw_supp_isr_ver = RTL_ISR_VER_DEFAULT;
>>               break;
>>       }
>> @@ -2835,6 +2871,8 @@ static void rtl_set_rx_max_size(struct
>> rtl8169_private *tp)
>>
>>  static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
>> {
>> +     struct rtl8169_rx_ring *ring = &tp->rx_ring[0];
>> +
>>       /*
>>        * Magic spell: some iop3xx ARM board needs the TxDescAddrHigh
>>        * register to be written before TxDescAddrLow to work.
>> @@ -2842,8 +2880,16 @@ static void rtl_set_rx_tx_desc_registers(struct
>rtl8169_private *tp)
>>        */
>>       RTL_W32(tp, TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
>>       RTL_W32(tp, TxDescStartAddrLow, ((u64) tp->TxPhyAddr) &
>DMA_BIT_MASK(32));
>> -     RTL_W32(tp, RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
>> -     RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) &
>DMA_BIT_MASK(32));
>> +     RTL_W32(tp, RxDescAddrHigh, ((u64) ring->rx_phy_addr) >> 32);
>> +     RTL_W32(tp, RxDescAddrLow, ((u64) ring->rx_phy_addr) &
>> + DMA_BIT_MASK(32));
>> +
>> +     for (int i = 1; i < tp->num_rx_rings; i++) {
>> +             struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
>> +             u16 rdsar_reg = (u16)(RDSAR_Q1_LOW + (i - 1) * 8);
>
>Not clear why u16 instead of unsigned int?
>
>> +
>> +             RTL_W32(tp, rdsar_reg + 4, ((u64)ring->rx_phy_addr >> 32));
>> +             RTL_W32(tp, rdsar_reg, ((u64)ring->rx_phy_addr) &
>DMA_BIT_MASK(32));
>> +     }
>>  }
>>
>>  static void rtl8169_set_magic_reg(struct rtl8169_private *tp) @@
>> -4190,12 +4236,13 @@ static void rtl8169_mark_to_asic(struct RxDesc
>> *desc)  }
>>
>>  static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
>> -                                       struct RxDesc *desc)
>> +                                       struct rtl8169_rx_ring *ring,
>> + unsigned int index)
>>  {
>>       struct device *d = tp_to_dev(tp);
>>       int node = dev_to_node(d);
>>       dma_addr_t mapping;
>>       struct page *data;
>> +     struct RxDesc *desc = ring->rx_desc_array + index;
>
>Reverse xmas tree
>
>>
>>       data = alloc_pages_node(node, GFP_KERNEL,
>get_order(R8169_RX_BUF_SIZE));
>>       if (!data)
>> @@ -4209,55 +4256,100 @@ static struct page
>*rtl8169_alloc_rx_data(struct rtl8169_private *tp,
>>       }
>>
>>       desc->addr = cpu_to_le64(mapping);
>> +     ring->rx_desc_phy_addr[index] = mapping;
>>       rtl8169_mark_to_asic(desc);
>>
>>       return data;
>>  }
>>
>> -static void rtl8169_rx_clear(struct rtl8169_private *tp)
>> +static void rtl8169_rx_clear(struct rtl8169_private *tp, struct
>> +rtl8169_rx_ring *ring)
>>  {
>>       int i;
>>
>> -     for (i = 0; i < NUM_RX_DESC && tp->Rx_databuff[i]; i++) {
>> +     for (i = 0; i < NUM_RX_DESC && ring->rx_databuff[i]; i++) {
>>               dma_unmap_page(tp_to_dev(tp),
>> -                            le64_to_cpu(tp->RxDescArray[i].addr),
>> +                            ring->rx_desc_phy_addr[i],
>>                              R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
>> -             __free_pages(tp->Rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
>> -             tp->Rx_databuff[i] = NULL;
>> -             tp->RxDescArray[i].addr = 0;
>> -             tp->RxDescArray[i].opts1 = 0;
>> +             __free_pages(ring->rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
>> +             ring->rx_databuff[i] = NULL;
>> +             ring->rx_desc_phy_addr[i] = 0;
>> +             ring->rx_desc_array[i].addr = 0;
>> +             ring->rx_desc_array[i].opts1 = 0;
>>       }
>>  }
>>
>> -static int rtl8169_rx_fill(struct rtl8169_private *tp)
>> +static int rtl8169_rx_fill(struct rtl8169_private *tp, struct
>> +rtl8169_rx_ring *ring)
>>  {
>>       int i;
>>
>>       for (i = 0; i < NUM_RX_DESC; i++) {
>>               struct page *data;
>>
>> -             data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i);
>> +             data = rtl8169_alloc_rx_data(tp, ring, i);
>>               if (!data) {
>> -                     rtl8169_rx_clear(tp);
>> +                     rtl8169_rx_clear(tp, ring);
>>                       return -ENOMEM;
>>               }
>> -             tp->Rx_databuff[i] = data;
>> +             ring->rx_databuff[i] = data;
>>       }
>>
>>       /* mark as last descriptor in the ring */
>> -     tp->RxDescArray[NUM_RX_DESC - 1].opts1 |= cpu_to_le32(RingEnd);
>> +     ring->rx_desc_array[NUM_RX_DESC - 1].opts1 |=
>> + cpu_to_le32(RingEnd);
>> +
>> +     return 0;
>> +}
>> +
>> +static int rtl8169_alloc_rx_desc(struct rtl8169_private *tp) {
>> +     struct rtl8169_rx_ring *ring;
>> +     struct pci_dev *pdev = tp->pci_dev;
>
>Reverse xmas tree. I won't check this any further, please check yourself and fix
>in all affected places.
>
>>
>> +     for (int i = 0; i < tp->num_rx_rings; i++) {
>> +             ring = &tp->rx_ring[i];
>> +             ring->rx_desc_array = dma_alloc_coherent(&pdev->dev,
>> +                                                      R8169_RX_RING_BYTES,
>> +                                                      &ring->rx_phy_addr,
>> +                                                      GFP_KERNEL);
>> +             if (!ring->rx_desc_array)
>> +                     return -1;
>> +     }
>>       return 0;
>>  }
>>
>> +static void rtl8169_free_rx_desc(struct rtl8169_private *tp) {
>> +     struct rtl8169_rx_ring *ring;
>> +     struct pci_dev *pdev = tp->pci_dev;
>> +
>> +     for (int i = 0; i < tp->num_rx_rings; i++) {
>> +             ring = &tp->rx_ring[i];
>> +             if (ring->rx_desc_array) {
>> +                     dma_free_coherent(&pdev->dev,
>> +                                       R8169_RX_RING_BYTES,
>> +                                       ring->rx_desc_array,
>> +                                       ring->rx_phy_addr);
>> +                     ring->rx_desc_array = NULL;
>> +             }
>> +     }
>> +}
>> +
>>  static int rtl8169_init_ring(struct rtl8169_private *tp)  {
>> +     int retval = 0;
>> +
>>       rtl8169_init_ring_indexes(tp);
>> +     rtl8169_rx_desc_init(tp);
>>
>>       memset(tp->tx_skb, 0, sizeof(tp->tx_skb));
>> -     memset(tp->Rx_databuff, 0, sizeof(tp->Rx_databuff));
>>
>> -     return rtl8169_rx_fill(tp);
>> +     for (int i = 0; i < tp->num_rx_rings; i++) {
>> +             struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
>> +
>> +             memset(ring->rx_databuff, 0, sizeof(ring->rx_databuff));
>> +             retval = rtl8169_rx_fill(tp, ring);
>> +     }
>> +
>> +     return retval;
>>  }
>>
>>  static void rtl8169_unmap_tx_skb(struct rtl8169_private *tp, unsigned
>> int entry) @@ -4346,16 +4438,23 @@ static void rtl8169_cleanup(struct
>rtl8169_private *tp)
>>       rtl8169_init_ring_indexes(tp);
>>  }
>>
>> -static void rtl_reset_work(struct rtl8169_private *tp)
>> +static void rtl8169_rx_desc_reset(struct rtl8169_private *tp)
>>  {
>> -     int i;
>> +     for (int i = 0; i < tp->num_rx_rings; i++) {
>> +             struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
>>
>> +             for (int j = 0; j < NUM_RX_DESC; j++)
>> +                     rtl8169_mark_to_asic(ring->rx_desc_array + j);
>> +     }
>> +}
>> +
>> +static void rtl_reset_work(struct rtl8169_private *tp) {
>>       netif_stop_queue(tp->dev);
>>
>>       rtl8169_cleanup(tp);
>>
>> -     for (i = 0; i < NUM_RX_DESC; i++)
>> -             rtl8169_mark_to_asic(tp->RxDescArray + i);
>> +     rtl8169_rx_desc_reset(tp);
>>
>>       rtl8169_napi_enable(tp);
>>       rtl_hw_start(tp);
>> @@ -4749,6 +4848,11 @@ static void rtl8169_pcierr_interrupt(struct
>net_device *dev)
>>       rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);  }
>>
>> +static void rtl8169_desc_quirk(struct rtl8169_private *tp) {
>> +     RTL_R8(tp, LED_CTRL);
>> +}
>> +
>>  static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,
>>                  int budget)
>>  {
>> @@ -4801,9 +4905,10 @@ static inline int rtl8169_fragmented_frame(u32
>status)
>>       return (status & (FirstFrag | LastFrag)) != (FirstFrag |
>> LastFrag);  }
>>
>> -static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
>> +static inline void rtl8169_rx_csum(struct sk_buff *skb,
>> +                                struct RxDesc *desc)
>>  {
>> -     u32 status = opts1 & (RxProtoMask | RxCSFailMask);
>> +     u32 status = le32_to_cpu(desc->opts1) & (RxProtoMask |
>> + RxCSFailMask);
>>
>>       if (status == RxProtoTCP || status == RxProtoUDP)
>>               skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -4811,22
>> +4916,58 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, u32
>opts1)
>>               skb_checksum_none_assert(skb);  }
>>
>> -static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
>> int budget)
>> +static bool rtl8169_check_rx_desc_error(struct net_device *dev,
>> +                                     struct rtl8169_private *tp,
>> +                                     u32 status) {
>> +     if (unlikely(status & RxRES)) {
>> +             if (status & (RxRWT | RxRUNT))
>> +                     dev->stats.rx_length_errors++;
>> +             if (status & RxCRC)
>> +                     dev->stats.rx_crc_errors++;
>> +             return true;
>> +     }
>> +     return false;
>> +}
>> +
>> +static void rtl8169_set_desc_dma_addr(struct RxDesc *desc,
>> +                                   dma_addr_t mapping) {
>> +     desc->addr = cpu_to_le64(mapping); }
>> +
>> +static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
>> +               struct rtl8169_rx_ring *ring, int budget)
>>  {
>>       struct device *d = tp_to_dev(tp);
>>       int count;
>>
>> -     for (count = 0; count < budget; count++, tp->cur_rx++) {
>> -             unsigned int pkt_size, entry = tp->cur_rx % NUM_RX_DESC;
>> -             struct RxDesc *desc = tp->RxDescArray + entry;
>> +     for (count = 0; count < budget; count++, ring->cur_rx++) {
>> +             unsigned int pkt_size, entry = ring->cur_rx % NUM_RX_DESC;
>> +             struct RxDesc *desc = ring->rx_desc_array + entry;
>>               struct sk_buff *skb;
>>               const void *rx_buf;
>>               dma_addr_t addr;
>>               u32 status;
>>
>>               status = le32_to_cpu(READ_ONCE(desc->opts1));
>> -             if (status & DescOwn)
>> -                     break;
>> +
>> +             if (status & DescOwn) {
>> +                     if (!tp->recheck_desc_ownbit)
>> +                             break;
>> +
>> +                     /* Workaround for a hardware issue:
>> +                      * Hardware might trigger RX interrupt before the DMA
>> +                      * engine fully updates RX desc ownbit in host memory.
>> +                      * So we do a quirk and re-read to avoid missing RX
>> +                      * packets.
>> +                      */
>> +                     tp->recheck_desc_ownbit = false;
>> +                     rtl8169_desc_quirk(tp);
>
>Do we need this helper for one register read?
>Description says "re-read to avoid missing RX", but you read the LED_CTRL
>register. This needs an explanation, best extend the comment accordingly.
>Do you have to read just a random register, or has it a specific reason why you
>read exactly this register?

I will drop the helper function and just keep RTL_R8(tp, LED_CTRL).
Reading LED_CTRL is essentially a dummy read to force a PCIe flush. The hardware's ownbit update might be delayed, causing missed RX packets. Reading a side-effect-free register like LED_CTRL flushes the bus, ensuring the descriptor status in RAM is up-to-date.
Reading any register will trigger the flush, so there is no specific reason for choosing this one.

Thanks,
BRs,
Javen

>
>> +                     status = le32_to_cpu(READ_ONCE(desc->opts1));
>> +                     if (status & DescOwn)
>> +                             break;
>> +             }
>>
>>               /* This barrier is needed to keep us from reading
>>                * any other fields out of the Rx descriptor until @@
>> -4834,20 +4975,14 @@ static int rtl_rx(struct net_device *dev, struct
>rtl8169_private *tp, int budget
>>                */
>>               dma_rmb();
>>
>> -             if (unlikely(status & RxRES)) {
>> +             if (rtl8169_check_rx_desc_error(dev, tp, status)) {
>>                       if (net_ratelimit())
>>                               netdev_warn(dev, "Rx ERROR. status = %08x\n",
>>                                           status);
>>                       dev->stats.rx_errors++;
>> -                     if (status & (RxRWT | RxRUNT))
>> -                             dev->stats.rx_length_errors++;
>> -                     if (status & RxCRC)
>> -                             dev->stats.rx_crc_errors++;
>>
>>                       if (!(dev->features & NETIF_F_RXALL))
>>                               goto release_descriptor;
>> -                     else if (status & RxRWT || !(status & (RxRUNT | RxCRC)))
>> -                             goto release_descriptor;
>>               }
>>
>>               pkt_size = status & GENMASK(13, 0); @@ -4863,14 +4998,14
>> @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int
>budget
>>                       goto release_descriptor;
>>               }
>>
>> -             skb = napi_alloc_skb(&tp->rtl8169_napi[0], pkt_size);
>> +             skb = napi_alloc_skb(&tp->rtl8169_napi[ring->index],
>> + pkt_size);
>>               if (unlikely(!skb)) {
>>                       dev->stats.rx_dropped++;
>>                       goto release_descriptor;
>>               }
>>
>> -             addr = le64_to_cpu(desc->addr);
>> -             rx_buf = page_address(tp->Rx_databuff[entry]);
>> +             addr = ring->rx_desc_phy_addr[entry];
>> +             rx_buf = page_address(ring->rx_databuff[entry]);
>>
>>               dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
>>               prefetch(rx_buf);
>> @@ -4879,7 +5014,7 @@ static int rtl_rx(struct net_device *dev, struct
>rtl8169_private *tp, int budget
>>               skb->len = pkt_size;
>>               dma_sync_single_for_device(d, addr, pkt_size,
>> DMA_FROM_DEVICE);
>>
>> -             rtl8169_rx_csum(skb, status);
>> +             rtl8169_rx_csum(skb, desc);
>>               skb->protocol = eth_type_trans(skb, dev);
>>
>>               rtl8169_rx_vlan_tag(desc, skb); @@ -4887,10 +5022,11 @@
>> static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>>               if (skb->pkt_type == PACKET_MULTICAST)
>>                       dev->stats.multicast++;
>>
>> -             napi_gro_receive(&tp->rtl8169_napi[0], skb);
>> +             napi_gro_receive(&tp->rtl8169_napi[ring->index], skb);
>>
>>               dev_sw_netstats_rx_add(dev, pkt_size);
>>  release_descriptor:
>> +             rtl8169_set_desc_dma_addr(desc,
>> + ring->rx_desc_phy_addr[entry]);
>>               rtl8169_mark_to_asic(desc);
>>       }
>>
>> @@ -4917,6 +5053,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void
>*dev_instance)
>>               phy_mac_interrupt(tp->phydev);
>>
>>       rtl_irq_disable(tp);
>> +     tp->recheck_desc_ownbit = true;
>>       napi_schedule(napi);
>>  out:
>>       rtl_ack_events(tp, status);
>> @@ -4992,7 +5129,8 @@ static int rtl8169_poll(struct napi_struct
>> *napi, int budget)
>>
>>       rtl_tx(dev, tp, budget);
>>
>> -     work_done = rtl_rx(dev, tp, budget);
>> +     for (int i = 0; i < tp->num_rx_rings; i++)
>> +             work_done += rtl_rx(dev, tp, &tp->rx_ring[i], budget);
>>
>>       if (work_done < budget && napi_complete_done(napi, work_done))
>>               rtl_irq_enable(tp);
>> @@ -5120,21 +5258,19 @@ static int rtl8169_close(struct net_device *dev)
>>       struct pci_dev *pdev = tp->pci_dev;
>>
>>       pm_runtime_get_sync(&pdev->dev);
>> -
>>       netif_stop_queue(dev);
>>       rtl8169_down(tp);
>> -     rtl8169_rx_clear(tp);
>> +     for (int i = 0; i < tp->num_rx_rings; i++)
>> +             rtl8169_rx_clear(tp, &tp->rx_ring[i]);
>>
>>       rtl8169_free_irq(tp);
>>
>>       phy_disconnect(tp->phydev);
>>
>> -     dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp-
>>RxDescArray,
>> -                       tp->RxPhyAddr);
>>       dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp-
>>TxDescArray,
>>                         tp->TxPhyAddr);
>>       tp->TxDescArray = NULL;
>> -     tp->RxDescArray = NULL;
>> +     rtl8169_free_rx_desc(tp);
>>
>>       pm_runtime_put_sync(&pdev->dev);
>>
>> @@ -5165,13 +5301,11 @@ static int rtl_open(struct net_device *dev)
>>       tp->TxDescArray = dma_alloc_coherent(&pdev->dev,
>R8169_TX_RING_BYTES,
>>                                            &tp->TxPhyAddr, GFP_KERNEL);
>>       if (!tp->TxDescArray)
>> -             goto out;
>> -
>> -     tp->RxDescArray = dma_alloc_coherent(&pdev->dev,
>R8169_RX_RING_BYTES,
>> -                                          &tp->RxPhyAddr, GFP_KERNEL);
>> -     if (!tp->RxDescArray)
>>               goto err_free_tx_0;
>>
>> +     if (rtl8169_alloc_rx_desc(tp) < 0)
>> +             goto err_free_rx_1;
>> +
>>       retval = rtl8169_init_ring(tp);
>>       if (retval < 0)
>>               goto err_free_rx_1;
>> @@ -5198,11 +5332,10 @@ static int rtl_open(struct net_device *dev)
>>       rtl8169_free_irq(tp);
>>  err_release_fw_2:
>>       rtl_release_firmware(tp);
>> -     rtl8169_rx_clear(tp);
>> +     for (int i = 0; i < tp->num_rx_rings; i++)
>> +             rtl8169_rx_clear(tp, &tp->rx_ring[i]);
>>  err_free_rx_1:
>> -     dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp-
>>RxDescArray,
>> -                       tp->RxPhyAddr);
>> -     tp->RxDescArray = NULL;
>> +     rtl8169_free_rx_desc(tp);
>>  err_free_tx_0:
>>       dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp-
>>TxDescArray,
>>                         tp->TxPhyAddr); @@ -5705,7 +5838,10 @@ static
>> int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>>       u32 txconfig;
>>       u32 xid;
>>
>> -     dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp));
>> +     dev = devm_alloc_etherdev_mqs(&pdev->dev, sizeof(*tp),
>> +                                   1,
>> +                                   R8169_MAX_RX_QUEUES);
>> +
>>       if (!dev)
>>               return -ENOMEM;
>>


^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [Patch net-next v3 3/7] r8169: add support for new interrupt mapping
  2026-05-16 22:07   ` Heiner Kallweit
@ 2026-05-18  8:21     ` Javen
  0 siblings, 0 replies; 17+ messages in thread
From: Javen @ 2026-05-18  8:21 UTC (permalink / raw)
  To: Heiner Kallweit, nic_swsd@realtek.com, andrew+netdev@lunn.ch,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, horms@kernel.org
  Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org

>On 13.05.2026 13:55, javen wrote:
>> From: Javen Xu <javen_xu@realsil.com.cn>
>>
>> To support RSS, the number of hardware interrupt bits should match the
>> interrupt of software. So we add support for new interrupt mapping here.
>> ISR_VEC_MAP_REG is the hardware register to indicate interrupt status.
>> IMR_SET_VEC_MAP_REG is interrupt mask which is set to enable irq.
>>
>> Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
>> ---
>> Changes in v2:
>>  - no changes
>>
>> Changes in v3:
>>  - init index in napi_struct and get message_id from index
>>  - move rtl8169_disable_hw_interrupt_msix directly before the call to
>>    napi_schedule()
>>  - change the condition in rtl8169_request_irq when RTL_VEC_MAP_ENABLE
>>    enabled, use rtl8169_interrupt_msix
>> ---
>>  drivers/net/ethernet/realtek/r8169_main.c | 165
>> ++++++++++++++++++++--
>>  1 file changed, 151 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/realtek/r8169_main.c
>> b/drivers/net/ethernet/realtek/r8169_main.c
>> index 9dab0fbcca61..f259cc0cee37 100644
>> --- a/drivers/net/ethernet/realtek/r8169_main.c
>> +++ b/drivers/net/ethernet/realtek/r8169_main.c
>> @@ -78,6 +78,7 @@
>>  #define R8169_TX_STOP_THRS   (MAX_SKB_FRAGS + 1)
>>  #define R8169_TX_START_THRS  (2 * R8169_TX_STOP_THRS)  #define
>> R8169_MAX_RX_QUEUES  8
>> +#define R8127_MAX_TX_QUEUES  8
>>  #define R8169_MAX_MSIX_VEC   32
>>  #define R8127_MAX_RX_QUEUES  8
>>  #define R8169_DEFAULT_RX_QUEUES      1
>> @@ -451,8 +452,13 @@ enum rtl8125_registers {
>>       RSS_CTRL_8125           = 0x4500,
>>       Q_NUM_CTRL_8125         = 0x4800,
>>       EEE_TXIDLE_TIMER_8125   = 0x6048,
>> +     IMR_CLEAR_VEC_MAP_REG   = 0x0d00,
>> +     ISR_VEC_MAP_REG         = 0x0d04,
>> +     IMR_SET_VEC_MAP_REG     = 0x0d0c,
>>  };
>>
>> +#define MSIX_ID_VEC_MAP_LINKCHG      29
>> +#define RTL_VEC_MAP_ENABLE   BIT(0)
>>  #define LEDSEL_MASK_8125     0x23f
>>
>>  #define RX_VLAN_INNER_8125   BIT(22)
>> @@ -583,6 +589,9 @@ enum rtl_register_content {
>>
>>       /* magic enable v2 */
>>       MagicPacket_v2  = (1 << 16),    /* Wake up when receives a Magic Packet
>*/
>> +#define      ISRIMR_LINKCHG  BIT(29)
>> +#define      ISRIMR_TOK_Q0   BIT(8)
>> +#define      ISRIMR_ROK_Q0   BIT(0)
>>  };
>>
>>  enum rtl_isr_version {
>> @@ -778,6 +787,7 @@ struct rtl8169_private {
>>       enum rtl_isr_version hw_curr_isr_ver;
>>       u8 irq_nvecs;
>>       bool recheck_desc_ownbit;
>> +     unsigned int features;
>
>Why do you add an extra bitmap here? Why not use bool xxx:1 like for other
>flags?
>
>>       int irq;
>>       struct clk *clk;
>>
>> @@ -1676,26 +1686,36 @@ static u32 rtl_get_events(struct
>> rtl8169_private *tp)
>>
>>  static void rtl_ack_events(struct rtl8169_private *tp, u32 bits)  {
>> -     if (rtl_is_8125(tp))
>> +     if (rtl_is_8125(tp)) {
>>               RTL_W32(tp, IntrStatus_8125, bits);
>> -     else
>> +             if (tp->features & RTL_VEC_MAP_ENABLE)
>
>Looks to me like this check is equivalent to checking hw_curr_isr_ver >
>RTL_ISR_VER_DEFAULT, or?
>If yes, then this additional flag doesn't seem to be needed.

I will remove this flag and replace it with an if (tp->irq_nvecs > 1) check; I think this is clearer. When we allocate more than one IRQ vector, we just enable vector interrupt mapping.

>
>> +                     RTL_W32(tp, ISR_VEC_MAP_REG, 0xffffffff);
>> +     } else {
>>               RTL_W16(tp, IntrStatus, bits);
>> +     }
>>  }
>>
>>  static void rtl_irq_disable(struct rtl8169_private *tp)  {
>> -     if (rtl_is_8125(tp))
>> +     if (rtl_is_8125(tp)) {
>>               RTL_W32(tp, IntrMask_8125, 0);
>> -     else
>> +             if (tp->features & RTL_VEC_MAP_ENABLE)
>> +                     RTL_W32(tp, IMR_CLEAR_VEC_MAP_REG, 0xffffffff);
>> +     } else {
>>               RTL_W16(tp, IntrMask, 0);
>> +     }
>>  }
>>
>>  static void rtl_irq_enable(struct rtl8169_private *tp)  {
>> -     if (rtl_is_8125(tp))
>> -             RTL_W32(tp, IntrMask_8125, tp->irq_mask);
>> -     else
>> +     if (rtl_is_8125(tp)) {
>> +             if (tp->features & RTL_VEC_MAP_ENABLE)
>> +                     RTL_W32(tp, IMR_SET_VEC_MAP_REG, tp->irq_mask);
>> +             else
>> +                     RTL_W32(tp, IntrMask_8125, tp->irq_mask);
>> +     } else {
>>               RTL_W16(tp, IntrMask, tp->irq_mask);
>> +     }
>>  }
>>
>>  static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp) @@
>> -5070,6 +5090,42 @@ static void rtl8169_free_irq(struct rtl8169_private *tp)
>>       }
>>  }
>>
>> +static void rtl8169_disable_hw_interrupt_msix(struct rtl8169_private
>> +*tp, int message_id) {
>> +     RTL_W32(tp, IMR_CLEAR_VEC_MAP_REG, BIT(message_id)); }
>> +
>> +static void rtl8169_clear_hw_isr(struct rtl8169_private *tp, int
>> +message_id) {
>> +     RTL_W32(tp, ISR_VEC_MAP_REG, BIT(message_id)); }
>> +
>> +static void rtl8169_enable_hw_interrupt_msix(struct rtl8169_private
>> +*tp, int message_id) {
>> +     RTL_W32(tp, IMR_SET_VEC_MAP_REG, BIT(message_id)); }
>> +
>> +static irqreturn_t rtl8169_interrupt_msix(int irq, void
>> +*dev_instance) {
>> +     struct napi_struct *napi = dev_instance;
>> +     struct net_device *dev = napi->dev;
>> +     struct rtl8169_private *tp = netdev_priv(dev);
>> +     int message_id = napi->index;
>> +
>> +     rtl8169_clear_hw_isr(tp, message_id);
>> +
>> +     if (message_id == MSIX_ID_VEC_MAP_LINKCHG) {
>> +             phy_mac_interrupt(tp->phydev);
>> +             return IRQ_HANDLED;
>> +     }
>> +
>> +     tp->recheck_desc_ownbit = true;
>> +     rtl8169_disable_hw_interrupt_msix(tp, message_id);
>> +     napi_schedule(napi);
>> +
>> +     return IRQ_HANDLED;
>> +}
>> +
>>  static int rtl8169_request_irq(struct rtl8169_private *tp)  {
>>       struct net_device *dev = tp->dev; @@ -5078,8 +5134,12 @@ static
>> int rtl8169_request_irq(struct rtl8169_private *tp)
>>
>>       for (int i = 0; i < tp->irq_nvecs; i++) {
>>               napi = &tp->rtl8169_napi[i];
>> -             rc = pci_request_irq(tp->pci_dev, i, rtl8169_interrupt,
>> -                                  NULL, napi, "%s-%d", dev->name, i);
>> +             if (tp->features & RTL_VEC_MAP_ENABLE)
>> +                     rc = pci_request_irq(tp->pci_dev, i, rtl8169_interrupt_msix,
>> +                                          NULL, napi, "%s-%d", dev->name, i);
>> +             else
>> +                     rc = pci_request_irq(tp->pci_dev, i, rtl8169_interrupt,
>> +                                          NULL, napi, "%s-%d",
>> + dev->name, i);
>>               if (rc)
>>                       break;
>>       }
>> @@ -5523,10 +5583,16 @@ static const struct net_device_ops
>> rtl_netdev_ops = {
>>
>>  static void rtl_set_irq_mask(struct rtl8169_private *tp)  {
>> -     tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
>> +     if (tp->features & RTL_VEC_MAP_ENABLE) {
>> +             tp->irq_mask = ISRIMR_LINKCHG | ISRIMR_TOK_Q0;
>> +             for (int i = 0; i < tp->num_rx_rings; i++)
>> +                     tp->irq_mask |= ISRIMR_ROK_Q0 << i;
>> +     } else {
>> +             tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
>>
>> -     if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
>> -             tp->irq_mask |= SYSErr | RxFIFOOver;
>> +             if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
>> +                     tp->irq_mask |= SYSErr | RxFIFOOver;
>> +     }
>>  }
>>
>>  static int rtl_alloc_irq(struct rtl8169_private *tp) @@ -5555,6
>> +5621,8 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
>>               return nvecs;
>>
>>       tp->irq_nvecs = nvecs;
>> +     if (nvecs > 1)
>> +             tp->features |= RTL_VEC_MAP_ENABLE;
>>
>>       return 0;
>>  }
>> @@ -5822,10 +5890,79 @@ static bool rtl_aspm_is_safe(struct
>rtl8169_private *tp)
>>       return false;
>>  }
>>
>> +static int rtl8169_poll_msix_rx(struct napi_struct *napi, int budget)
>> +{
>> +     struct net_device *dev = napi->dev;
>> +     struct rtl8169_private *tp = netdev_priv(dev);
>> +     const int message_id = napi->index;
>> +     int work_done = 0;
>> +
>> +     if (message_id < tp->num_rx_rings)
>> +             work_done += rtl_rx(dev, tp, &tp->rx_ring[message_id],
>> + budget);
>> +
>> +     if (work_done < budget && napi_complete_done(napi, work_done))
>> +             rtl8169_enable_hw_interrupt_msix(tp, message_id);
>> +
>> +     return work_done;
>> +}
>> +
>> +static int rtl8169_poll_msix_tx(struct napi_struct *napi, int budget)
>> +{
>> +     struct net_device *dev = napi->dev;
>> +     struct rtl8169_private *tp = netdev_priv(dev);
>> +     const int message_id = napi->index;
>> +     int tx_ring_idx = message_id - 8;
>> +     unsigned int work_done = 0;
>> +
>> +     if (tx_ring_idx >= 0)
>> +             rtl_tx(dev, tp, budget);
>> +
>> +     if (work_done < budget && napi_complete_done(napi, work_done))
>> +             rtl8169_enable_hw_interrupt_msix(tp, message_id);
>> +
>> +     return work_done;
>> +}
>> +
>> +static int rtl8169_poll_msix_other(struct napi_struct *napi, int
>> +budget) {
>> +     struct net_device *dev = napi->dev;
>> +     struct rtl8169_private *tp = netdev_priv(dev);
>> +     const int message_id = napi - tp->rtl8169_napi;
>
>Why not use napi->index here too?
>
>> +
>> +     napi_complete_done(napi, budget);
>> +     rtl8169_enable_hw_interrupt_msix(tp, message_id);
>> +
>> +     return 1;
>> +}
>> +
>>  static void r8169_init_napi(struct rtl8169_private *tp)  {
>> -     for (int i = 0; i < tp->irq_nvecs; i++)
>> -             netif_napi_add(tp->dev, &tp->rtl8169_napi[i], rtl8169_poll);
>> +     for (int i = 0; i < tp->irq_nvecs; i++) {
>> +             if (tp->features & RTL_VEC_MAP_ENABLE) {
>> +                     switch (tp->hw_curr_isr_ver) {
>> +                     case RTL_ISR_VER_8127:
>
>A comment describing the RTL8127 MSI-X vector layout would be helpful here.
>Otherwise the following is hard to understand.
>
>> +                             if (i < R8127_MAX_RX_QUEUES)
>> +                                     netif_napi_add(tp->dev, &tp->rtl8169_napi[i],
>> +                                                    rtl8169_poll_msix_rx);
>> +                             else if (i >= R8127_MAX_RX_QUEUES &&
>> +                                      i < (R8127_MAX_RX_QUEUES +
>> +                                      R8127_MAX_TX_QUEUES))
>> +                                     netif_napi_add(tp->dev, &tp->rtl8169_napi[i],
>> +                                                    rtl8169_poll_msix_tx);
>> +                             else
>> +                                     netif_napi_add(tp->dev, &tp->rtl8169_napi[i],
>> +                                                    rtl8169_poll_msix_other);
>> +                             break;
>> +                     default:
>> +                             netif_napi_add(tp->dev, &tp->rtl8169_napi[i],
>> +                                            rtl8169_poll);
>> +                             break;
>> +                     }
>> +             } else {
>> +                     netif_napi_add(tp->dev, &tp->rtl8169_napi[i], rtl8169_poll);
>> +             }
>
>This seems to be unnecessarily complex and can be simplified.
>
>> +             tp->rtl8169_napi[i].index = i;
>> +     }
>>  }
>>
>>  static int rtl_init_one(struct pci_dev *pdev, const struct
>> pci_device_id *ent)


^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [Patch net-next v3 5/7] r8169: add support and enable rss
  2026-05-16 22:07   ` Heiner Kallweit
@ 2026-05-18 11:17     ` Javen
  0 siblings, 0 replies; 17+ messages in thread
From: Javen @ 2026-05-18 11:17 UTC (permalink / raw)
  To: Heiner Kallweit, nic_swsd@realtek.com, andrew+netdev@lunn.ch,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, horms@kernel.org
  Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org

>On 13.05.2026 13:55, javen wrote:
>> From: Javen Xu <javen_xu@realsil.com.cn>
>>
>> This patch adds support for RSS on RTL8127 and enables it.
>>
>> Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
>> ---
>> Changes in v2:
>>  - some changes moved from Patch 2/7
>> Changes in v3:
>>  - add struct rtl8169_rss_data. Allocate it dynamically when needed.
>>  - define rss_key as an u32 array
>>  - replace some magic bit numbers in rtl8169_set_rss_hash_opt() and
>>    rtl8125_set_rx_q_num()
>>  - use union to combine different rx descriptor, refactor struct
>> RxDesc
>>  - remove dead code from rtl8169_double_check_rss_support()
>> ---
>>  drivers/net/ethernet/realtek/r8169_main.c | 405
>> ++++++++++++++++++++--
>>  1 file changed, 371 insertions(+), 34 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/realtek/r8169_main.c
>> b/drivers/net/ethernet/realtek/r8169_main.c
>> index b9c505e4bc0a..b90375cef724 100644
>> --- a/drivers/net/ethernet/realtek/r8169_main.c
>> +++ b/drivers/net/ethernet/realtek/r8169_main.c
>> @@ -82,6 +82,19 @@
>>  #define R8169_MAX_MSIX_VEC   32
>>  #define R8127_MAX_RX_QUEUES  8
>>  #define R8169_DEFAULT_RX_QUEUES      1
>> +#define R8127_MAX_IRQ                32
>> +#define R8127_MIN_IRQ                30
>> +#define R8169_IRQ_DEFAULT    1
>
>These name are somewhat misleading. Would something like
>R8127_MAX_NUM_IRQVEC better describe the usage?
>
>> +#define RTL_RSS_KEY_SIZE     40
>> +#define RSS_CPU_NUM_MASK     GENMASK(18, 16)
>> +#define RSS_HASH_MASK                GENMASK(10, 8)
>> +#define RTL_MAX_INDIRECTION_TABLE_ENTRIES 128
>> +#define RXS_RSS_UDP          BIT(27)
>> +#define RXS_RSS_IPV4         BIT(28)
>> +#define RXS_RSS_IPV6         BIT(29)
>> +#define RXS_RSS_TCP          BIT(30)
>> +#define RXS_RSS_L3_TYPE_MASK (RXS_RSS_IPV4 | RXS_RSS_IPV6) #define
>> +RXS_RSS_L4_TYPE_MASK (RXS_RSS_TCP | RXS_RSS_UDP)
>>
>>  #define OCP_STD_PHY_BASE     0xa400
>>
>> @@ -592,6 +605,25 @@ enum rtl_register_content {
>>  #define      ISRIMR_LINKCHG  BIT(29)
>>  #define      ISRIMR_TOK_Q0   BIT(8)
>>  #define      ISRIMR_ROK_Q0   BIT(0)
>> +#define RTL_DESC_TYPE_CTRL           0xd8
>> +#define RSS_KEY_REG                  0x4600
>> +#define RSS_INDIRECTION_TBL_REG              0x4700
>> +#define RSS_CTRL_TCP_IPV4_SUPP               BIT(0)
>> +#define RTL_DESC_TYPE_RSS            BIT(1)
>> +#define RSS_CTRL_IPV4_SUPP           BIT(1)
>> +#define RSS_CTRL_TCP_IPV6_SUPP               BIT(2)
>> +#define RSS_CTRL_IPV6_SUPP           BIT(3)
>> +#define RSS_CTRL_IPV6_EXT_SUPP               BIT(4)
>> +#define RSS_CTRL_TCP_IPV6_EXT_SUPP   BIT(5)
>> +#define RSS_CTRL_UDP_IPV4_SUPP               BIT(6)
>> +#define RSS_CTRL_UDP_IPV6_SUPP               BIT(7)
>> +#define RSS_CTRL_UDP_IPV6_EXT_SUPP   BIT(8)
>> +#define RTL_RSS_FLAG_HASH_UDP_IPV4   BIT(0)
>> +#define RTL_RSS_FLAG_HASH_UDP_IPV6   BIT(1)
>> +#define      RX_RES_RSS                      BIT(22)
>> +#define      RX_RUNT_RSS                     BIT(21)
>> +#define      RX_CRC_RSS                      BIT(20)
>> +#define RTL_RX_Q_NUM_MASK            GENMASK(4, 2)
>>  };
>>
>>  enum rtl_isr_version {
>> @@ -654,6 +686,11 @@ enum rtl_rx_desc_bit {
>>  #define RxProtoIP    (PID1 | PID0)
>>  #define RxProtoMask  RxProtoIP
>>
>> +#define      RX_UDPT_DESC_RSS        BIT(19)
>> +#define      RX_TCPT_DESC_RSS        BIT(18)
>> +#define      RX_UDPF_DESC_RSS        BIT(16) /* UDP/IP checksum failed */
>> +#define      RX_TCPF_DESC_RSS        BIT(15) /* TCP/IP checksum failed */
>> +
>>       IPFail          = (1 << 16), /* IP checksum failed */
>>       UDPFail         = (1 << 15), /* UDP/IP checksum failed */
>>       TCPFail         = (1 << 14), /* TCP/IP checksum failed */
>> @@ -675,9 +712,27 @@ struct TxDesc {
>>  };
>>
>>  struct RxDesc {
>> -     __le32 opts1;
>> -     __le32 opts2;
>> -     __le64 addr;
>> +     union {
>> +             /* RX_DESC_RING_TYPE_DEFAULT */
>> +             struct {
>> +                     __le32 opts1;
>> +                     __le32 opts2;
>> +                     __le64 addr;
>> +             };
>> +
>> +             /* RX_DESC_RING_TYPE_RSS */
>> +             struct {
>> +                     union {
>> +                             __le64 rss_addr;
>> +                             struct {
>> +                                     __le32 rss_info;
>> +                                     __le32 rss_result;
>> +                             } rss_dword;
>> +                     };
>> +                     __le32 rss_opts2;
>> +                     __le32 rss_opts1;
>> +             };
>> +     };
>>  };
>>
>>  struct ring_info {
>> @@ -764,6 +819,13 @@ struct rtl8169_rx_ring {
>>       struct page *rx_databuff[NUM_RX_DESC];          /* Rx data buffers */
>>  };
>>
>> +struct rtl8169_rss_data {
>> +     u32 rss_flags;
>> +     u32 rss_key[RTL_RSS_KEY_SIZE / sizeof(u32)];
>> +     u8 rss_indir_tbl[RTL_MAX_INDIRECTION_TABLE_ENTRIES];
>> +     u8 hw_supp_indir_tbl_entries;
>
>Like in other places: Why not use unsigned int here?
>
>> +};
>> +
>>  struct rtl8169_private {
>>       void __iomem *mmio_addr;        /* memory map physical address */
>>       struct pci_dev *pci_dev;
>> @@ -783,9 +845,11 @@ struct rtl8169_private {
>>       u16 tx_lpi_timer;
>>       u32 irq_mask;
>>       u16 hw_supp_num_rx_queues;
>> +     struct rtl8169_rss_data *rss_data;
>>       enum rtl_isr_version hw_supp_isr_ver;
>>       enum rtl_isr_version hw_curr_isr_ver;
>>       u8 irq_nvecs;
>> +     u8 init_rx_desc_type;
>
>Type should be the respective enum, not u8.
>
>>       bool recheck_desc_ownbit;
>>       unsigned int features;
>>       int irq;
>> @@ -1620,6 +1684,13 @@ static bool rtl_dash_is_enabled(struct
>rtl8169_private *tp)
>>       }
>>  }
>>
>> +static bool rtl_check_rss_support(struct rtl8169_private *tp) {
>> +     if (tp->mac_version == RTL_GIGA_MAC_VER_80)
>> +             return true;
>
>Typically an empty line is inserted before the return statement.
>
>> +     return false;
>> +}
>> +
>>  static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private
>> *tp)  {
>>       switch (tp->mac_version) {
>> @@ -1919,9 +1990,20 @@ static inline u32 rtl8169_tx_vlan_tag(struct
>sk_buff *skb)
>>               TxVlanTag | swab16(skb_vlan_tag_get(skb)) : 0x00;  }
>>
>> -static void rtl8169_rx_vlan_tag(struct RxDesc *desc, struct sk_buff
>> *skb)
>> +static void rtl8169_rx_vlan_tag(struct rtl8169_private *tp,
>> +                             struct RxDesc *desc,
>> +                             struct sk_buff *skb)
>>  {
>> -     u32 opts2 = le32_to_cpu(desc->opts2);
>> +     u32 opts2;
>> +
>> +     switch (tp->init_rx_desc_type) {
>> +     case RX_DESC_RING_TYPE_RSS:
>> +             opts2 = le32_to_cpu(desc->rss_opts2);
>> +             break;
>> +     default:
>> +             opts2 = le32_to_cpu(desc->opts2);
>> +             break;
>> +     }
>>
>>       if (opts2 & RxVlanTag)
>>               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
>> swab16(opts2 & 0xffff)); @@ -2750,6 +2832,14 @@ static void
>rtl_hw_reset(struct rtl8169_private *tp)
>>       rtl_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);  }
>>
>> +static void rtl8169_init_rss(struct rtl8169_private *tp) {
>> +     for (int i = 0; i < tp->rss_data->hw_supp_indir_tbl_entries; i++)
>> +             tp->rss_data->rss_indir_tbl[i] =
>> +ethtool_rxfh_indir_default(i, tp->num_rx_rings);
>> +
>> +     netdev_rss_key_fill(tp->rss_data->rss_key, RTL_RSS_KEY_SIZE); }
>> +
>>  static void rtl_software_parameter_initialize(struct rtl8169_private
>> *tp)  {
>>       tp->num_rx_rings = 1;
>> @@ -2757,6 +2847,7 @@ static void
>rtl_software_parameter_initialize(struct rtl8169_private *tp)
>>       switch (tp->mac_version) {
>>       case RTL_GIGA_MAC_VER_80:
>>               tp->hw_supp_num_rx_queues = R8127_MAX_RX_QUEUES;
>> +             tp->rss_data->hw_supp_indir_tbl_entries =
>> + RTL_MAX_INDIRECTION_TABLE_ENTRIES;
>>               tp->hw_supp_isr_ver = RTL_ISR_VER_8127;
>>               break;
>>       default:
>> @@ -2764,6 +2855,7 @@ static void
>rtl_software_parameter_initialize(struct rtl8169_private *tp)
>>               tp->hw_supp_isr_ver = RTL_ISR_VER_DEFAULT;
>>               break;
>>       }
>> +     tp->init_rx_desc_type = RX_DESC_RING_TYPE_DEFAULT;
>>       tp->hw_curr_isr_ver = tp->hw_supp_isr_ver;  }
>>
>> @@ -2889,6 +2981,72 @@ static void rtl_set_rx_max_size(struct
>rtl8169_private *tp)
>>       RTL_W16(tp, RxMaxSize, R8169_RX_BUF_SIZE + 1);  }
>>
>> +static void rtl8169_store_rss_key(struct rtl8169_private *tp) {
>> +     u32 *rss_key = tp->rss_data->rss_key;
>> +     const u16 rss_key_reg = RSS_KEY_REG;
>> +     u32 num_entries = RTL_RSS_KEY_SIZE / sizeof(u32);
>> +
>> +     /* Write RSS hash key to HW */
>> +     for (int i = 0; i < num_entries; i++)
>> +             RTL_W32(tp, rss_key_reg + (i * 4), rss_key[i]); }
>> +
>> +static void rtl8169_store_reta(struct rtl8169_private *tp) {
>> +     u16 indir_tbl_reg = RSS_INDIRECTION_TBL_REG;
>> +     u32 i, reta_entries = tp->rss_data->hw_supp_indir_tbl_entries;
>> +     u32 reta = 0;
>> +     u8 *indir_tbl = tp->rss_data->rss_indir_tbl;
>> +
>> +     /* Write redirection table to HW */
>> +     for (i = 0; i < reta_entries; i++) {
>> +             reta |= indir_tbl[i] << (i & 0x3) * 8;
>> +             if ((i & 3) == 3) {
>> +                     RTL_W32(tp, indir_tbl_reg, reta);
>> +                     indir_tbl_reg += 4;
>> +                     reta = 0;
>> +             }
>> +     }
>> +}
>> +
>> +static int rtl8169_set_rss_hash_opt(struct rtl8169_private *tp) {
>> +     u32 rss_flags = tp->rss_data->rss_flags;
>> +     u32 rss_ctrl;
>> +
>> +     rss_ctrl = FIELD_PREP(RSS_CPU_NUM_MASK,
>> + ilog2(tp->num_rx_rings));
>> +
>> +     /* Perform hash on these packet types */
>> +     rss_ctrl |= RSS_CTRL_TCP_IPV4_SUPP
>> +              | RSS_CTRL_IPV4_SUPP
>> +              | RSS_CTRL_IPV6_SUPP
>> +              | RSS_CTRL_IPV6_EXT_SUPP
>> +              | RSS_CTRL_TCP_IPV6_SUPP
>> +              | RSS_CTRL_TCP_IPV6_EXT_SUPP;
>> +
>> +     if (rss_flags & RTL_RSS_FLAG_HASH_UDP_IPV4)
>> +             rss_ctrl |= RSS_CTRL_UDP_IPV4_SUPP;
>> +
>> +     if (rss_flags & RTL_RSS_FLAG_HASH_UDP_IPV6)
>> +             rss_ctrl |= RSS_CTRL_UDP_IPV6_SUPP |
>> +                         RSS_CTRL_UDP_IPV6_EXT_SUPP;
>> +
>> +     rss_ctrl |= FIELD_PREP(RSS_HASH_MASK,
>> +
>> + ilog2(tp->rss_data->hw_supp_indir_tbl_entries));
>> +
>> +     RTL_W32(tp, RSS_CTRL_8125, rss_ctrl);
>> +
>> +     return 0;
>> +}
>> +
>> +static void rtl_set_rss_config(struct rtl8169_private *tp) {
>> +     rtl8169_set_rss_hash_opt(tp);
>> +     rtl8169_store_reta(tp);
>> +     rtl8169_store_rss_key(tp);
>> +}
>> +
>>  static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
>> {
>>       struct rtl8169_rx_ring *ring = &tp->rx_ring[0]; @@ -3955,6
>> +4113,18 @@ DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond)
>>       return r8168_mac_ocp_read(tp, 0xe00e) & BIT(13);  }
>>
>> +static void rtl8125_set_rx_q_num(struct rtl8169_private *tp) {
>> +     u16 q_ctrl;
>> +     u16 rx_q_num;
>> +
>> +     rx_q_num = (u16)ilog2(tp->num_rx_rings);
>> +     q_ctrl = RTL_R16(tp, Q_NUM_CTRL_8125);
>> +     q_ctrl &= ~RTL_RX_Q_NUM_MASK;
>> +     q_ctrl |= FIELD_PREP(RTL_RX_Q_NUM_MASK, rx_q_num);
>> +     RTL_W16(tp, Q_NUM_CTRL_8125, q_ctrl); }
>> +
>>  static void rtl8169_hw_enable_vec_mapping(struct rtl8169_private *tp)
>> {
>>       u8 tmp;
>> @@ -3994,6 +4164,13 @@ static void rtl_hw_start_8125_common(struct
>rtl8169_private *tp)
>>           tp->mac_version == RTL_GIGA_MAC_VER_80)
>>               RTL_W8(tp, 0xD8, RTL_R8(tp, 0xD8) & ~0x02);
>>
>> +     /* enable rx descriptor type v4 and set queue num for rss*/
>> +     if (tp->rss_enable) {
>> +             rtl8125_set_rx_q_num(tp);
>> +             RTL_W8(tp, RTL_DESC_TYPE_CTRL,
>> +                    RTL_R8(tp, RTL_DESC_TYPE_CTRL) | RTL_DESC_TYPE_RSS);
>> +     }
>> +
>>       if (tp->mac_version == RTL_GIGA_MAC_VER_80)
>>               r8168_mac_ocp_modify(tp, 0xe614, 0x0f00, 0x0f00);
>>       else if (tp->mac_version == RTL_GIGA_MAC_VER_70) @@ -4230,6
>> +4407,12 @@ static void rtl_hw_start(struct  rtl8169_private *tp)
>>       rtl_hw_aspm_clkreq_enable(tp, true);
>>       rtl_set_rx_max_size(tp);
>>       rtl_set_rx_tx_desc_registers(tp);
>> +     if (rtl_is_8125(tp)) {
>> +             if (tp->rss_enable)
>> +                     rtl_set_rss_config(tp);
>> +             else
>> +                     RTL_W32(tp, RSS_CTRL_8125, 0x00);
>> +     }
>>       rtl_lock_config_regs(tp);
>>
>>       rtl_jumbo_config(tp);
>> @@ -4257,14 +4440,26 @@ static int rtl8169_change_mtu(struct net_device
>*dev, int new_mtu)
>>       return 0;
>>  }
>>
>> -static void rtl8169_mark_to_asic(struct RxDesc *desc)
>> +static void rtl8169_mark_to_asic(struct rtl8169_private *tp, struct
>> +RxDesc *desc)
>>  {
>> -     u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
>> +     u32 eor;
>>
>> -     desc->opts2 = 0;
>> -     /* Force memory writes to complete before releasing descriptor */
>> -     dma_wmb();
>> -     WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor |
>R8169_RX_BUF_SIZE));
>> +     switch (tp->init_rx_desc_type) {
>> +     case RX_DESC_RING_TYPE_RSS:
>> +             eor = le32_to_cpu(desc->rss_opts1) & RingEnd;
>> +             desc->rss_opts2 = 0;
>> +             /* Force memory writes to complete before releasing descriptor */
>> +             dma_wmb();
>> +             WRITE_ONCE(desc->rss_opts1, cpu_to_le32(DescOwn | eor |
>R8169_RX_BUF_SIZE));
>> +             break;
>> +     default:
>> +             eor = le32_to_cpu(desc->opts1) & RingEnd;
>> +             desc->opts2 = 0;
>> +             /* Force memory writes to complete before releasing descriptor */
>> +             dma_wmb();
>> +             WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor |
>R8169_RX_BUF_SIZE));
>> +             break;
>> +     }
>>  }
>>
>>  static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
>> @@ -4287,9 +4482,12 @@ static struct page *rtl8169_alloc_rx_data(struct
>rtl8169_private *tp,
>>               return NULL;
>>       }
>>
>> -     desc->addr = cpu_to_le64(mapping);
>>       ring->rx_desc_phy_addr[index] = mapping;
>> -     rtl8169_mark_to_asic(desc);
>> +     if (tp->init_rx_desc_type == RX_DESC_RING_TYPE_RSS)
>> +             desc->rss_addr = cpu_to_le64(mapping);
>> +     else
>> +             desc->addr = cpu_to_le64(mapping);
>> +     rtl8169_mark_to_asic(tp, desc);
>>
>>       return data;
>>  }
>> @@ -4310,6 +4508,18 @@ static void rtl8169_rx_clear(struct rtl8169_private
>*tp, struct rtl8169_rx_ring
>>       }
>>  }
>>
>> +static void rtl8169_mark_as_last_descriptor(struct rtl8169_private
>> +*tp, struct RxDesc *desc) {
>> +     switch (tp->init_rx_desc_type) {
>> +     case RX_DESC_RING_TYPE_RSS:
>> +             desc->rss_opts1 |= cpu_to_le32(RingEnd);
>> +             break;
>> +     default:
>> +             desc->opts1 |= cpu_to_le32(RingEnd);
>> +             break;
>> +     }
>> +}
>> +
>>  static int rtl8169_rx_fill(struct rtl8169_private *tp, struct
>> rtl8169_rx_ring *ring)  {
>>       int i;
>> @@ -4326,7 +4536,7 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp,
>struct rtl8169_rx_ring *r
>>       }
>>
>>       /* mark as last descriptor in the ring */
>> -     ring->rx_desc_array[NUM_RX_DESC - 1].opts1 |= cpu_to_le32(RingEnd);
>> +     rtl8169_mark_as_last_descriptor(tp,
>> + &ring->rx_desc_array[NUM_RX_DESC - 1]);
>>
>>       return 0;
>>  }
>> @@ -4476,7 +4686,7 @@ static void rtl8169_rx_desc_reset(struct
>rtl8169_private *tp)
>>               struct rtl8169_rx_ring *ring = &tp->rx_ring[i];
>>
>>               for (int j = 0; j < NUM_RX_DESC; j++)
>> -                     rtl8169_mark_to_asic(ring->rx_desc_array + j);
>> +                     rtl8169_mark_to_asic(tp, ring->rx_desc_array +
>> + j);
>>       }
>>  }
>>
>> @@ -4937,35 +5147,104 @@ static inline int rtl8169_fragmented_frame(u32
>status)
>>       return (status & (FirstFrag | LastFrag)) != (FirstFrag |
>> LastFrag);  }
>>
>> -static inline void rtl8169_rx_csum(struct sk_buff *skb,
>> +static inline void rtl8169_rx_hash(struct rtl8169_private *tp,
>> +                                struct RxDesc *desc,
>> +                                struct sk_buff *skb) {
>> +     u32 rss_header_info;
>> +     u32 hash_val;
>> +
>> +     if (!(tp->dev->features & NETIF_F_RXHASH))
>> +             return;
>> +
>> +     rss_header_info = le32_to_cpu(desc->rss_dword.rss_info);
>> +
>> +     if (!(rss_header_info & RXS_RSS_L3_TYPE_MASK))
>> +             return;
>> +
>> +     hash_val = le32_to_cpu(desc->rss_dword.rss_result);
>> +
>> +     skb_set_hash(skb, hash_val,
>> +                  (RXS_RSS_L4_TYPE_MASK & rss_header_info) ?
>> +                  PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); }
>> +
>> +static inline void rtl8169_rx_csum(struct rtl8169_private *tp,
>> +                                struct sk_buff *skb,
>>                                  struct RxDesc *desc)  {
>> -     u32 status = le32_to_cpu(desc->opts1) & (RxProtoMask | RxCSFailMask);
>> +     bool csum_ok = false;
>> +     u32 opts1;
>>
>> -     if (status == RxProtoTCP || status == RxProtoUDP)
>> +     switch (tp->init_rx_desc_type) {
>> +     case RX_DESC_RING_TYPE_RSS:
>> +             opts1 = le32_to_cpu(desc->rss_opts1);
>> +             if (((opts1 & RX_TCPT_DESC_RSS) && !(opts1 & RX_TCPF_DESC_RSS))
>||
>> +                 ((opts1 & RX_UDPT_DESC_RSS) && !(opts1 &
>RX_UDPF_DESC_RSS)))
>> +                     csum_ok = true;
>> +             break;
>> +     default:
>> +             opts1 = le32_to_cpu(desc->opts1) & (RxProtoMask | RxCSFailMask);
>> +             if (opts1 == RxProtoTCP || opts1 == RxProtoUDP)
>> +                     csum_ok = true;
>> +             break;
>> +     }
>> +
>> +     if (csum_ok)
>>               skb->ip_summed = CHECKSUM_UNNECESSARY;
>>       else
>>               skb_checksum_none_assert(skb);  }
>>
>> +static u32 rtl8169_rx_desc_opts1(struct rtl8169_private *tp, struct
>> +RxDesc *desc) {
>> +     switch (tp->init_rx_desc_type) {
>> +     case RX_DESC_RING_TYPE_RSS:
>> +             return READ_ONCE(desc->rss_opts1);
>> +     default:
>> +             return READ_ONCE(desc->opts1);
>> +     }
>> +}
>> +
>>  static bool rtl8169_check_rx_desc_error(struct net_device *dev,
>>                                       struct rtl8169_private *tp,
>>                                       u32 status)  {
>> -     if (unlikely(status & RxRES)) {
>> -             if (status & (RxRWT | RxRUNT))
>> -                     dev->stats.rx_length_errors++;
>> -             if (status & RxCRC)
>> -                     dev->stats.rx_crc_errors++;
>> -             return true;
>> +     switch (tp->init_rx_desc_type) {
>> +     case RX_DESC_RING_TYPE_RSS:
>> +             if (unlikely(status & RX_RES_RSS)) {
>> +                     if (status & RX_RUNT_RSS)
>> +                             dev->stats.rx_length_errors++;
>> +                     if (status & RX_CRC_RSS)
>> +                             dev->stats.rx_crc_errors++;
>> +                     return true;
>> +             }
>> +             break;
>> +     default:
>> +             if (unlikely(status & RxRES)) {
>> +                     if (status & (RxRWT | RxRUNT))
>> +                             dev->stats.rx_length_errors++;
>> +                     if (status & RxCRC)
>> +                             dev->stats.rx_crc_errors++;
>> +                     return true;
>> +             }
>> +             break;
>>       }
>>       return false;
>>  }
>>
>> -static void rtl8169_set_desc_dma_addr(struct RxDesc *desc,
>> +static void rtl8169_set_desc_dma_addr(struct rtl8169_private *tp,
>> +                                   struct RxDesc *desc,
>>                                     dma_addr_t mapping)  {
>> -     desc->addr = cpu_to_le64(mapping);
>> +     switch (tp->init_rx_desc_type) {
>> +     case RX_DESC_RING_TYPE_RSS:
>> +             desc->rss_addr = cpu_to_le64(mapping);
>> +             break;
>> +     default:
>> +             desc->addr = cpu_to_le64(mapping);
>> +             break;
>> +     }
>>  }
>>
>>  static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
>> @@ -4982,7 +5261,7 @@ static int rtl_rx(struct net_device *dev, struct
>rtl8169_private *tp,
>>               dma_addr_t addr;
>>               u32 status;
>>
>> -             status = le32_to_cpu(READ_ONCE(desc->opts1));
>> +             status = le32_to_cpu(rtl8169_rx_desc_opts1(tp, desc));
>>
>>               if (status & DescOwn) {
>>                       if (!tp->recheck_desc_ownbit) @@ -4996,7 +5275,7
>> @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
>>                        */
>>                       tp->recheck_desc_ownbit = false;
>>                       rtl8169_desc_quirk(tp);
>> -                     status = le32_to_cpu(READ_ONCE(desc->opts1));
>> +                     status = le32_to_cpu(rtl8169_rx_desc_opts1(tp,
>> + desc));
>>                       if (status & DescOwn)
>>                               break;
>>               }
>> @@ -5045,11 +5324,12 @@ static int rtl_rx(struct net_device *dev, struct
>rtl8169_private *tp,
>>               skb->tail += pkt_size;
>>               skb->len = pkt_size;
>>               dma_sync_single_for_device(d, addr, pkt_size,
>> DMA_FROM_DEVICE);
>> -
>> -             rtl8169_rx_csum(skb, desc);
>> +             if (tp->rss_enable)
>> +                     rtl8169_rx_hash(tp, desc, skb);
>> +             rtl8169_rx_csum(tp, skb, desc);
>>               skb->protocol = eth_type_trans(skb, dev);
>>
>> -             rtl8169_rx_vlan_tag(desc, skb);
>> +             rtl8169_rx_vlan_tag(tp, desc, skb);
>>
>>               if (skb->pkt_type == PACKET_MULTICAST)
>>                       dev->stats.multicast++; @@ -5058,8 +5338,8 @@
>> static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp,
>>
>>               dev_sw_netstats_rx_add(dev, pkt_size);
>>  release_descriptor:
>> -             rtl8169_set_desc_dma_addr(desc, ring->rx_desc_phy_addr[entry]);
>> -             rtl8169_mark_to_asic(desc);
>> +             rtl8169_set_desc_dma_addr(tp, desc, ring-
>>rx_desc_phy_addr[entry]);
>> +             rtl8169_mark_to_asic(tp, desc);
>>       }
>>
>>       return count;
>> @@ -5607,6 +5887,43 @@ static void rtl_set_irq_mask(struct
>rtl8169_private *tp)
>>       }
>>  }
>>
>> +static int get_max_irq_nvecs(struct rtl8169_private *tp) {
>> +     if (tp->mac_version == RTL_GIGA_MAC_VER_80)
>> +             return R8127_MAX_IRQ;
>> +     return R8169_IRQ_DEFAULT;
>> +}
>> +
>> +static int get_min_irq_nvecs(struct rtl8169_private *tp) {
>> +     if (tp->mac_version == RTL_GIGA_MAC_VER_80)
>> +             return R8127_MIN_IRQ;
>> +     return R8169_IRQ_DEFAULT;
>> +}
>> +
>> +static void rtl8169_double_check_rss_support(struct rtl8169_private
>> +*tp)
>
>Needing such a function indicates that checks in other places are not sufficient.

I have removed this function entirely and replaced it with rtl8169_set_rx_ring_num, because num_rx_rings should be set according to irq_nvecs.
Now the driver uses tp->irq_nvecs to control the new interrupt mapping, and uses tp->num_rx_rings to control whether RSS is enabled. tp->hw_curr_isr_ver and tp->features are no longer used.
All reverse xmas tree ordering issues have been checked and fixed in net-next v4.
Thanks for your time.

BRs,
Javen
>
>> +{
>> +     if (tp->hw_curr_isr_ver > RTL_ISR_VER_DEFAULT) {
>> +             if (!(tp->features & RTL_VEC_MAP_ENABLE) ||
>> +tp->irq_nvecs < get_min_irq_nvecs(tp))
>
>Can this happen? If yes, then why not adjusting hw_curr_isr_ver in a first
>place?
>
>> +                     tp->hw_curr_isr_ver = RTL_ISR_VER_8127;
>
>This looks wrong.
>
>> +     }
>> +
>> +     if (tp->rss_support && tp->hw_curr_isr_ver > RTL_ISR_VER_DEFAULT) {
>> +             u8 rss_queue_num = netif_get_num_default_rss_queues();
>> +
>> +             tp->num_rx_rings = min(rss_queue_num, tp-
>>hw_supp_num_rx_queues);
>> +             if (!(tp->num_rx_rings >= 2 && tp->irq_nvecs >=
>get_min_irq_nvecs(tp)))
>> +                     tp->num_rx_rings = 1;
>> +     }
>> +
>> +     tp->rss_enable = 0;
>> +
>> +     if (tp->num_rx_rings >= 2) {
>> +             tp->rss_enable = 1;
>
>Function name just mentions a check, but you set/change values here.
>
>
>> +             tp->init_rx_desc_type = RX_DESC_RING_TYPE_RSS;
>> +     }
>> +}
>> +
>>  static int rtl_alloc_irq(struct rtl8169_private *tp)  {
>>       struct pci_dev *pdev = tp->pci_dev; @@ -5627,7 +5944,10 @@
>> static int rtl_alloc_irq(struct rtl8169_private *tp)
>>               break;
>>       }
>>
>> -     nvecs = pci_alloc_irq_vectors(pdev, 1, 1, flags);
>> +     nvecs = pci_alloc_irq_vectors(pdev, get_min_irq_nvecs(tp),
>> + get_max_irq_nvecs(tp), flags);
>> +
>> +     if (nvecs < 0)
>> +             nvecs = pci_alloc_irq_vectors(pdev, 1, 1, flags);
>>
>>       if (nvecs < 0)
>>               return nvecs;
>> @@ -6069,6 +6389,13 @@ static int rtl_init_one(struct pci_dev *pdev,
>> const struct pci_device_id *ent)
>>
>>       tp->dash_type = rtl_get_dash_type(tp);
>>       tp->dash_enabled = rtl_dash_is_enabled(tp);
>> +     tp->rss_support = rtl_check_rss_support(tp);
>> +
>> +     if (tp->rss_support) {
>> +             tp->rss_data = devm_kzalloc(&pdev->dev, sizeof(*tp->rss_data),
>GFP_KERNEL);
>> +             if (!tp->rss_data)
>> +                     return -ENOMEM;
>> +     }
>>
>>       tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK;
>>
>> @@ -6095,6 +6422,11 @@ static int rtl_init_one(struct pci_dev *pdev, const
>struct pci_device_id *ent)
>>       if (!tp->rtl8169_napi)
>>               return -ENOMEM;
>>
>> +     rtl8169_double_check_rss_support(tp);
>> +
>> +     if (tp->rss_support)
>> +             rtl8169_init_rss(tp);
>> +
>>       INIT_WORK(&tp->wk.work, rtl_task);
>>       disable_work(&tp->wk.work);
>>
>> @@ -6112,6 +6444,11 @@ static int rtl_init_one(struct pci_dev *pdev, const
>struct pci_device_id *ent)
>>       dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
>>       dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
>>
>> +     if (tp->rss_support) {
>> +             dev->hw_features |= NETIF_F_RXHASH;
>> +             dev->features |= NETIF_F_RXHASH;
>
>Seems to me like you have different flags, all with the same meaning:
>RSS active, MSI-X vec mapping used, hw_curr_isr_ver > DEFAULT, ..
>Can't this be consolidated?
>
>> +     }
>> +
>>       /*
>>        * Pretend we are using VLANs; This bypasses a nasty bug where
>>        * Interrupts stop flowing on high load on 8110SCd controllers.


^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2026-05-18 11:18 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-13 11:55 [Patch net-next v3 0/7] r8169: add RSS support for RTL8127 javen
2026-05-13 11:55 ` [Patch net-next v3 1/7] r8169: add support for multi irqs javen
2026-05-16 22:07   ` Heiner Kallweit
2026-05-13 11:55 ` [Patch net-next v3 2/7] r8169: add support for multi rx queues javen
2026-05-16 22:07   ` Heiner Kallweit
2026-05-18  7:47     ` Javen
2026-05-13 11:55 ` [Patch net-next v3 3/7] r8169: add support for new interrupt mapping javen
2026-05-16 22:07   ` Heiner Kallweit
2026-05-18  8:21     ` Javen
2026-05-13 11:55 ` [Patch net-next v3 4/7] r8169: enable " javen
2026-05-13 11:55 ` [Patch net-next v3 5/7] r8169: add support and enable rss javen
2026-05-15  0:21   ` Jakub Kicinski
2026-05-16 22:07   ` Heiner Kallweit
2026-05-18 11:17     ` Javen
2026-05-13 11:55 ` [Patch net-next v3 6/7] r8169: move struct ethtool_ops javen
2026-05-13 11:55 ` [Patch net-next v3 7/7] r8169: add support for ethtool javen
2026-05-16 22:07   ` Heiner Kallweit

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox