public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Heiner Kallweit <hkallweit1@gmail.com>
To: javen <javen_xu@realsil.com.cn>,
	nic_swsd@realtek.com, andrew+netdev@lunn.ch, davem@davemloft.net,
	edumazet@google.com, kuba@kernel.org, pabeni@redhat.com,
	horms@kernel.org
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [Patch net-next v1 1/7] r8169: add support for multi irqs
Date: Wed, 6 May 2026 23:28:52 +0200	[thread overview]
Message-ID: <bbbb2a7d-c6a3-4dcf-94f9-0e580d43897e@gmail.com> (raw)
In-Reply-To: <20260506081326.767-2-javen_xu@realsil.com.cn>

On 06.05.2026 10:13, javen wrote:
> From: Javen Xu <javen_xu@realsil.com.cn>
> 
> RSS uses multi rx queues to receive packets, and each rx queue needs one
> irq and napi. So this patch adds support for multi irqs and napi here.
> 
> Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 199 ++++++++++++++++++++--
>  1 file changed, 184 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index 791277e750ba..ef74ee02c117 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -77,6 +77,7 @@
>  #define R8169_RX_RING_BYTES	(NUM_RX_DESC * sizeof(struct RxDesc))
>  #define R8169_TX_STOP_THRS	(MAX_SKB_FRAGS + 1)
>  #define R8169_TX_START_THRS	(2 * R8169_TX_STOP_THRS)
> +#define R8169_MAX_MSIX_VEC	32
>  
>  #define OCP_STD_PHY_BASE	0xa400
>  
> @@ -435,6 +436,8 @@ enum rtl8125_registers {
>  #define INT_CFG0_CLKREQEN		BIT(3)
>  	IntrMask_8125		= 0x38,
>  	IntrStatus_8125		= 0x3c,
> +	INTR_VEC_MAP_MASK	= 0x800,
> +	INTR_VEC_MAP_STATUS	= 0x802,

These register names don't have a chip version reference.
Does this mean they can be used on other chip versions
with RSS as well?

>  	INT_CFG1_8125		= 0x7a,
>  	LEDSEL2			= 0x84,
>  	LEDSEL1			= 0x86,
> @@ -728,6 +731,19 @@ enum rtl_dash_type {
>  	RTL_DASH_25_BP,
>  };
>  
> +struct rtl8169_napi {
> +	struct napi_struct napi;
> +	void *priv;
> +	int index;

It seems the index is never used in this patch.

> +};
> +
> +struct rtl8169_irq {
> +	irq_handler_t	handler;
> +	unsigned int	vector;
> +	u8		requested;
> +	char		name[IFNAMSIZ + 10];
> +};
> +
>  struct rtl8169_private {
>  	void __iomem *mmio_addr;	/* memory map physical address */
>  	struct pci_dev *pci_dev;
> @@ -745,9 +761,19 @@ struct rtl8169_private {
>  	dma_addr_t RxPhyAddr;
>  	struct page *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
>  	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
> +	struct rtl8169_irq irq_tbl[R8169_MAX_MSIX_VEC];
> +	struct rtl8169_napi r8169napi[R8169_MAX_MSIX_VEC];
> +	u16 isr_reg[R8169_MAX_MSIX_VEC];
> +	u16 imr_reg[R8169_MAX_MSIX_VEC];

These arrays result in unecessarily high memory consumption on all other
chip versions. Can't they be dynamically allocated, only in case driver
supports RSS for the respective chip version?

> +	unsigned int num_rx_rings;
>  	u16 cp_cmd;
>  	u16 tx_lpi_timer;
>  	u32 irq_mask;
> +	u8 min_irq_nvecs;
> +	u8 max_irq_nvecs;

It seems these values are actually constants.
Can't we avoid these members?

> +	u8 hw_supp_isr_ver;
> +	u8 hw_curr_isr_ver;
> +	u8 irq_nvecs;
>  	int irq;
>  	struct clk *clk;
>  
> @@ -763,6 +789,8 @@ struct rtl8169_private {
>  	unsigned aspm_manageable:1;
>  	unsigned dash_enabled:1;
>  	bool sfp_mode:1;
> +	bool rss_support:1;
> +	bool rss_enable:1;
>  	dma_addr_t counters_phys_addr;
>  	struct rtl8169_counters *counters;
>  	struct rtl8169_tc_offsets tc_offset;
> @@ -2680,6 +2708,44 @@ static void rtl_hw_reset(struct rtl8169_private *tp)
>  	rtl_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
>  }
>  
> +static void rtl_setup_mqs_reg(struct rtl8169_private *tp)
> +{
> +	if (tp->mac_version <= RTL_GIGA_MAC_VER_52) {
> +		tp->isr_reg[0] = IntrStatus;
> +		tp->imr_reg[0] = IntrMask;
> +	} else {
> +		tp->isr_reg[0] = IntrStatus_8125;
> +		tp->imr_reg[0] = IntrMask_8125;
> +	}
> +
> +	for (int i = 1; i < tp->max_irq_nvecs; i++)
> +		tp->isr_reg[i] = (u16)(INTR_VEC_MAP_STATUS + (i - 1) * 4);
> +
> +	for (int i = 1; i < tp->max_irq_nvecs; i++)
> +		tp->imr_reg[i] = (u16)(INTR_VEC_MAP_MASK + (i - 1) * 4);

This populates the array with constant values. Therefore, can't you avoid
using this array?

> +}
> +
> +static void rtl_software_parameter_initialize(struct rtl8169_private *tp)
> +{
> +	tp->num_rx_rings = 1;
> +
> +	switch (tp->mac_version) {
> +	case RTL_GIGA_MAC_VER_80:
> +		tp->min_irq_nvecs = 1;
> +		tp->max_irq_nvecs = 1;
> +		tp->hw_supp_isr_ver = 6;

Magic value 6 requires at least an explanation and a constant.

> +		break;
> +	default:
> +		tp->min_irq_nvecs = 1;
> +		tp->max_irq_nvecs = 1;
> +		tp->hw_supp_isr_ver = 1;
> +		break;
> +	}
> +	tp->hw_curr_isr_ver = tp->hw_supp_isr_ver;

This indicates that the current version can be set to a version
which is not the supported one. This is misleading.
- Is supp_isr_ver the highest supported isr version?
- And does this mean that each chip is backwards-compatible and
  supports also all lower isr versions?

> +
> +	rtl_setup_mqs_reg(tp);
> +}
> +
>  static void rtl_request_firmware(struct rtl8169_private *tp)
>  {
>  	struct rtl_fw *rtl_fw;
> @@ -4266,9 +4332,21 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp)
>  	netdev_reset_queue(tp->dev);
>  }
>  
> +static void rtl8169_napi_disable(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->irq_nvecs; i++)
> +		napi_disable(&tp->r8169napi[i].napi);
> +}
> +
> +static void rtl8169_napi_enable(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->irq_nvecs; i++)
> +		napi_enable(&tp->r8169napi[i].napi);
> +}
> +
>  static void rtl8169_cleanup(struct rtl8169_private *tp)
>  {
> -	napi_disable(&tp->napi);
> +	rtl8169_napi_disable(tp);
>  
>  	/* Give a racing hard_start_xmit a few cycles to complete. */
>  	synchronize_net();
> @@ -4313,8 +4391,8 @@ static void rtl_reset_work(struct rtl8169_private *tp)
>  
>  	for (i = 0; i < NUM_RX_DESC; i++)
>  		rtl8169_mark_to_asic(tp->RxDescArray + i);
> +	rtl8169_napi_enable(tp);
>  
> -	napi_enable(&tp->napi);

This moves the empty line. It should remain where it is.

>  	rtl_hw_start(tp);
>  }
>  
> @@ -4820,7 +4898,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  			goto release_descriptor;
>  		}
>  
> -		skb = napi_alloc_skb(&tp->napi, pkt_size);
> +		skb = napi_alloc_skb(&tp->r8169napi[0].napi, pkt_size);
>  		if (unlikely(!skb)) {
>  			dev->stats.rx_dropped++;
>  			goto release_descriptor;
> @@ -4844,7 +4922,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		if (skb->pkt_type == PACKET_MULTICAST)
>  			dev->stats.multicast++;
>  
> -		napi_gro_receive(&tp->napi, skb);
> +		napi_gro_receive(&tp->r8169napi[0].napi, skb);
>  
>  		dev_sw_netstats_rx_add(dev, pkt_size);
>  release_descriptor:
> @@ -4856,7 +4934,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  
>  static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  {
> -	struct rtl8169_private *tp = dev_instance;
> +	struct rtl8169_napi *napi = dev_instance;
> +	struct rtl8169_private *tp = napi->priv;
>  	u32 status = rtl_get_events(tp);
>  
>  	if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
> @@ -4873,13 +4952,53 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  		phy_mac_interrupt(tp->phydev);
>  
>  	rtl_irq_disable(tp);
> -	napi_schedule(&tp->napi);
> +	napi_schedule(&napi->napi);
>  out:
>  	rtl_ack_events(tp, status);
>  
>  	return IRQ_HANDLED;
>  }
>  
> +static void rtl8169_free_irq(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->irq_nvecs; i++) {
> +		struct rtl8169_irq *irq = &tp->irq_tbl[i];
> +		struct rtl8169_napi *napi = &tp->r8169napi[i];
> +
> +		if (irq->requested) {

Is this check actually needed? Wouldn't pci_free_irq()
also be fine with irqs not having been requested?

> +			irq->requested = 0;
> +			pci_free_irq(tp->pci_dev, i, napi);
> +		}
> +	}
> +}
> +
> +static int rtl8169_request_irq(struct rtl8169_private *tp)
> +{
> +	const int len = sizeof(tp->irq_tbl[0].name);
> +	struct net_device *dev = tp->dev;
> +	struct rtl8169_napi *napi;
> +	struct rtl8169_irq *irq;
> +	int rc = 0;
> +
> +	for (int i = 0; i < tp->irq_nvecs; i++) {
> +		irq = &tp->irq_tbl[i];
> +
> +		napi = &tp->r8169napi[i];
> +		snprintf(irq->name, len, "%s-%d", dev->name, i);

I don't think this is needed. pci_request_irq() supports dynamic
irq name generation.

> +		irq->handler = rtl8169_interrupt;
> +		rc = pci_request_irq(tp->pci_dev, i, irq->handler, NULL, napi, irq->name);
> +		if (rc)
> +			break;
> +
> +		irq->vector = pci_irq_vector(tp->pci_dev, i);
> +		irq->requested = 1;
> +	}
> +
> +	if (rc)
> +		rtl8169_free_irq(tp);
> +	return rc;
> +}
> +
>  static void rtl_task(struct work_struct *work)
>  {
>  	struct rtl8169_private *tp =
> @@ -4914,9 +5033,10 @@ static void rtl_task(struct work_struct *work)
>  
>  static int rtl8169_poll(struct napi_struct *napi, int budget)
>  {
> -	struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
> +	struct rtl8169_napi *r8169_napi = container_of(napi, struct rtl8169_napi, napi);
> +	struct rtl8169_private *tp = r8169_napi->priv;
>  	struct net_device *dev = tp->dev;
> -	int work_done;
> +	int work_done = 0;
>  
>  	rtl_tx(dev, tp, budget);
>  
> @@ -5035,7 +5155,7 @@ static void rtl8169_up(struct rtl8169_private *tp)
>  	phy_init_hw(tp->phydev);
>  	phy_resume(tp->phydev);
>  	rtl8169_init_phy(tp);
> -	napi_enable(&tp->napi);
> +	rtl8169_napi_enable(tp);
>  	enable_work(&tp->wk.work);
>  	rtl_reset_work(tp);
>  
> @@ -5053,7 +5173,7 @@ static int rtl8169_close(struct net_device *dev)
>  	rtl8169_down(tp);
>  	rtl8169_rx_clear(tp);
>  
> -	free_irq(tp->irq, tp);
> +	rtl8169_free_irq(tp);
>  
>  	phy_disconnect(tp->phydev);
>  
> @@ -5108,7 +5228,8 @@ static int rtl_open(struct net_device *dev)
>  	rtl_request_firmware(tp);
>  
>  	irqflags = pci_dev_msi_enabled(pdev) ? IRQF_NO_THREAD : IRQF_SHARED;
> -	retval = request_irq(tp->irq, rtl8169_interrupt, irqflags, dev->name, tp);
> +
> +	retval = rtl8169_request_irq(tp);
>  	if (retval < 0)
>  		goto err_release_fw_2;
>  
> @@ -5125,7 +5246,7 @@ static int rtl_open(struct net_device *dev)
>  	return retval;
>  
>  err_free_irq:
> -	free_irq(tp->irq, tp);
> +	rtl8169_free_irq(tp);
>  err_release_fw_2:
>  	rtl_release_firmware(tp);
>  	rtl8169_rx_clear(tp);
> @@ -5328,7 +5449,9 @@ static void rtl_set_irq_mask(struct rtl8169_private *tp)
>  
>  static int rtl_alloc_irq(struct rtl8169_private *tp)
>  {
> +	struct pci_dev *pdev = tp->pci_dev;
>  	unsigned int flags;
> +	int nvecs;
>  
>  	switch (tp->mac_version) {
>  	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
> @@ -5344,7 +5467,18 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
>  		break;
>  	}
>  
> -	return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
> +	nvecs = pci_alloc_irq_vectors(pdev, tp->min_irq_nvecs, tp->max_irq_nvecs, flags);
> +
> +	if (nvecs < 0)
> +		nvecs = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);

This may be dangerous. If the first allocation fails, you may here allocate an
interrupt of a type not supported by the chip.

> +
> +	if (nvecs < 0)
> +		return nvecs;
> +
> +	tp->irq = pci_irq_vector(pdev, 0);
> +	tp->irq_nvecs = nvecs;
> +
> +	return 0;
>  }
>  
>  static void rtl_read_mac_address(struct rtl8169_private *tp,
> @@ -5539,6 +5673,17 @@ static void rtl_hw_initialize(struct rtl8169_private *tp)
>  	}
>  }
>  
> +static int rtl8169_set_real_num_queue(struct rtl8169_private *tp)
> +{
> +	int retval;
> +
> +	retval = netif_set_real_num_tx_queues(tp->dev, 1);
> +	if (retval < 0)
> +		return retval;
> +
> +	return netif_set_real_num_rx_queues(tp->dev, tp->num_rx_rings);
> +}
> +
>  static int rtl_jumbo_max(struct rtl8169_private *tp)
>  {
>  	/* Non-GBit versions don't support jumbo frames */
> @@ -5599,6 +5744,19 @@ static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
>  	return false;
>  }
>  
> +static void r8169_init_napi(struct rtl8169_private *tp)
> +{
> +	for (int i = 0; i < tp->irq_nvecs; i++) {
> +		struct rtl8169_napi *r8169napi = &tp->r8169napi[i];
> +		int (*poll)(struct napi_struct *napi, int budget);
> +
> +		poll = rtl8169_poll;
> +		netif_napi_add(tp->dev, &r8169napi->napi, poll);
> +		r8169napi->priv = tp;
> +		r8169napi->index = i;
> +	}
> +}
> +
>  static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  {
>  	const struct rtl_chip_info *chip;
> @@ -5703,11 +5861,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  
>  	rtl_hw_reset(tp);
>  
> +	rtl_software_parameter_initialize(tp);
> +
>  	rc = rtl_alloc_irq(tp);
>  	if (rc < 0)
>  		return dev_err_probe(&pdev->dev, rc, "Can't allocate interrupt\n");
>  
> -	tp->irq = pci_irq_vector(pdev, 0);
>  
>  	INIT_WORK(&tp->wk.work, rtl_task);
>  	disable_work(&tp->wk.work);
> @@ -5716,7 +5875,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  
>  	dev->ethtool_ops = &rtl8169_ethtool_ops;
>  
> -	netif_napi_add(dev, &tp->napi, rtl8169_poll);
> +	if (!tp->rss_support) {
> +		netif_napi_add(dev, &tp->r8169napi[0].napi, rtl8169_poll);
> +		tp->r8169napi[0].priv = tp;
> +		tp->r8169napi[0].index = 0;
> +	} else {
> +		r8169_init_napi(tp);
> +	}
>  
>  	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
>  			   NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
> @@ -5778,6 +5943,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	if (jumbo_max)
>  		dev->max_mtu = jumbo_max;
>  
> +	rc = rtl8169_set_real_num_queue(tp);
> +	if (rc < 0)
> +		return dev_err_probe(&pdev->dev, rc, "set tx/rx num failure\n");
> +
>  	rtl_set_irq_mask(tp);
>  
>  	tp->counters = dmam_alloc_coherent (&pdev->dev, sizeof(*tp->counters),


  reply	other threads:[~2026-05-06 21:28 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-06  8:13 [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 javen
2026-05-06  8:13 ` [Patch net-next v1 1/7] r8169: add support for multi irqs javen
2026-05-06 21:28   ` Heiner Kallweit [this message]
2026-05-07  4:23     ` Javen
2026-05-06 22:53   ` Jakub Kicinski
2026-05-06  8:13 ` [Patch net-next v1 2/7] r8169: add support for multi rx queues javen
2026-05-06 21:45   ` Heiner Kallweit
2026-05-07  6:26     ` Javen
2026-05-06 22:54   ` Jakub Kicinski
2026-05-06  8:13 ` [Patch net-next v1 3/7] r8169: add support for new interrupt mapping javen
2026-05-06  8:13 ` [Patch net-next v1 4/7] r8169: enable " javen
2026-05-06  8:13 ` [Patch net-next v1 5/7] r8169: add support and enable rss javen
2026-05-06  8:13 ` [Patch net-next v1 6/7] r8169: move struct ethtool_ops javen
2026-05-06  8:13 ` [Patch net-next v1 7/7] r8169: add support for ethtool javen
2026-05-06 21:02 ` [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 Heiner Kallweit
2026-05-07  2:19   ` Javen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bbbb2a7d-c6a3-4dcf-94f9-0e580d43897e@gmail.com \
    --to=hkallweit1@gmail.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=javen_xu@realsil.com.cn \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=nic_swsd@realtek.com \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox