Netdev List
 help / color / mirror / Atom feed
From: Javen <javen_xu@realsil.com.cn>
To: Heiner Kallweit <hkallweit1@gmail.com>,
	"nic_swsd@realtek.com" <nic_swsd@realtek.com>,
	"andrew+netdev@lunn.ch" <andrew+netdev@lunn.ch>,
	"davem@davemloft.net" <davem@davemloft.net>,
	"edumazet@google.com" <edumazet@google.com>,
	"kuba@kernel.org" <kuba@kernel.org>,
	"pabeni@redhat.com" <pabeni@redhat.com>,
	"horms@kernel.org" <horms@kernel.org>
Cc: "netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: RE: [Patch net-next v1 1/7] r8169: add support for multi irqs
Date: Thu, 7 May 2026 04:23:48 +0000	[thread overview]
Message-ID: <f0661634d51149538f686dc9cc5291c6@realsil.com.cn> (raw)
In-Reply-To: <bbbb2a7d-c6a3-4dcf-94f9-0e580d43897e@gmail.com>

>On 06.05.2026 10:13, javen wrote:
>> From: Javen Xu <javen_xu@realsil.com.cn>
>>
>> RSS uses multi rx queues to receive packets, and each rx queue needs
>> one irq and napi. So this patch adds support for multi irqs and napi here.
>>
>> Signed-off-by: Javen Xu <javen_xu@realsil.com.cn>
>> ---
>>  drivers/net/ethernet/realtek/r8169_main.c | 199
>> ++++++++++++++++++++--
>>  1 file changed, 184 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/realtek/r8169_main.c
>> b/drivers/net/ethernet/realtek/r8169_main.c
>> index 791277e750ba..ef74ee02c117 100644
>> --- a/drivers/net/ethernet/realtek/r8169_main.c
>> +++ b/drivers/net/ethernet/realtek/r8169_main.c
>> @@ -77,6 +77,7 @@
>>  #define R8169_RX_RING_BYTES  (NUM_RX_DESC * sizeof(struct RxDesc))
>>  #define R8169_TX_STOP_THRS   (MAX_SKB_FRAGS + 1)
>>  #define R8169_TX_START_THRS  (2 * R8169_TX_STOP_THRS)
>> +#define R8169_MAX_MSIX_VEC   32
>>
>>  #define OCP_STD_PHY_BASE     0xa400
>>
>> @@ -435,6 +436,8 @@ enum rtl8125_registers {
>>  #define INT_CFG0_CLKREQEN            BIT(3)
>>       IntrMask_8125           = 0x38,
>>       IntrStatus_8125         = 0x3c,
>> +     INTR_VEC_MAP_MASK       = 0x800,
>> +     INTR_VEC_MAP_STATUS     = 0x802,
>
>These register names don't have a chip version reference.
>Does this mean they can be used on other chip versions with RSS as well?

Yes, these registers can be reused on other chip versions.

>
>>       INT_CFG1_8125           = 0x7a,
>>       LEDSEL2                 = 0x84,
>>       LEDSEL1                 = 0x86,
>> @@ -728,6 +731,19 @@ enum rtl_dash_type {
>>       RTL_DASH_25_BP,
>>  };
>>
>> +struct rtl8169_napi {
>> +     struct napi_struct napi;
>> +     void *priv;
>> +     int index;
>
>It seems the index is never used in this patch.

This will be used in patch 5/7, when we enable RSS. I will move it to that patch.

>
>> +};
>> +
>> +struct rtl8169_irq {
>> +     irq_handler_t   handler;
>> +     unsigned int    vector;
>> +     u8              requested;
>> +     char            name[IFNAMSIZ + 10];
>> +};
>> +
>>  struct rtl8169_private {
>>       void __iomem *mmio_addr;        /* memory map physical address */
>>       struct pci_dev *pci_dev;
>> @@ -745,9 +761,19 @@ struct rtl8169_private {
>>       dma_addr_t RxPhyAddr;
>>       struct page *Rx_databuff[NUM_RX_DESC];  /* Rx data buffers */
>>       struct ring_info tx_skb[NUM_TX_DESC];   /* Tx data buffers */
>> +     struct rtl8169_irq irq_tbl[R8169_MAX_MSIX_VEC];
>> +     struct rtl8169_napi r8169napi[R8169_MAX_MSIX_VEC];
>> +     u16 isr_reg[R8169_MAX_MSIX_VEC];
>> +     u16 imr_reg[R8169_MAX_MSIX_VEC];
>
>These arrays result in unnecessarily high memory consumption on all other
>chip versions. Can't they be dynamically allocated, only in case driver supports
>RSS for the respective chip version?
>

I will replace these arrays with pointers in v2.
During rtl_init_one(), the driver will dynamically allocate the memory (e.g., via devm_kcalloc()) based on the number of rx queues that the hardware supports: 8 for 8127 and 1 for other chips. So the init path will be the same for all chips.

>> +     unsigned int num_rx_rings;
>>       u16 cp_cmd;
>>       u16 tx_lpi_timer;
>>       u32 irq_mask;
>> +     u8 min_irq_nvecs;
>> +     u8 max_irq_nvecs;
>
>It seems these values are actually constants.
>Can't we avoid these members?

These values differ between chips (max_irq_nvecs is 32 for 8127 and 1 for the others). To avoid these members, I would like to know whether I should add a helper function that returns max_irq_nvecs/min_irq_nvecs for each chip according to tp->mac_version, or just use local variables in rtl_init_one().

>
>> +     u8 hw_supp_isr_ver;
>> +     u8 hw_curr_isr_ver;
>> +     u8 irq_nvecs;
>>       int irq;
>>       struct clk *clk;
>>
>> @@ -763,6 +789,8 @@ struct rtl8169_private {
>>       unsigned aspm_manageable:1;
>>       unsigned dash_enabled:1;
>>       bool sfp_mode:1;
>> +     bool rss_support:1;
>> +     bool rss_enable:1;
>>       dma_addr_t counters_phys_addr;
>>       struct rtl8169_counters *counters;
>>       struct rtl8169_tc_offsets tc_offset; @@ -2680,6 +2708,44 @@
>> static void rtl_hw_reset(struct rtl8169_private *tp)
>>       rtl_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);  }
>>
>> +static void rtl_setup_mqs_reg(struct rtl8169_private *tp) {
>> +     if (tp->mac_version <= RTL_GIGA_MAC_VER_52) {
>> +             tp->isr_reg[0] = IntrStatus;
>> +             tp->imr_reg[0] = IntrMask;
>> +     } else {
>> +             tp->isr_reg[0] = IntrStatus_8125;
>> +             tp->imr_reg[0] = IntrMask_8125;
>> +     }
>> +
>> +     for (int i = 1; i < tp->max_irq_nvecs; i++)
>> +             tp->isr_reg[i] = (u16)(INTR_VEC_MAP_STATUS + (i - 1) *
>> + 4);
>> +
>> +     for (int i = 1; i < tp->max_irq_nvecs; i++)
>> +             tp->imr_reg[i] = (u16)(INTR_VEC_MAP_MASK + (i - 1) * 4);
>
>This populates the array with constant values. Therefore, can't you avoid
>using this array?

I reviewed the code and found that these arrays are not used, so I will remove this code.

>
>> +}
>> +
>> +static void rtl_software_parameter_initialize(struct rtl8169_private
>> +*tp) {
>> +     tp->num_rx_rings = 1;
>> +
>> +     switch (tp->mac_version) {
>> +     case RTL_GIGA_MAC_VER_80:
>> +             tp->min_irq_nvecs = 1;
>> +             tp->max_irq_nvecs = 1;
>> +             tp->hw_supp_isr_ver = 6;
>
>Magic value 6 requires at least an explanation and a constant.

hw_supp_isr_ver selects the interrupt status layout, which differs between chips. For 8127, 6 means interrupts 0-7 are for rx and 8-16 are for tx. For 8125, if hw_supp_isr_ver is 5, interrupts 0-15 are for rx and 16 and 17 are for tx. The interrupts are distributed according to hw_supp_isr_ver.

>
>> +             break;
>> +     default:
>> +             tp->min_irq_nvecs = 1;
>> +             tp->max_irq_nvecs = 1;
>> +             tp->hw_supp_isr_ver = 1;
>> +             break;
>> +     }
>> +     tp->hw_curr_isr_ver = tp->hw_supp_isr_ver;
>
>This indicates that the current version can be set to a version which is not the
>supported one. This is misleading.
>- Is supp_isr_ver the highest supported isr version?
>- And does this mean that each chip is backwards-compatible and
>  supports also all lower isr versions?
>

All chips support hw_supp_isr_ver = 1, which means the old interrupt mapping. Beyond that, the chips are not backwards-compatible and don't support lower isr versions; hw_supp_isr_ver is the specific version the chip supports.

>> +
>> +     rtl_setup_mqs_reg(tp);
>> +}
>> +
>>  static void rtl_request_firmware(struct rtl8169_private *tp)  {
>>       struct rtl_fw *rtl_fw;
>> @@ -4266,9 +4332,21 @@ static void rtl8169_tx_clear(struct rtl8169_private
>*tp)
>>       netdev_reset_queue(tp->dev);
>>  }
>>
>> +static void rtl8169_napi_disable(struct rtl8169_private *tp) {
>> +     for (int i = 0; i < tp->irq_nvecs; i++)
>> +             napi_disable(&tp->r8169napi[i].napi);
>> +}
>> +
>> +static void rtl8169_napi_enable(struct rtl8169_private *tp) {
>> +     for (int i = 0; i < tp->irq_nvecs; i++)
>> +             napi_enable(&tp->r8169napi[i].napi);
>> +}
>> +
>>  static void rtl8169_cleanup(struct rtl8169_private *tp)  {
>> -     napi_disable(&tp->napi);
>> +     rtl8169_napi_disable(tp);
>>
>>       /* Give a racing hard_start_xmit a few cycles to complete. */
>>       synchronize_net();
>> @@ -4313,8 +4391,8 @@ static void rtl_reset_work(struct rtl8169_private
>*tp)
>>
>>       for (i = 0; i < NUM_RX_DESC; i++)
>>               rtl8169_mark_to_asic(tp->RxDescArray + i);
>> +     rtl8169_napi_enable(tp);
>>
>> -     napi_enable(&tp->napi);
>
>This moves the empty line. It should remain where it is.
>
>>       rtl_hw_start(tp);
>>  }
>>
>> @@ -4820,7 +4898,7 @@ static int rtl_rx(struct net_device *dev, struct
>rtl8169_private *tp, int budget
>>                       goto release_descriptor;
>>               }
>>
>> -             skb = napi_alloc_skb(&tp->napi, pkt_size);
>> +             skb = napi_alloc_skb(&tp->r8169napi[0].napi, pkt_size);
>>               if (unlikely(!skb)) {
>>                       dev->stats.rx_dropped++;
>>                       goto release_descriptor;
>> @@ -4844,7 +4922,7 @@ static int rtl_rx(struct net_device *dev, struct
>rtl8169_private *tp, int budget
>>               if (skb->pkt_type == PACKET_MULTICAST)
>>                       dev->stats.multicast++;
>>
>> -             napi_gro_receive(&tp->napi, skb);
>> +             napi_gro_receive(&tp->r8169napi[0].napi, skb);
>>
>>               dev_sw_netstats_rx_add(dev, pkt_size);
>>  release_descriptor:
>> @@ -4856,7 +4934,8 @@ static int rtl_rx(struct net_device *dev, struct
>rtl8169_private *tp, int budget
>>
>>  static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>>  {
>> -     struct rtl8169_private *tp = dev_instance;
>> +     struct rtl8169_napi *napi = dev_instance;
>> +     struct rtl8169_private *tp = napi->priv;
>>       u32 status = rtl_get_events(tp);
>>
>>       if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
>> @@ -4873,13 +4952,53 @@ static irqreturn_t rtl8169_interrupt(int irq, void
>*dev_instance)
>>               phy_mac_interrupt(tp->phydev);
>>
>>       rtl_irq_disable(tp);
>> -     napi_schedule(&tp->napi);
>> +     napi_schedule(&napi->napi);
>>  out:
>>       rtl_ack_events(tp, status);
>>
>>       return IRQ_HANDLED;
>>  }
>>
>> +static void rtl8169_free_irq(struct rtl8169_private *tp)
>> +{
>> +     for (int i = 0; i < tp->irq_nvecs; i++) {
>> +             struct rtl8169_irq *irq = &tp->irq_tbl[i];
>> +             struct rtl8169_napi *napi = &tp->r8169napi[i];
>> +
>> +             if (irq->requested) {
>
>Is this check actually needed? Wouldn't pci_free_irq()
>also be fine with irqs not having been requested?

It is not needed; I will remove it.

>
>> +                     irq->requested = 0;
>> +                     pci_free_irq(tp->pci_dev, i, napi);
>> +             }
>> +     }
>> +}
>> +
>> +static int rtl8169_request_irq(struct rtl8169_private *tp)
>> +{
>> +     const int len = sizeof(tp->irq_tbl[0].name);
>> +     struct net_device *dev = tp->dev;
>> +     struct rtl8169_napi *napi;
>> +     struct rtl8169_irq *irq;
>> +     int rc = 0;
>> +
>> +     for (int i = 0; i < tp->irq_nvecs; i++) {
>> +             irq = &tp->irq_tbl[i];
>> +
>> +             napi = &tp->r8169napi[i];
>> +             snprintf(irq->name, len, "%s-%d", dev->name, i);
>
>I don't think this is needed. pci_request_irq() supports dynamic
>irq name generation.
>

I will remove it.

>> +             irq->handler = rtl8169_interrupt;
>> +             rc = pci_request_irq(tp->pci_dev, i, irq->handler, NULL, napi, irq-
>>name);
>> +             if (rc)
>> +                     break;
>> +
>> +             irq->vector = pci_irq_vector(tp->pci_dev, i);
>> +             irq->requested = 1;
>> +     }
>> +
>> +     if (rc)
>> +             rtl8169_free_irq(tp);
>> +     return rc;
>> +}
>> +
>>  static void rtl_task(struct work_struct *work)
>>  {
>>       struct rtl8169_private *tp =
>> @@ -4914,9 +5033,10 @@ static void rtl_task(struct work_struct *work)
>>
>>  static int rtl8169_poll(struct napi_struct *napi, int budget)
>>  {
>> -     struct rtl8169_private *tp = container_of(napi, struct rtl8169_private,
>napi);
>> +     struct rtl8169_napi *r8169_napi = container_of(napi, struct rtl8169_napi,
>napi);
>> +     struct rtl8169_private *tp = r8169_napi->priv;
>>       struct net_device *dev = tp->dev;
>> -     int work_done;
>> +     int work_done = 0;
>>
>>       rtl_tx(dev, tp, budget);
>>
>> @@ -5035,7 +5155,7 @@ static void rtl8169_up(struct rtl8169_private *tp)
>>       phy_init_hw(tp->phydev);
>>       phy_resume(tp->phydev);
>>       rtl8169_init_phy(tp);
>> -     napi_enable(&tp->napi);
>> +     rtl8169_napi_enable(tp);
>>       enable_work(&tp->wk.work);
>>       rtl_reset_work(tp);
>>
>> @@ -5053,7 +5173,7 @@ static int rtl8169_close(struct net_device *dev)
>>       rtl8169_down(tp);
>>       rtl8169_rx_clear(tp);
>>
>> -     free_irq(tp->irq, tp);
>> +     rtl8169_free_irq(tp);
>>
>>       phy_disconnect(tp->phydev);
>>
>> @@ -5108,7 +5228,8 @@ static int rtl_open(struct net_device *dev)
>>       rtl_request_firmware(tp);
>>
>>       irqflags = pci_dev_msi_enabled(pdev) ? IRQF_NO_THREAD :
>IRQF_SHARED;
>> -     retval = request_irq(tp->irq, rtl8169_interrupt, irqflags, dev->name, tp);
>> +
>> +     retval = rtl8169_request_irq(tp);
>>       if (retval < 0)
>>               goto err_release_fw_2;
>>
>> @@ -5125,7 +5246,7 @@ static int rtl_open(struct net_device *dev)
>>       return retval;
>>
>>  err_free_irq:
>> -     free_irq(tp->irq, tp);
>> +     rtl8169_free_irq(tp);
>>  err_release_fw_2:
>>       rtl_release_firmware(tp);
>>       rtl8169_rx_clear(tp);
>> @@ -5328,7 +5449,9 @@ static void rtl_set_irq_mask(struct rtl8169_private
>*tp)
>>
>>  static int rtl_alloc_irq(struct rtl8169_private *tp)
>>  {
>> +     struct pci_dev *pdev = tp->pci_dev;
>>       unsigned int flags;
>> +     int nvecs;
>>
>>       switch (tp->mac_version) {
>>       case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
>> @@ -5344,7 +5467,18 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
>>               break;
>>       }
>>
>> -     return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
>> +     nvecs = pci_alloc_irq_vectors(pdev, tp->min_irq_nvecs, tp-
>>max_irq_nvecs, flags);
>> +
>> +     if (nvecs < 0)
>> +             nvecs = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
>
>This may be dangerous. If the first allocation fails, you may here allocate an
>interrupt of a type not supported by the chip.
>

I will replace it with pci_alloc_irq_vectors(pdev, 1, 1, flags).

>> +
>> +     if (nvecs < 0)
>> +             return nvecs;
>> +
>> +     tp->irq = pci_irq_vector(pdev, 0);
>> +     tp->irq_nvecs = nvecs;
>> +
>> +     return 0;
>>  }
>>
>>  static void rtl_read_mac_address(struct rtl8169_private *tp,
>> @@ -5539,6 +5673,17 @@ static void rtl_hw_initialize(struct rtl8169_private
>*tp)
>>       }
>>  }
>>
>> +static int rtl8169_set_real_num_queue(struct rtl8169_private *tp)
>> +{
>> +     int retval;
>> +
>> +     retval = netif_set_real_num_tx_queues(tp->dev, 1);
>> +     if (retval < 0)
>> +             return retval;
>> +
>> +     return netif_set_real_num_rx_queues(tp->dev, tp->num_rx_rings);
>> +}
>> +
>>  static int rtl_jumbo_max(struct rtl8169_private *tp)
>>  {
>>       /* Non-GBit versions don't support jumbo frames */
>> @@ -5599,6 +5744,19 @@ static bool rtl_aspm_is_safe(struct
>rtl8169_private *tp)
>>       return false;
>>  }
>>
>> +static void r8169_init_napi(struct rtl8169_private *tp)
>> +{
>> +     for (int i = 0; i < tp->irq_nvecs; i++) {
>> +             struct rtl8169_napi *r8169napi = &tp->r8169napi[i];
>> +             int (*poll)(struct napi_struct *napi, int budget);
>> +
>> +             poll = rtl8169_poll;
>> +             netif_napi_add(tp->dev, &r8169napi->napi, poll);
>> +             r8169napi->priv = tp;
>> +             r8169napi->index = i;
>> +     }
>> +}
>> +
>>  static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>>  {
>>       const struct rtl_chip_info *chip;
>> @@ -5703,11 +5861,12 @@ static int rtl_init_one(struct pci_dev *pdev,
>const struct pci_device_id *ent)
>>
>>       rtl_hw_reset(tp);
>>
>> +     rtl_software_parameter_initialize(tp);
>> +
>>       rc = rtl_alloc_irq(tp);
>>       if (rc < 0)
>>               return dev_err_probe(&pdev->dev, rc, "Can't allocate interrupt\n");
>>
>> -     tp->irq = pci_irq_vector(pdev, 0);
>>
>>       INIT_WORK(&tp->wk.work, rtl_task);
>>       disable_work(&tp->wk.work);
>> @@ -5716,7 +5875,13 @@ static int rtl_init_one(struct pci_dev *pdev, const
>struct pci_device_id *ent)
>>
>>       dev->ethtool_ops = &rtl8169_ethtool_ops;
>>
>> -     netif_napi_add(dev, &tp->napi, rtl8169_poll);
>> +     if (!tp->rss_support) {
>> +             netif_napi_add(dev, &tp->r8169napi[0].napi, rtl8169_poll);
>> +             tp->r8169napi[0].priv = tp;
>> +             tp->r8169napi[0].index = 0;
>> +     } else {
>> +             r8169_init_napi(tp);
>> +     }
>>
>>       dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
>>                          NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
>> @@ -5778,6 +5943,10 @@ static int rtl_init_one(struct pci_dev *pdev, const
>struct pci_device_id *ent)
>>       if (jumbo_max)
>>               dev->max_mtu = jumbo_max;
>>
>> +     rc = rtl8169_set_real_num_queue(tp);
>> +     if (rc < 0)
>> +             return dev_err_probe(&pdev->dev, rc, "set tx/rx num failure\n");
>> +
>>       rtl_set_irq_mask(tp);
>>
>>       tp->counters = dmam_alloc_coherent (&pdev->dev, sizeof(*tp-
>>counters),


  reply	other threads:[~2026-05-07  4:24 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-06  8:13 [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 javen
2026-05-06  8:13 ` [Patch net-next v1 1/7] r8169: add support for multi irqs javen
2026-05-06 21:28   ` Heiner Kallweit
2026-05-07  4:23     ` Javen [this message]
2026-05-06 22:53   ` Jakub Kicinski
2026-05-06  8:13 ` [Patch net-next v1 2/7] r8169: add support for multi rx queues javen
2026-05-06 21:45   ` Heiner Kallweit
2026-05-07  6:26     ` Javen
2026-05-06 22:54   ` Jakub Kicinski
2026-05-06  8:13 ` [Patch net-next v1 3/7] r8169: add support for new interrupt mapping javen
2026-05-06  8:13 ` [Patch net-next v1 4/7] r8169: enable " javen
2026-05-06  8:13 ` [Patch net-next v1 5/7] r8169: add support and enable rss javen
2026-05-06  8:13 ` [Patch net-next v1 6/7] r8169: move struct ethtool_ops javen
2026-05-06  8:13 ` [Patch net-next v1 7/7] r8169: add support for ethtool javen
2026-05-06 21:02 ` [Patch net-next v1 0/7] r8169: add RSS support for RTL8127 Heiner Kallweit
2026-05-07  2:19   ` Javen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f0661634d51149538f686dc9cc5291c6@realsil.com.cn \
    --to=javen_xu@realsil.com.cn \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=hkallweit1@gmail.com \
    --cc=horms@kernel.org \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=nic_swsd@realtek.com \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox