From: "Arinzon, David" <darinzon@amazon.com>
To: Ahmed Zaki <ahmed.zaki@intel.com>,
"netdev@vger.kernel.org" <netdev@vger.kernel.org>
Cc: "intel-wired-lan@lists.osuosl.org"
<intel-wired-lan@lists.osuosl.org>,
"andrew+netdev@lunn.ch" <andrew+netdev@lunn.ch>,
"edumazet@google.com" <edumazet@google.com>,
"kuba@kernel.org" <kuba@kernel.org>,
"horms@kernel.org" <horms@kernel.org>,
"pabeni@redhat.com" <pabeni@redhat.com>,
"davem@davemloft.net" <davem@davemloft.net>,
"michael.chan@broadcom.com" <michael.chan@broadcom.com>,
"tariqt@nvidia.com" <tariqt@nvidia.com>,
"anthony.l.nguyen@intel.com" <anthony.l.nguyen@intel.com>,
"przemyslaw.kitszel@intel.com" <przemyslaw.kitszel@intel.com>,
"jdamato@fastly.com" <jdamato@fastly.com>,
"shayd@nvidia.com" <shayd@nvidia.com>,
"akpm@linux-foundation.org" <akpm@linux-foundation.org>,
"Agroskin, Shay" <shayagr@amazon.com>,
"kalesh-anakkur.purayil@broadcom.com"
<kalesh-anakkur.purayil@broadcom.com>,
"pavan.chebbi@broadcom.com" <pavan.chebbi@broadcom.com>,
"yury.norov@gmail.com" <yury.norov@gmail.com>
Subject: RE: [PATCH net-next v5 1/6] net: move ARFS rmap management to core
Date: Tue, 14 Jan 2025 09:33:21 +0000 [thread overview]
Message-ID: <4517d3801ecb465785f7c883554c9d7f@amazon.com> (raw)
In-Reply-To: <20250113171042.158123-2-ahmed.zaki@intel.com>
> Add a new netdev flag "rx_cpu_rmap_auto". Drivers supporting ARFS should
> set the flag via netif_enable_cpu_rmap() and core will allocate and manage
> the ARFS rmap. Freeing the rmap is also done by core when the netdev is
> freed.
>
> Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
> ---
> drivers/net/ethernet/amazon/ena/ena_netdev.c | 38 ++---------------
> drivers/net/ethernet/broadcom/bnxt/bnxt.c | 27 ++----------
> drivers/net/ethernet/intel/ice/ice_arfs.c | 17 +-------
> include/linux/netdevice.h | 15 +++++--
> net/core/dev.c | 44 ++++++++++++++++++++
> 5 files changed, 63 insertions(+), 78 deletions(-)
>
> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> index c1295dfad0d0..a3fceaa83cd5 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> @@ -5,9 +5,6 @@
>
> #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>
> -#ifdef CONFIG_RFS_ACCEL
> -#include <linux/cpu_rmap.h>
> -#endif /* CONFIG_RFS_ACCEL */
> #include <linux/ethtool.h>
> #include <linux/kernel.h>
> #include <linux/module.h>
> @@ -165,25 +162,10 @@ int ena_xmit_common(struct ena_adapter
> *adapter, static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) {
> #ifdef CONFIG_RFS_ACCEL
> - u32 i;
> - int rc;
> -
> - adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter-
> >num_io_queues);
> - if (!adapter->netdev->rx_cpu_rmap)
> - return -ENOMEM;
> - for (i = 0; i < adapter->num_io_queues; i++) {
> - int irq_idx = ENA_IO_IRQ_IDX(i);
> -
> - rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
> - pci_irq_vector(adapter->pdev, irq_idx));
> - if (rc) {
> - free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
> - adapter->netdev->rx_cpu_rmap = NULL;
> - return rc;
> - }
> - }
> -#endif /* CONFIG_RFS_ACCEL */
> + return netif_enable_cpu_rmap(adapter->netdev,
> +adapter->num_io_queues); #else
> return 0;
> +#endif /* CONFIG_RFS_ACCEL */
> }
>
> static void ena_init_io_rings_common(struct ena_adapter *adapter, @@ -
> 1742,13 +1724,6 @@ static void ena_free_io_irq(struct ena_adapter
> *adapter)
> struct ena_irq *irq;
> int i;
>
> -#ifdef CONFIG_RFS_ACCEL
> - if (adapter->msix_vecs >= 1) {
> - free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
> - adapter->netdev->rx_cpu_rmap = NULL;
> - }
> -#endif /* CONFIG_RFS_ACCEL */
> -
> for (i = ENA_IO_IRQ_FIRST_IDX; i <
> ENA_MAX_MSIX_VEC(io_queue_count); i++) {
> irq = &adapter->irq_tbl[i];
> irq_set_affinity_hint(irq->vector, NULL); @@ -4131,13 +4106,6 @@
> static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
> ena_dev = adapter->ena_dev;
> netdev = adapter->netdev;
>
> -#ifdef CONFIG_RFS_ACCEL
> - if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
> - free_irq_cpu_rmap(netdev->rx_cpu_rmap);
> - netdev->rx_cpu_rmap = NULL;
> - }
> -
> -#endif /* CONFIG_RFS_ACCEL */
> /* Make sure timer and reset routine won't be called after
> * freeing device resources.
> */
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> index 884d42db5554..1f50bc715038 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> @@ -49,7 +49,6 @@
> #include <linux/cache.h>
> #include <linux/log2.h>
> #include <linux/bitmap.h>
> -#include <linux/cpu_rmap.h>
> #include <linux/cpumask.h>
> #include <net/pkt_cls.h>
> #include <net/page_pool/helpers.h>
> @@ -10861,7 +10860,7 @@ static int bnxt_set_real_num_queues(struct bnxt
> *bp)
>
> #ifdef CONFIG_RFS_ACCEL
> if (bp->flags & BNXT_FLAG_RFS)
> - dev->rx_cpu_rmap = alloc_irq_cpu_rmap(bp->rx_nr_rings);
> + return netif_enable_cpu_rmap(dev, bp->rx_nr_rings);
> #endif
>
> return rc;
> @@ -11215,10 +11214,6 @@ static void bnxt_free_irq(struct bnxt *bp)
> struct bnxt_irq *irq;
> int i;
>
> -#ifdef CONFIG_RFS_ACCEL
> - free_irq_cpu_rmap(bp->dev->rx_cpu_rmap);
> - bp->dev->rx_cpu_rmap = NULL;
> -#endif
> if (!bp->irq_tbl || !bp->bnapi)
> return;
>
> @@ -11241,11 +11236,8 @@ static void bnxt_free_irq(struct bnxt *bp)
>
> static int bnxt_request_irq(struct bnxt *bp) {
> - int i, j, rc = 0;
> + int i, rc = 0;
> unsigned long flags = 0;
> -#ifdef CONFIG_RFS_ACCEL
> - struct cpu_rmap *rmap;
> -#endif
>
> rc = bnxt_setup_int_mode(bp);
> if (rc) {
> @@ -11253,22 +11245,11 @@ static int bnxt_request_irq(struct bnxt *bp)
> rc);
> return rc;
> }
> -#ifdef CONFIG_RFS_ACCEL
> - rmap = bp->dev->rx_cpu_rmap;
> -#endif
> - for (i = 0, j = 0; i < bp->cp_nr_rings; i++) {
> +
> + for (i = 0; i < bp->cp_nr_rings; i++) {
> int map_idx = bnxt_cp_num_to_irq_num(bp, i);
> struct bnxt_irq *irq = &bp->irq_tbl[map_idx];
>
> -#ifdef CONFIG_RFS_ACCEL
> - if (rmap && bp->bnapi[i]->rx_ring) {
> - rc = irq_cpu_rmap_add(rmap, irq->vector);
> - if (rc)
> - netdev_warn(bp->dev, "failed adding irq rmap for ring
> %d\n",
> - j);
> - j++;
> - }
> -#endif
> rc = request_irq(irq->vector, irq->handler, flags, irq->name,
> bp->bnapi[i]);
> if (rc)
> diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c
> b/drivers/net/ethernet/intel/ice/ice_arfs.c
> index 7cee365cc7d1..3b1b892e6958 100644
> --- a/drivers/net/ethernet/intel/ice/ice_arfs.c
> +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
> @@ -584,9 +584,6 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi)
> netdev = vsi->netdev;
> if (!netdev || !netdev->rx_cpu_rmap)
> return;
> -
> - free_irq_cpu_rmap(netdev->rx_cpu_rmap);
> - netdev->rx_cpu_rmap = NULL;
> }
>
> /**
> @@ -597,7 +594,6 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) {
> struct net_device *netdev;
> struct ice_pf *pf;
> - int i;
>
> if (!vsi || vsi->type != ICE_VSI_PF)
> return 0;
> @@ -610,18 +606,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
> netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s,
> q_vectors %d\n",
> vsi->type, netdev->name, vsi->num_q_vectors);
>
> - netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors);
> - if (unlikely(!netdev->rx_cpu_rmap))
> - return -EINVAL;
> -
> - ice_for_each_q_vector(vsi, i)
> - if (irq_cpu_rmap_add(netdev->rx_cpu_rmap,
> - vsi->q_vectors[i]->irq.virq)) {
> - ice_free_cpu_rx_rmap(vsi);
> - return -EINVAL;
> - }
> -
> - return 0;
> + return netif_enable_cpu_rmap(netdev, vsi->num_q_vectors);
> }
>
> /**
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index
> aeb4a6cff171..7e95e9ee36dd 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1991,6 +1991,9 @@ enum netdev_reg_state {
> *
> * @threaded: napi threaded mode is enabled
> *
> + * @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap.
> + * Set by calling netif_enable_cpu_rmap().
> + *
> * @see_all_hwtstamp_requests: device wants to see calls to
> * ndo_hwtstamp_set() for all timestamp requests
> * regardless of source, even if those aren't
> @@ -2398,6 +2401,9 @@ struct net_device {
> struct lock_class_key *qdisc_tx_busylock;
> bool proto_down;
> bool threaded;
> +#ifdef CONFIG_RFS_ACCEL
> + bool rx_cpu_rmap_auto;
> +#endif
>
> /* priv_flags_slow, ungrouped to save space */
> unsigned long see_all_hwtstamp_requests:1;
> @@ -2671,10 +2677,7 @@ void netif_queue_set_napi(struct net_device
> *dev, unsigned int queue_index,
> enum netdev_queue_type type,
> struct napi_struct *napi);
>
> -static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) -{
> - napi->irq = irq;
> -}
> +void netif_napi_set_irq(struct napi_struct *napi, int irq);
>
> /* Default NAPI poll() weight
> * Device drivers are strongly advised to not use bigger value @@ -2765,6
> +2768,10 @@ static inline void netif_napi_del(struct napi_struct *napi)
> synchronize_net();
> }
>
> +#ifdef CONFIG_RFS_ACCEL
> +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int
> +num_irqs);
> +
> +#endif
> struct packet_type {
> __be16 type; /* This is really htons(ether_type). */
> bool ignore_outgoing;
> diff --git a/net/core/dev.c b/net/core/dev.c index
> 1a90ed8cc6cc..3ee7a514dca8 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6745,6 +6745,46 @@ void netif_queue_set_napi(struct net_device
> *dev, unsigned int queue_index, }
> EXPORT_SYMBOL(netif_queue_set_napi);
>
> +#ifdef CONFIG_RFS_ACCEL
> +static void netif_disable_cpu_rmap(struct net_device *dev) {
> + free_irq_cpu_rmap(dev->rx_cpu_rmap);
> + dev->rx_cpu_rmap = NULL;
> + dev->rx_cpu_rmap_auto = false;
> +}
> +
> +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int
> +num_irqs) {
> + dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs);
> + if (!dev->rx_cpu_rmap)
> + return -ENOMEM;
> +
> + dev->rx_cpu_rmap_auto = true;
> + return 0;
> +}
> +EXPORT_SYMBOL(netif_enable_cpu_rmap);
> +#endif
> +
> +void netif_napi_set_irq(struct napi_struct *napi, int irq) { #ifdef
> +CONFIG_RFS_ACCEL
> + int rc;
> +#endif
> + napi->irq = irq;
> +
> +#ifdef CONFIG_RFS_ACCEL
> + if (napi->dev->rx_cpu_rmap && napi->dev->rx_cpu_rmap_auto) {
> + rc = irq_cpu_rmap_add(napi->dev->rx_cpu_rmap, irq);
> + if (rc) {
> + netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n",
> + rc);
> + netif_disable_cpu_rmap(napi->dev);
> + }
> + }
> +#endif
> +}
> +EXPORT_SYMBOL(netif_napi_set_irq);
> +
> static void napi_restore_config(struct napi_struct *n) {
> n->defer_hard_irqs = n->config->defer_hard_irqs; @@ -11421,6
> +11461,10 @@ void free_netdev(struct net_device *dev)
> /* Flush device addresses */
> dev_addr_flush(dev);
>
> +#ifdef CONFIG_RFS_ACCEL
> + if (dev->rx_cpu_rmap && dev->rx_cpu_rmap_auto)
> + netif_disable_cpu_rmap(dev); #endif
> list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
> netif_napi_del(p);
>
> --
> 2.43.0
Thanks for making the change in the ENA driver.
Acked-by: David Arinzon <darinzon@amazon.com>
next prev parent reply other threads:[~2025-01-14 9:33 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-13 17:10 [PATCH net-next v5 0/6] net: napi: add CPU affinity to napi->config Ahmed Zaki
2025-01-13 17:10 ` [PATCH net-next v5 1/6] net: move ARFS rmap management to core Ahmed Zaki
2025-01-14 9:33 ` Arinzon, David [this message]
2025-01-14 22:08 ` Jakub Kicinski
2025-01-15 1:00 ` Ahmed Zaki
2025-01-15 1:38 ` Jakub Kicinski
2025-01-13 17:10 ` [PATCH net-next v5 2/6] net: napi: add internal ARFS rmap management Ahmed Zaki
2025-01-13 17:10 ` [PATCH net-next v5 3/6] net: napi: add CPU affinity to napi_config Ahmed Zaki
2025-01-13 17:10 ` [PATCH net-next v5 4/6] bnxt: use napi's irq affinity Ahmed Zaki
2025-01-13 17:10 ` [PATCH net-next v5 5/6] ice: " Ahmed Zaki
2025-01-13 17:10 ` [PATCH net-next v5 6/6] idpf: " Ahmed Zaki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4517d3801ecb465785f7c883554c9d7f@amazon.com \
--to=darinzon@amazon.com \
--cc=ahmed.zaki@intel.com \
--cc=akpm@linux-foundation.org \
--cc=andrew+netdev@lunn.ch \
--cc=anthony.l.nguyen@intel.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=intel-wired-lan@lists.osuosl.org \
--cc=jdamato@fastly.com \
--cc=kalesh-anakkur.purayil@broadcom.com \
--cc=kuba@kernel.org \
--cc=michael.chan@broadcom.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=pavan.chebbi@broadcom.com \
--cc=przemyslaw.kitszel@intel.com \
--cc=shayagr@amazon.com \
--cc=shayd@nvidia.com \
--cc=tariqt@nvidia.com \
--cc=yury.norov@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).