* [PATCH v1 net-next 1/6] net: napi: add irq_flags to napi struct
2024-12-10 0:26 [PATCH v1 net-next 0/6] net: napi: add CPU affinity to napi->config Ahmed Zaki
@ 2024-12-10 0:26 ` Ahmed Zaki
2024-12-10 0:26 ` [PATCH v1 net-next 2/6] net: napi: add CPU affinity to napi->config Ahmed Zaki
` (4 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Ahmed Zaki @ 2024-12-10 0:26 UTC (permalink / raw)
To: netdev
Cc: intel-wired-lan, andrew+netdev, edumazet, kuba, pabeni, davem,
michael.chan, tariqt, anthony.l.nguyen, przemyslaw.kitszel,
Ahmed Zaki
Add irq_flags to the napi struct. This will allow drivers to choose
how the core handles the IRQ assigned to the napi via
netif_napi_set_irq().
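For illustration, a minimal driver-side sketch of the new calling
convention; "netdev", "priv", "my_poll" and the use of pci_irq_vector()
are placeholders, not taken from any driver in this series:
        /* passing 0 keeps today's behaviour; later patches add NAPIF_F_*
         * flags that let the core take over handling of this IRQ
         */
        netif_napi_add(netdev, &priv->napi, my_poll);
        netif_napi_set_irq(&priv->napi, pci_irq_vector(pdev, 0), 0);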
Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
---
drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +-
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
drivers/net/ethernet/broadcom/tg3.c | 2 +-
drivers/net/ethernet/google/gve/gve_utils.c | 2 +-
drivers/net/ethernet/intel/e1000/e1000_main.c | 2 +-
drivers/net/ethernet/intel/e1000e/netdev.c | 2 +-
drivers/net/ethernet/intel/ice/ice_lib.c | 2 +-
drivers/net/ethernet/mellanox/mlx4/en_cq.c | 4 ++--
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 3 ++-
include/linux/netdevice.h | 5 ++++-
net/core/dev.c | 2 +-
12 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 63c8a2328142..5b5b22621dcc 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1712,7 +1712,7 @@ static int ena_request_io_irq(struct ena_adapter *adapter)
for (i = 0; i < io_queue_count; i++) {
irq_idx = ENA_IO_IRQ_IDX(i);
irq = &adapter->irq_tbl[irq_idx];
- netif_napi_set_irq(&adapter->ena_napi[i].napi, irq->vector);
+ netif_napi_set_irq(&adapter->ena_napi[i].napi, irq->vector, 0);
}
return rc;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 5f7bdafcf05d..45b27460d462 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -11174,7 +11174,7 @@ static int bnxt_request_irq(struct bnxt *bp)
if (rc)
break;
- netif_napi_set_irq(&bp->bnapi[i]->napi, irq->vector);
+ netif_napi_set_irq(&bp->bnapi[i]->napi, irq->vector, 0);
irq->requested = 1;
if (zalloc_cpumask_var(&irq->cpu_mask, GFP_KERNEL)) {
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 01dfec115942..3712af3f95f4 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7447,7 +7447,7 @@ static void tg3_napi_init(struct tg3 *tp)
for (i = 0; i < tp->irq_cnt; i++) {
netif_napi_add(tp->dev, &tp->napi[i].napi,
i ? tg3_poll_msix : tg3_poll);
- netif_napi_set_irq(&tp->napi[i].napi, tp->napi[i].irq_vec);
+ netif_napi_set_irq(&tp->napi[i].napi, tp->napi[i].irq_vec, 0);
}
}
diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c
index 30fef100257e..2657e583f5c6 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.c
+++ b/drivers/net/ethernet/google/gve/gve_utils.c
@@ -111,7 +111,7 @@ void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
netif_napi_add(priv->dev, &block->napi, gve_poll);
- netif_napi_set_irq(&block->napi, block->irq);
+ netif_napi_set_irq(&block->napi, block->irq, 0);
}
void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 3f089c3d47b2..a83af159837a 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -1394,7 +1394,7 @@ int e1000_open(struct net_device *netdev)
/* From here on the code is the same as e1000_up() */
clear_bit(__E1000_DOWN, &adapter->flags);
- netif_napi_set_irq(&adapter->napi, adapter->pdev->irq);
+ netif_napi_set_irq(&adapter->napi, adapter->pdev->irq, 0);
napi_enable(&adapter->napi);
netif_queue_set_napi(netdev, 0, NETDEV_QUEUE_TYPE_RX, &adapter->napi);
netif_queue_set_napi(netdev, 0, NETDEV_QUEUE_TYPE_TX, &adapter->napi);
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 286155efcedf..8fc5603ed962 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4676,7 +4676,7 @@ int e1000e_open(struct net_device *netdev)
else
irq = adapter->pdev->irq;
- netif_napi_set_irq(&adapter->napi, irq);
+ netif_napi_set_irq(&adapter->napi, irq, 0);
napi_enable(&adapter->napi);
netif_queue_set_napi(netdev, 0, NETDEV_QUEUE_TYPE_RX, &adapter->napi);
netif_queue_set_napi(netdev, 0, NETDEV_QUEUE_TYPE_TX, &adapter->napi);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index a7d45a8ce7ac..ff91e70f596f 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2735,7 +2735,7 @@ void ice_vsi_set_napi_queues(struct ice_vsi *vsi)
ice_for_each_q_vector(vsi, v_idx) {
struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
- netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq);
+ netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq, 0);
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 0e92956e84cf..b8531283e3ac 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -150,7 +150,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
case TX:
cq->mcq.comp = mlx4_en_tx_irq;
netif_napi_add_tx(cq->dev, &cq->napi, mlx4_en_poll_tx_cq);
- netif_napi_set_irq(&cq->napi, irq);
+ netif_napi_set_irq(&cq->napi, irq, 0);
napi_enable(&cq->napi);
netif_queue_set_napi(cq->dev, cq_idx, NETDEV_QUEUE_TYPE_TX, &cq->napi);
break;
@@ -158,7 +158,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
cq->mcq.comp = mlx4_en_rx_irq;
netif_napi_add_config(cq->dev, &cq->napi, mlx4_en_poll_rx_cq,
cq_idx);
- netif_napi_set_irq(&cq->napi, irq);
+ netif_napi_set_irq(&cq->napi, irq, 0);
napi_enable(&cq->napi);
netif_queue_set_napi(cq->dev, cq_idx, NETDEV_QUEUE_TYPE_RX, &cq->napi);
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d0b80b520397..93e53d9bd8d0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2726,7 +2726,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->lag_port = mlx5e_enumerate_lag_port(mdev, ix);
netif_napi_add_config(netdev, &c->napi, mlx5e_napi_poll, ix);
- netif_napi_set_irq(&c->napi, irq);
+ netif_napi_set_irq(&c->napi, irq, 0);
err = mlx5e_open_queues(c, params, cparam);
if (unlikely(err))
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index b5050fabe8fe..6ca91ce85d48 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -1227,7 +1227,8 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
/* Record IRQ to NAPI struct */
netif_napi_set_irq(&nv->napi,
- pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx));
+ pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx),
+ 0);
/* Tie nv back to PCIe dev */
nv->dev = fbd->dev;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ecc686409161..b598de335d26 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -391,6 +391,7 @@ struct napi_struct {
struct list_head dev_list;
struct hlist_node napi_hash_node;
int irq;
+ unsigned long irq_flags;
int index;
struct napi_config *config;
};
@@ -2667,9 +2668,11 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
enum netdev_queue_type type,
struct napi_struct *napi);
-static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
+static inline void netif_napi_set_irq(struct napi_struct *napi,
+ int irq, unsigned long flags)
{
napi->irq = irq;
+ napi->irq_flags = flags;
}
/* Default NAPI poll() weight
diff --git a/net/core/dev.c b/net/core/dev.c
index 13d00fc10f55..6ef9eb401fb2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6764,7 +6764,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
*/
if (dev->threaded && napi_kthread_create(napi))
dev->threaded = false;
- netif_napi_set_irq(napi, -1);
+ netif_napi_set_irq(napi, -1, 0);
}
EXPORT_SYMBOL(netif_napi_add_weight);
--
2.47.0
* [PATCH v1 net-next 2/6] net: napi: add CPU affinity to napi->config
2024-12-10 0:26 [PATCH v1 net-next 0/6] net: napi: add CPU affinity to napi->config Ahmed Zaki
2024-12-10 0:26 ` [PATCH v1 net-next 1/6] net: napi: add irq_flags to napi struct Ahmed Zaki
@ 2024-12-10 0:26 ` Ahmed Zaki
2024-12-10 1:29 ` Joe Damato
2024-12-10 0:26 ` [PATCH v1 net-next 3/6] bnxt: use napi's irq affinity Ahmed Zaki
` (3 subsequent siblings)
5 siblings, 1 reply; 10+ messages in thread
From: Ahmed Zaki @ 2024-12-10 0:26 UTC (permalink / raw)
To: netdev
Cc: intel-wired-lan, andrew+netdev, edumazet, kuba, pabeni, davem,
michael.chan, tariqt, anthony.l.nguyen, przemyslaw.kitszel,
Ahmed Zaki
A common task for most drivers is to remember the user's CPU affinity
settings for their IRQs. On each netdev reset, the driver must then
re-apply those settings to the IRQs.

Add a CPU affinity mask to napi->config. To delegate CPU affinity
management to the core, drivers must:
1 - add a persistent napi config: netif_napi_add_config()
2 - bind an IRQ to the napi instance: netif_napi_set_irq()

The core will then take care of re-assigning the affinity to the napi's
IRQ.

The default affinity mask for all IRQs is all online CPUs.
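A rough sketch of that opt-in pattern, using generic "q_vector",
"irq_num", "q_idx" and "my_poll" names rather than any particular
driver's:
        /* 1 - persistent per-queue napi config, keyed by the queue index */
        netif_napi_add_config(netdev, &q_vector->napi, my_poll, q_idx);

        /* 2 - bind the IRQ and ask the core to manage its affinity */
        netif_napi_set_irq(&q_vector->napi, irq_num, NAPIF_F_IRQ_AFFINITY);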
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
---
include/linux/netdevice.h | 6 ++++++
net/core/dev.c | 31 ++++++++++++++++++++++++++++++-
2 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b598de335d26..9bf91c3aca8d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -350,9 +350,14 @@ struct napi_config {
u64 gro_flush_timeout;
u64 irq_suspend_timeout;
u32 defer_hard_irqs;
+ cpumask_t affinity_mask;
unsigned int napi_id;
};
+enum {
+ NAPIF_F_IRQ_AFFINITY = BIT(0)
+};
+
/*
* Structure for NAPI scheduling similar to tasklet but with weighting
*/
@@ -394,6 +399,7 @@ struct napi_struct {
unsigned long irq_flags;
int index;
struct napi_config *config;
+ struct irq_affinity_notify affinity_notify;
};
enum {
diff --git a/net/core/dev.c b/net/core/dev.c
index 6ef9eb401fb2..778ba27d2b83 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6699,11 +6699,35 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
}
EXPORT_SYMBOL(netif_queue_set_napi);
+static void
+netif_napi_affinity_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ struct napi_struct *napi =
+ container_of(notify, struct napi_struct, affinity_notify);
+
+ if (napi->config)
+ cpumask_copy(&napi->config->affinity_mask, mask);
+}
+
+static void
+netif_napi_affinity_release(struct kref __always_unused *ref)
+{
+}
+
static void napi_restore_config(struct napi_struct *n)
{
n->defer_hard_irqs = n->config->defer_hard_irqs;
n->gro_flush_timeout = n->config->gro_flush_timeout;
n->irq_suspend_timeout = n->config->irq_suspend_timeout;
+
+ if (n->irq > 0 && n->irq_flags & NAPIF_F_IRQ_AFFINITY) {
+ n->affinity_notify.notify = netif_napi_affinity_notify;
+ n->affinity_notify.release = netif_napi_affinity_release;
+ irq_set_affinity_notifier(n->irq, &n->affinity_notify);
+ irq_set_affinity(n->irq, &n->config->affinity_mask);
+ }
+
/* a NAPI ID might be stored in the config, if so use it. if not, use
* napi_hash_add to generate one for us. It will be saved to the config
* in napi_disable.
@@ -6720,6 +6744,8 @@ static void napi_save_config(struct napi_struct *n)
n->config->gro_flush_timeout = n->gro_flush_timeout;
n->config->irq_suspend_timeout = n->irq_suspend_timeout;
n->config->napi_id = n->napi_id;
+ if (n->irq > 0 && n->irq_flags & NAPIF_F_IRQ_AFFINITY)
+ irq_set_affinity_notifier(n->irq, NULL);
napi_hash_del(n);
}
@@ -11184,7 +11210,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
{
struct net_device *dev;
size_t napi_config_sz;
- unsigned int maxqs;
+ unsigned int maxqs, i;
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -11280,6 +11306,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT);
if (!dev->napi_config)
goto free_all;
+ for (i = 0; i < maxqs; i++)
+ cpumask_copy(&dev->napi_config[i].affinity_mask,
+ cpu_online_mask);
strscpy(dev->name, name);
dev->name_assign_type = name_assign_type;
--
2.47.0
* Re: [PATCH v1 net-next 2/6] net: napi: add CPU affinity to napi->config
2024-12-10 0:26 ` [PATCH v1 net-next 2/6] net: napi: add CPU affinity to napi->config Ahmed Zaki
@ 2024-12-10 1:29 ` Joe Damato
2024-12-11 4:21 ` Jakub Kicinski
2024-12-11 16:33 ` Ahmed Zaki
0 siblings, 2 replies; 10+ messages in thread
From: Joe Damato @ 2024-12-10 1:29 UTC (permalink / raw)
To: Ahmed Zaki
Cc: netdev, intel-wired-lan, andrew+netdev, edumazet, kuba, pabeni,
davem, michael.chan, tariqt, anthony.l.nguyen, przemyslaw.kitszel
On Mon, Dec 09, 2024 at 05:26:22PM -0700, Ahmed Zaki wrote:
> A common task for most drivers is to remember the user's CPU affinity
> settings for their IRQs. On each netdev reset, the driver must then
> re-apply those settings to the IRQs.
>
> Add a CPU affinity mask to napi->config. To delegate CPU affinity
> management to the core, drivers must:
> 1 - add a persistent napi config: netif_napi_add_config()
> 2 - bind an IRQ to the napi instance: netif_napi_set_irq()
>
> The core will then take care of re-assigning the affinity to the napi's
> IRQ.
>
> The default affinity mask for all IRQs is all online CPUs.
>
> Suggested-by: Jakub Kicinski <kuba@kernel.org>
> Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
> ---
[...]
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 6ef9eb401fb2..778ba27d2b83 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6699,11 +6699,35 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
> }
> EXPORT_SYMBOL(netif_queue_set_napi);
>
> +static void
> +netif_napi_affinity_notify(struct irq_affinity_notify *notify,
> + const cpumask_t *mask)
> +{
> + struct napi_struct *napi =
> + container_of(notify, struct napi_struct, affinity_notify);
> +
> + if (napi->config)
> + cpumask_copy(&napi->config->affinity_mask, mask);
> +}
> +
> +static void
> +netif_napi_affinity_release(struct kref __always_unused *ref)
> +{
> +}
> +
> static void napi_restore_config(struct napi_struct *n)
> {
> n->defer_hard_irqs = n->config->defer_hard_irqs;
> n->gro_flush_timeout = n->config->gro_flush_timeout;
> n->irq_suspend_timeout = n->config->irq_suspend_timeout;
> +
> + if (n->irq > 0 && n->irq_flags & NAPIF_F_IRQ_AFFINITY) {
> + n->affinity_notify.notify = netif_napi_affinity_notify;
> + n->affinity_notify.release = netif_napi_affinity_release;
> + irq_set_affinity_notifier(n->irq, &n->affinity_notify);
> + irq_set_affinity(n->irq, &n->config->affinity_mask);
> + }
> +
> /* a NAPI ID might be stored in the config, if so use it. if not, use
> * napi_hash_add to generate one for us. It will be saved to the config
> * in napi_disable.
> @@ -6720,6 +6744,8 @@ static void napi_save_config(struct napi_struct *n)
> n->config->gro_flush_timeout = n->gro_flush_timeout;
> n->config->irq_suspend_timeout = n->irq_suspend_timeout;
> n->config->napi_id = n->napi_id;
> + if (n->irq > 0 && n->irq_flags & NAPIF_F_IRQ_AFFINITY)
> + irq_set_affinity_notifier(n->irq, NULL);
My understanding when I attempted this was that using generic IRQ
notifiers breaks ARFS [1], because IRQ notifiers only support a
single notifier and so drivers with ARFS can't _also_ set their own
notifiers for that.
Two ideas were proposed in the thread I mentioned:
1. Have multiple notifiers per IRQ so that having a generic core
based notifier wouldn't break ARFS.
2. Jakub mentioned calling cpu_rmap_update from the core so that a
generic solution wouldn't be blocked.
I don't know anything about option 1, so I looked at option 2.
At the time when I read the code, it seemed that cpu_rmap_update
required some state be passed in (struct irq_glue), so in that case,
the only way to call cpu_rmap_update from the core would be to
maintain some state about ARFS in the core, too, so that drivers
which support ARFS won't be broken by this change.
At that time there was no persistent per-NAPI config, but since
there is now, there might be a way to solve this.
Just guessing here, but maybe one way to solve this would be to move
ARFS into the core by:
- Adding a new bit in addition to NAPIF_F_IRQ_AFFINITY... I don't
know NAPIF_F_ARFS_AFFINITY or something? so that drivers
could express that they support ARFS.
- Remove the driver calls to irq_cpu_rmap_add and make sure to
pass the new bit in for drivers that support ARFS (in your
changeset, I believe that would be at least ice, mlx4, and
bnxt... possibly more?).
- In the generic core code, if the ARFS bit is set then you pass
in the state needed for ARFS to work, otherwise do what the
proposed code is doing now.
But, that's just a guess. Maybe there's a better way.
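Very roughly, and purely as a sketch of the idea above (the
NAPIF_F_ARFS_AFFINITY flag, a per-napi rmap index and reusing
dev->rx_cpu_rmap are all hypothetical, not part of this series), the
core's notifier could branch like this:
static void
netif_napi_affinity_notify(struct irq_affinity_notify *notify,
                           const cpumask_t *mask)
{
        struct napi_struct *napi =
                container_of(notify, struct napi_struct, affinity_notify);

        if (napi->config)
                cpumask_copy(&napi->config->affinity_mask, mask);

#ifdef CONFIG_RFS_ACCEL
        /* hypothetical flag and per-napi rmap index: if the driver asked
         * the core to manage the ARFS rmap, update it here as well
         */
        if (napi->irq_flags & NAPIF_F_ARFS_AFFINITY)
                cpu_rmap_update(napi->dev->rx_cpu_rmap, napi->rmap_idx, mask);
#endif
}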
In any case, I think the proposed change as-is breaks ARFS in the
same way my proposed change did, so any further iterations on this
might need to include something to avoid breaking ARFS.
FWIW, I was trying to solve a slightly different problem which was
removing a repeated check in driver napi poll functions to determine
if the IRQ affinity had changed.
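(For context, a condensed illustration of that kind of per-poll check;
"q_vector", "clean_complete" and friends are placeholders, not lifted
verbatim from any driver:)
        if (!clean_complete) {
                /* the IRQ may have been moved to another CPU while this CPU
                 * was pinned in the poll loop; stop polling so the interrupt
                 * can fire on the new CPU
                 */
                if (!cpumask_test_cpu(smp_processor_id(),
                                      &q_vector->affinity_mask)) {
                        napi_complete_done(napi, work_done);
                        return min(work_done, budget - 1);
                }
                return budget;
        }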
But with the persistent NAPI state, it might be possible to solve
all of these problems in one go since they are all related?
[1]: https://lore.kernel.org/lkml/701eb84c-8d26-4945-8af3-55a70e05b09c@nvidia.com/
* Re: [PATCH v1 net-next 2/6] net: napi: add CPU affinity to napi->config
2024-12-10 1:29 ` Joe Damato
@ 2024-12-11 4:21 ` Jakub Kicinski
2024-12-11 16:33 ` Ahmed Zaki
1 sibling, 0 replies; 10+ messages in thread
From: Jakub Kicinski @ 2024-12-11 4:21 UTC (permalink / raw)
To: Joe Damato
Cc: Ahmed Zaki, netdev, intel-wired-lan, andrew+netdev, edumazet,
pabeni, davem, michael.chan, tariqt, anthony.l.nguyen,
przemyslaw.kitszel
On Mon, 9 Dec 2024 17:29:00 -0800 Joe Damato wrote:
> My understanding when I attempted this was that using generic IRQ
> notifiers breaks ARFS [1], because IRQ notifiers only support a
> single notifier and so drivers with ARFS can't _also_ set their own
> notifiers for that.
Ah, you are so right, I forgot the details and was grepping for
notifier registration :S
> Two ideas were proposed in the thread I mentioned:
> 1. Have multiple notifiers per IRQ so that having a generic core
> based notifier wouldn't break ARFS.
> 2. Jakub mentioned calling cpu_rmap_update from the core so that a
> generic solution wouldn't be blocked.
>
> I don't know anything about option 1, so I looked at option 2.
>
> At the time when I read the code, it seemed that cpu_rmap_update
> required some state be passed in (struct irq_glue), so in that case,
> the only way to call cpu_rmap_update from the core would be to
> maintain some state about ARFS in the core, too, so that drivers
> which support ARFS won't be broken by this change.
>
> At that time there was no persistent per-NAPI config, but since
> there is now, there might be a way to solve this.
>
> Just guessing here, but maybe one way to solve this would be to move
> ARFS into the core by:
> - Adding a new bit in addition to NAPIF_F_IRQ_AFFINITY... I don't
> know NAPIF_F_ARFS_AFFINITY or something? so that drivers
> could express that they support ARFS.
> - Remove the driver calls to irq_cpu_rmap_add and make sure to
> pass the new bit in for drivers that support ARFS (in your
> changeset, I believe that would be at least ice, mlx4, and
> bnxt... possibly more?).
> - In the generic core code, if the ARFS bit is set then you pass
> in the state needed for ARFS to work, otherwise do what the
> proposed code is doing now.
SG, maybe I'd put the flag(s) in struct net_device, so that we are sure
the whole device either wants the new behavior or not.
I say flag(s) because idpf probably just wants to opt in to affinity
mask management, without ARFS. So pretty close to Ahmed's existing code.
ARFS support will require another flag to ask for rmap management in
the core as you describe.
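Sketched out (names made up; whether these live per-napi or in struct
net_device is exactly the open question):
enum {
        NAPIF_F_IRQ_AFFINITY  = BIT(0), /* core restores the IRQ affinity mask */
        NAPIF_F_ARFS_AFFINITY = BIT(1), /* core also manages the ARFS rmap (hypothetical) */
};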
* Re: [PATCH v1 net-next 2/6] net: napi: add CPU affinity to napi->config
2024-12-10 1:29 ` Joe Damato
2024-12-11 4:21 ` Jakub Kicinski
@ 2024-12-11 16:33 ` Ahmed Zaki
1 sibling, 0 replies; 10+ messages in thread
From: Ahmed Zaki @ 2024-12-11 16:33 UTC (permalink / raw)
To: Joe Damato, netdev, intel-wired-lan, andrew+netdev, edumazet,
kuba, pabeni, davem, michael.chan, tariqt, anthony.l.nguyen,
przemyslaw.kitszel
On 2024-12-09 6:29 p.m., Joe Damato wrote:
> On Mon, Dec 09, 2024 at 05:26:22PM -0700, Ahmed Zaki wrote:
>> A common task for most drivers is to remember the user's CPU affinity
>> settings for their IRQs. On each netdev reset, the driver must then
>> re-apply those settings to the IRQs.
>>
>> Add a CPU affinity mask to napi->config. To delegate CPU affinity
>> management to the core, drivers must:
>> 1 - add a persistent napi config: netif_napi_add_config()
>> 2 - bind an IRQ to the napi instance: netif_napi_set_irq()
>>
>> The core will then take care of re-assigning the affinity to the napi's
>> IRQ.
>>
>> The default affinity mask for all IRQs is all online CPUs.
>>
>> Suggested-by: Jakub Kicinski <kuba@kernel.org>
>> Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
>> ---
>
> [...]
>
>> diff --git a/net/core/dev.c b/net/core/dev.c
>> index 6ef9eb401fb2..778ba27d2b83 100644
>> --- a/net/core/dev.c
>> +++ b/net/core/dev.c
>> @@ -6699,11 +6699,35 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
>> }
>> EXPORT_SYMBOL(netif_queue_set_napi);
>>
>> +static void
>> +netif_napi_affinity_notify(struct irq_affinity_notify *notify,
>> + const cpumask_t *mask)
>> +{
>> + struct napi_struct *napi =
>> + container_of(notify, struct napi_struct, affinity_notify);
>> +
>> + if (napi->config)
>> + cpumask_copy(&napi->config->affinity_mask, mask);
>> +}
>> +
>> +static void
>> +netif_napi_affinity_release(struct kref __always_unused *ref)
>> +{
>> +}
>> +
>> static void napi_restore_config(struct napi_struct *n)
>> {
>> n->defer_hard_irqs = n->config->defer_hard_irqs;
>> n->gro_flush_timeout = n->config->gro_flush_timeout;
>> n->irq_suspend_timeout = n->config->irq_suspend_timeout;
>> +
>> + if (n->irq > 0 && n->irq_flags & NAPIF_F_IRQ_AFFINITY) {
>> + n->affinity_notify.notify = netif_napi_affinity_notify;
>> + n->affinity_notify.release = netif_napi_affinity_release;
>> + irq_set_affinity_notifier(n->irq, &n->affinity_notify);
>> + irq_set_affinity(n->irq, &n->config->affinity_mask);
>> + }
>> +
>> /* a NAPI ID might be stored in the config, if so use it. if not, use
>> * napi_hash_add to generate one for us. It will be saved to the config
>> * in napi_disable.
>> @@ -6720,6 +6744,8 @@ static void napi_save_config(struct napi_struct *n)
>> n->config->gro_flush_timeout = n->gro_flush_timeout;
>> n->config->irq_suspend_timeout = n->irq_suspend_timeout;
>> n->config->napi_id = n->napi_id;
>> + if (n->irq > 0 && n->irq_flags & NAPIF_F_IRQ_AFFINITY)
>> + irq_set_affinity_notifier(n->irq, NULL);
>
> My understanding when I attempted this was that using generic IRQ
> notifiers breaks ARFS [1], because IRQ notifiers only support a
> single notifier and so drivers with ARFS can't _also_ set their own
> notifiers for that.
Thank you for the review and reply. I was wondering why some drivers
check for ARFS (in buggy ways) before setting affinity notifiers. I now
have a better understanding.
>
> Two ideas were proposed in the thread I mentioned:
> 1. Have multiple notifiers per IRQ so that having a generic core
> based notifier wouldn't break ARFS.
> 2. Jakub mentioned calling cpu_rmap_update from the core so that a
> generic solution wouldn't be blocked.
>
> I don't know anything about option 1, so I looked at option 2.
>
> At the time when I read the code, it seemed that cpu_rmap_update
> required some state be passed in (struct irq_glue), so in that case,
> the only way to call cpu_rmap_update from the core would be to
> maintain some state about ARFS in the core, too, so that drivers
> which support ARFS won't be broken by this change.
>
> At that time there was no persistent per-NAPI config, but since
> there is now, there might be a way to solve this.
>
> Just guessing here, but maybe one way to solve this would be to move
> ARFS into the core by:
> - Adding a new bit in addition to NAPIF_F_IRQ_AFFINITY... I don't
> know NAPIF_F_ARFS_AFFINITY or something? so that drivers
> could express that they support ARFS.
> - Remove the driver calls to irq_cpu_rmap_add and make sure to
> pass the new bit in for drivers that support ARFS (in your
> changeset, I believe that would be at least ice, mlx4, and
> bnxt... possibly more?).
> - In the generic core code, if the ARFS bit is set then you pass
> in the state needed for ARFS to work, otherwise do what the
> proposed code is doing now.
>
> But, that's just a guess. Maybe there's a better way.
I will look into all of this and send a new version, but yes, it is clear
that the core needs to manage ARFS rmap creation and updates in addition
to restoring the affinity.
Ahmed
* [PATCH v1 net-next 3/6] bnxt: use napi's irq affinity
2024-12-10 0:26 [PATCH v1 net-next 0/6] net: napi: add CPU affinity to napi->config Ahmed Zaki
2024-12-10 0:26 ` [PATCH v1 net-next 1/6] net: napi: add irq_flags to napi struct Ahmed Zaki
2024-12-10 0:26 ` [PATCH v1 net-next 2/6] net: napi: add CPU affinity to napi->config Ahmed Zaki
@ 2024-12-10 0:26 ` Ahmed Zaki
2024-12-10 0:26 ` [PATCH v1 net-next 4/6] mlx4: " Ahmed Zaki
` (2 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Ahmed Zaki @ 2024-12-10 0:26 UTC (permalink / raw)
To: netdev
Cc: intel-wired-lan, andrew+netdev, edumazet, kuba, pabeni, davem,
michael.chan, tariqt, anthony.l.nguyen, przemyslaw.kitszel,
Ahmed Zaki
Delete the driver CPU affinity info and use the core's napi config
instead.
Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
---
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 26 +++--------------------
drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 --
2 files changed, 3 insertions(+), 25 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 45b27460d462..d1c9a70514e0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -11126,14 +11126,8 @@ static void bnxt_free_irq(struct bnxt *bp)
int map_idx = bnxt_cp_num_to_irq_num(bp, i);
irq = &bp->irq_tbl[map_idx];
- if (irq->requested) {
- if (irq->have_cpumask) {
- irq_update_affinity_hint(irq->vector, NULL);
- free_cpumask_var(irq->cpu_mask);
- irq->have_cpumask = 0;
- }
+ if (irq->requested)
free_irq(irq->vector, bp->bnapi[i]);
- }
irq->requested = 0;
}
@@ -11174,23 +11168,9 @@ static int bnxt_request_irq(struct bnxt *bp)
if (rc)
break;
- netif_napi_set_irq(&bp->bnapi[i]->napi, irq->vector, 0);
+ netif_napi_set_irq(&bp->bnapi[i]->napi,
+ irq->vector, NAPIF_F_IRQ_AFFINITY);
irq->requested = 1;
-
- if (zalloc_cpumask_var(&irq->cpu_mask, GFP_KERNEL)) {
- int numa_node = dev_to_node(&bp->pdev->dev);
-
- irq->have_cpumask = 1;
- cpumask_set_cpu(cpumask_local_spread(i, numa_node),
- irq->cpu_mask);
- rc = irq_update_affinity_hint(irq->vector, irq->cpu_mask);
- if (rc) {
- netdev_warn(bp->dev,
- "Update affinity hint failed, IRQ = %d\n",
- irq->vector);
- break;
- }
- }
}
return rc;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 231e38933984..9942d6f0127b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1225,9 +1225,7 @@ struct bnxt_irq {
irq_handler_t handler;
unsigned int vector;
u8 requested:1;
- u8 have_cpumask:1;
char name[IFNAMSIZ + BNXT_IRQ_NAME_EXTRA];
- cpumask_var_t cpu_mask;
};
#define HWRM_RING_ALLOC_TX 0x1
--
2.47.0
* [PATCH v1 net-next 4/6] mlx4: use napi's irq affinity
2024-12-10 0:26 [PATCH v1 net-next 0/6] net: napi: add CPU affinity to napi->config Ahmed Zaki
` (2 preceding siblings ...)
2024-12-10 0:26 ` [PATCH v1 net-next 3/6] bnxt: use napi's irq affinity Ahmed Zaki
@ 2024-12-10 0:26 ` Ahmed Zaki
2024-12-10 0:26 ` [PATCH v1 net-next 5/6] ice: " Ahmed Zaki
2024-12-10 0:26 ` [PATCH v1 net-next 6/6] idpf: " Ahmed Zaki
5 siblings, 0 replies; 10+ messages in thread
From: Ahmed Zaki @ 2024-12-10 0:26 UTC (permalink / raw)
To: netdev
Cc: intel-wired-lan, andrew+netdev, edumazet, kuba, pabeni, davem,
michael.chan, tariqt, anthony.l.nguyen, przemyslaw.kitszel,
Ahmed Zaki
Delete the driver CPU affinity info and use the core's napi config
instead.
Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
---
drivers/net/ethernet/mellanox/mlx4/en_cq.c | 8 ++--
.../net/ethernet/mellanox/mlx4/en_netdev.c | 33 +--------------
drivers/net/ethernet/mellanox/mlx4/eq.c | 22 ----------
drivers/net/ethernet/mellanox/mlx4/main.c | 42 ++-----------------
drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 -
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 -
6 files changed, 10 insertions(+), 97 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index b8531283e3ac..8c5ad5e7348a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -90,6 +90,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
int cq_idx)
{
struct mlx4_en_dev *mdev = priv->mdev;
+ struct napi_config *napi_config;
int irq, err = 0;
int timestamp_en = 0;
bool assigned_eq = false;
@@ -100,11 +101,12 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
*cq->mcq.set_ci_db = 0;
*cq->mcq.arm_db = 0;
memset(cq->buf, 0, cq->buf_size);
+ napi_config = cq->napi.config;
if (cq->type == RX) {
if (!mlx4_is_eq_vector_valid(mdev->dev, priv->port,
cq->vector)) {
- cq->vector = cpumask_first(priv->rx_ring[cq->ring]->affinity_mask);
+ cq->vector = cpumask_first(&napi_config->affinity_mask);
err = mlx4_assign_eq(mdev->dev, priv->port,
&cq->vector);
@@ -150,7 +152,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
case TX:
cq->mcq.comp = mlx4_en_tx_irq;
netif_napi_add_tx(cq->dev, &cq->napi, mlx4_en_poll_tx_cq);
- netif_napi_set_irq(&cq->napi, irq, 0);
+ netif_napi_set_irq(&cq->napi, irq, NAPIF_F_IRQ_AFFINITY);
napi_enable(&cq->napi);
netif_queue_set_napi(cq->dev, cq_idx, NETDEV_QUEUE_TYPE_TX, &cq->napi);
break;
@@ -158,7 +160,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
cq->mcq.comp = mlx4_en_rx_irq;
netif_napi_add_config(cq->dev, &cq->napi, mlx4_en_poll_rx_cq,
cq_idx);
- netif_napi_set_irq(&cq->napi, irq, 0);
+ netif_napi_set_irq(&cq->napi, irq, NAPIF_F_IRQ_AFFINITY);
napi_enable(&cq->napi);
netif_queue_set_napi(cq->dev, cq_idx, NETDEV_QUEUE_TYPE_RX, &cq->napi);
break;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 281b34af0bb4..e4c2532b5909 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1596,24 +1596,6 @@ static void mlx4_en_linkstate_work(struct work_struct *work)
mutex_unlock(&mdev->state_lock);
}
-static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
-{
- struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
- int numa_node = priv->mdev->dev->numa_node;
-
- if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL))
- return -ENOMEM;
-
- cpumask_set_cpu(cpumask_local_spread(ring_idx, numa_node),
- ring->affinity_mask);
- return 0;
-}
-
-static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
-{
- free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask);
-}
-
static void mlx4_en_init_recycle_ring(struct mlx4_en_priv *priv,
int tx_ring_idx)
{
@@ -1663,16 +1645,9 @@ int mlx4_en_start_port(struct net_device *dev)
for (i = 0; i < priv->rx_ring_num; i++) {
cq = priv->rx_cq[i];
- err = mlx4_en_init_affinity_hint(priv, i);
- if (err) {
- en_err(priv, "Failed preparing IRQ affinity hint\n");
- goto cq_err;
- }
-
err = mlx4_en_activate_cq(priv, cq, i);
if (err) {
en_err(priv, "Failed activating Rx CQ\n");
- mlx4_en_free_affinity_hint(priv, i);
goto cq_err;
}
@@ -1688,7 +1663,6 @@ int mlx4_en_start_port(struct net_device *dev)
if (err) {
en_err(priv, "Failed setting cq moderation parameters\n");
mlx4_en_deactivate_cq(priv, cq);
- mlx4_en_free_affinity_hint(priv, i);
goto cq_err;
}
mlx4_en_arm_cq(priv, cq);
@@ -1874,10 +1848,9 @@ int mlx4_en_start_port(struct net_device *dev)
mac_err:
mlx4_en_put_qp(priv);
cq_err:
- while (rx_index--) {
+ while (rx_index--)
mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]);
- mlx4_en_free_affinity_hint(priv, rx_index);
- }
+
for (i = 0; i < priv->rx_ring_num; i++)
mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
@@ -2011,8 +1984,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
napi_synchronize(&cq->napi);
mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
mlx4_en_deactivate_cq(priv, cq);
-
- mlx4_en_free_affinity_hint(priv, i);
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 9572a45f6143..0f7a1302def2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -233,23 +233,6 @@ static void mlx4_slave_event(struct mlx4_dev *dev, int slave,
slave_event(dev, slave, eqe);
}
-#if defined(CONFIG_SMP)
-static void mlx4_set_eq_affinity_hint(struct mlx4_priv *priv, int vec)
-{
- int hint_err;
- struct mlx4_dev *dev = &priv->dev;
- struct mlx4_eq *eq = &priv->eq_table.eq[vec];
-
- if (!cpumask_available(eq->affinity_mask) ||
- cpumask_empty(eq->affinity_mask))
- return;
-
- hint_err = irq_update_affinity_hint(eq->irq, eq->affinity_mask);
- if (hint_err)
- mlx4_warn(dev, "irq_update_affinity_hint failed, err %d\n", hint_err);
-}
-#endif
-
int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port)
{
struct mlx4_eqe eqe;
@@ -1123,8 +1106,6 @@ static void mlx4_free_irqs(struct mlx4_dev *dev)
for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
if (eq_table->eq[i].have_irq) {
- free_cpumask_var(eq_table->eq[i].affinity_mask);
- irq_update_affinity_hint(eq_table->eq[i].irq, NULL);
free_irq(eq_table->eq[i].irq, eq_table->eq + i);
eq_table->eq[i].have_irq = 0;
}
@@ -1516,9 +1497,6 @@ int mlx4_assign_eq(struct mlx4_dev *dev, u8 port, int *vector)
clear_bit(*prequested_vector, priv->msix_ctl.pool_bm);
*prequested_vector = -1;
} else {
-#if defined(CONFIG_SMP)
- mlx4_set_eq_affinity_hint(priv, *prequested_vector);
-#endif
eq_set_ci(&priv->eq_table.eq[*prequested_vector], 1);
priv->eq_table.eq[*prequested_vector].have_irq = 1;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index febeadfdd5a5..5f88c297332f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2923,36 +2923,6 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
return err;
}
-static int mlx4_init_affinity_hint(struct mlx4_dev *dev, int port, int eqn)
-{
- int requested_cpu = 0;
- struct mlx4_priv *priv = mlx4_priv(dev);
- struct mlx4_eq *eq;
- int off = 0;
- int i;
-
- if (eqn > dev->caps.num_comp_vectors)
- return -EINVAL;
-
- for (i = 1; i < port; i++)
- off += mlx4_get_eqs_per_port(dev, i);
-
- requested_cpu = eqn - off - !!(eqn > MLX4_EQ_ASYNC);
-
- /* Meaning EQs are shared, and this call comes from the second port */
- if (requested_cpu < 0)
- return 0;
-
- eq = &priv->eq_table.eq[eqn];
-
- if (!zalloc_cpumask_var(&eq->affinity_mask, GFP_KERNEL))
- return -ENOMEM;
-
- cpumask_set_cpu(requested_cpu, eq->affinity_mask);
-
- return 0;
-}
-
static void mlx4_enable_msi_x(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -2997,19 +2967,13 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
priv->eq_table.eq[i].irq =
entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector;
- if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) {
+ if (MLX4_IS_LEGACY_EQ_MODE(dev->caps))
bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
dev->caps.num_ports);
- /* We don't set affinity hint when there
- * aren't enough EQs
- */
- } else {
+ else
set_bit(port,
priv->eq_table.eq[i].actv_ports.ports);
- if (mlx4_init_affinity_hint(dev, port + 1, i))
- mlx4_warn(dev, "Couldn't init hint cpumask for EQ %d\n",
- i);
- }
+
/* We divide the Eqs evenly between the two ports.
* (dev->caps.num_comp_vectors / dev->caps.num_ports)
* refers to the number of Eqs per port
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index d7d856d1758a..66b1ebfd5816 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -403,7 +403,6 @@ struct mlx4_eq {
struct mlx4_eq_tasklet tasklet_ctx;
struct mlx4_active_ports actv_ports;
u32 ref_count;
- cpumask_var_t affinity_mask;
};
struct mlx4_slave_eqe {
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 28b70dcc652e..41594dd00636 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -357,7 +357,6 @@ struct mlx4_en_rx_ring {
unsigned long dropped;
unsigned long alloc_fail;
int hwtstamp_rx_filter;
- cpumask_var_t affinity_mask;
struct xdp_rxq_info xdp_rxq;
};
--
2.47.0
* [PATCH v1 net-next 5/6] ice: use napi's irq affinity
2024-12-10 0:26 [PATCH v1 net-next 0/6] net: napi: add CPU affinity to napi->config Ahmed Zaki
` (3 preceding siblings ...)
2024-12-10 0:26 ` [PATCH v1 net-next 4/6] mlx4: " Ahmed Zaki
@ 2024-12-10 0:26 ` Ahmed Zaki
2024-12-10 0:26 ` [PATCH v1 net-next 6/6] idpf: " Ahmed Zaki
5 siblings, 0 replies; 10+ messages in thread
From: Ahmed Zaki @ 2024-12-10 0:26 UTC (permalink / raw)
To: netdev
Cc: intel-wired-lan, andrew+netdev, edumazet, kuba, pabeni, davem,
michael.chan, tariqt, anthony.l.nguyen, przemyslaw.kitszel,
Ahmed Zaki
Delete the driver CPU affinity info and use the core's napi config
instead.
Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
---
drivers/net/ethernet/intel/ice/ice.h | 3 --
drivers/net/ethernet/intel/ice/ice_base.c | 7 ++--
drivers/net/ethernet/intel/ice/ice_lib.c | 9 ++---
drivers/net/ethernet/intel/ice/ice_main.c | 44 -----------------------
4 files changed, 4 insertions(+), 59 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 2f5d6f974185..0db665a6b38a 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -477,9 +477,6 @@ struct ice_q_vector {
struct ice_ring_container rx;
struct ice_ring_container tx;
- cpumask_t affinity_mask;
- struct irq_affinity_notify affinity_notify;
-
struct ice_channel *ch;
char name[ICE_INT_NAME_STR_LEN];
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 82a9cd4ec7ae..48a6e1068223 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -147,10 +147,6 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx)
q_vector->reg_idx = q_vector->irq.index;
q_vector->vf_reg_idx = q_vector->irq.index;
- /* only set affinity_mask if the CPU is online */
- if (cpu_online(v_idx))
- cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
-
/* This will not be called in the driver load path because the netdev
* will not be created yet. All other cases with register the NAPI
* handler here (i.e. resume, reset/rebuild, etc.)
@@ -276,7 +272,8 @@ static void ice_cfg_xps_tx_ring(struct ice_tx_ring *ring)
if (test_and_set_bit(ICE_TX_XPS_INIT_DONE, ring->xps_state))
return;
- netif_set_xps_queue(ring->netdev, &ring->q_vector->affinity_mask,
+ netif_set_xps_queue(ring->netdev,
+ &ring->q_vector->napi.config->affinity_mask,
ring->q_index);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index ff91e70f596f..91fd4271129d 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2589,12 +2589,6 @@ void ice_vsi_free_irq(struct ice_vsi *vsi)
vsi->q_vectors[i]->num_ring_rx))
continue;
- /* clear the affinity notifier in the IRQ descriptor */
- if (!IS_ENABLED(CONFIG_RFS_ACCEL))
- irq_set_affinity_notifier(irq_num, NULL);
-
- /* clear the affinity_hint in the IRQ descriptor */
- irq_update_affinity_hint(irq_num, NULL);
synchronize_irq(irq_num);
devm_free_irq(ice_pf_to_dev(pf), irq_num, vsi->q_vectors[i]);
}
@@ -2735,7 +2729,8 @@ void ice_vsi_set_napi_queues(struct ice_vsi *vsi)
ice_for_each_q_vector(vsi, v_idx) {
struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
- netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq, 0);
+ netif_napi_set_irq(&q_vector->napi,
+ q_vector->irq.virq, NAPIF_F_IRQ_AFFINITY);
}
}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 1eaa4428fd24..93150d9ad565 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2504,34 +2504,6 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
return 0;
}
-/**
- * ice_irq_affinity_notify - Callback for affinity changes
- * @notify: context as to what irq was changed
- * @mask: the new affinity mask
- *
- * This is a callback function used by the irq_set_affinity_notifier function
- * so that we may register to receive changes to the irq affinity masks.
- */
-static void
-ice_irq_affinity_notify(struct irq_affinity_notify *notify,
- const cpumask_t *mask)
-{
- struct ice_q_vector *q_vector =
- container_of(notify, struct ice_q_vector, affinity_notify);
-
- cpumask_copy(&q_vector->affinity_mask, mask);
-}
-
-/**
- * ice_irq_affinity_release - Callback for affinity notifier release
- * @ref: internal core kernel usage
- *
- * This is a callback function used by the irq_set_affinity_notifier function
- * to inform the current notification subscriber that they will no longer
- * receive notifications.
- */
-static void ice_irq_affinity_release(struct kref __always_unused *ref) {}
-
/**
* ice_vsi_ena_irq - Enable IRQ for the given VSI
* @vsi: the VSI being configured
@@ -2595,19 +2567,6 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
err);
goto free_q_irqs;
}
-
- /* register for affinity change notifications */
- if (!IS_ENABLED(CONFIG_RFS_ACCEL)) {
- struct irq_affinity_notify *affinity_notify;
-
- affinity_notify = &q_vector->affinity_notify;
- affinity_notify->notify = ice_irq_affinity_notify;
- affinity_notify->release = ice_irq_affinity_release;
- irq_set_affinity_notifier(irq_num, affinity_notify);
- }
-
- /* assign the mask for this irq */
- irq_update_affinity_hint(irq_num, &q_vector->affinity_mask);
}
err = ice_set_cpu_rx_rmap(vsi);
@@ -2623,9 +2582,6 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
free_q_irqs:
while (vector--) {
irq_num = vsi->q_vectors[vector]->irq.virq;
- if (!IS_ENABLED(CONFIG_RFS_ACCEL))
- irq_set_affinity_notifier(irq_num, NULL);
- irq_update_affinity_hint(irq_num, NULL);
devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]);
}
return err;
--
2.47.0
* [PATCH v1 net-next 6/6] idpf: use napi's irq affinity
2024-12-10 0:26 [PATCH v1 net-next 0/6] net: napi: add CPU affinity to napi->config Ahmed Zaki
` (4 preceding siblings ...)
2024-12-10 0:26 ` [PATCH v1 net-next 5/6] ice: " Ahmed Zaki
@ 2024-12-10 0:26 ` Ahmed Zaki
5 siblings, 0 replies; 10+ messages in thread
From: Ahmed Zaki @ 2024-12-10 0:26 UTC (permalink / raw)
To: netdev
Cc: intel-wired-lan, andrew+netdev, edumazet, kuba, pabeni, davem,
michael.chan, tariqt, anthony.l.nguyen, przemyslaw.kitszel,
Ahmed Zaki
Delete the driver CPU affinity info and use the core's napi config
instead.
Cc: intel-wired-lan@lists.osuosl.org
Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
---
drivers/net/ethernet/intel/idpf/idpf_txrx.c | 19 +++++--------------
drivers/net/ethernet/intel/idpf/idpf_txrx.h | 6 ++----
2 files changed, 7 insertions(+), 18 deletions(-)
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index da2a5becf62f..6f119e0d4136 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -3553,8 +3553,6 @@ void idpf_vport_intr_rel(struct idpf_vport *vport)
q_vector->tx = NULL;
kfree(q_vector->rx);
q_vector->rx = NULL;
-
- free_cpumask_var(q_vector->affinity_mask);
}
kfree(vport->q_vectors);
@@ -3581,8 +3579,6 @@ static void idpf_vport_intr_rel_irq(struct idpf_vport *vport)
vidx = vport->q_vector_idxs[vector];
irq_num = adapter->msix_entries[vidx].vector;
- /* clear the affinity_mask in the IRQ descriptor */
- irq_set_affinity_hint(irq_num, NULL);
kfree(free_irq(irq_num, q_vector));
}
}
@@ -3761,8 +3757,6 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport)
"Request_irq failed, error: %d\n", err);
goto free_q_irqs;
}
- /* assign the mask for this irq */
- irq_set_affinity_hint(irq_num, q_vector->affinity_mask);
}
return 0;
@@ -4183,12 +4177,12 @@ static void idpf_vport_intr_napi_add_all(struct idpf_vport *vport)
for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) {
struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx];
+ int irq_num = vport->adapter->msix_entries[v_idx].vector;
- netif_napi_add(vport->netdev, &q_vector->napi, napi_poll);
-
- /* only set affinity_mask if the CPU is online */
- if (cpu_online(v_idx))
- cpumask_set_cpu(v_idx, q_vector->affinity_mask);
+ netif_napi_add_config(vport->netdev, &q_vector->napi,
+ napi_poll, v_idx);
+ netif_napi_set_irq(&q_vector->napi,
+ irq_num, NAPIF_F_IRQ_AFFINITY);
}
}
@@ -4232,9 +4226,6 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport)
q_vector->rx_intr_mode = IDPF_ITR_DYNAMIC;
q_vector->rx_itr_idx = VIRTCHNL2_ITR_IDX_0;
- if (!zalloc_cpumask_var(&q_vector->affinity_mask, GFP_KERNEL))
- goto error;
-
q_vector->tx = kcalloc(txqs_per_vector, sizeof(*q_vector->tx),
GFP_KERNEL);
if (!q_vector->tx)
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index 9c1fe84108ed..5efb3402b378 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -397,7 +397,6 @@ struct idpf_intr_reg {
* @rx_intr_mode: Dynamic ITR or not
* @rx_itr_idx: RX ITR index
* @v_idx: Vector index
- * @affinity_mask: CPU affinity mask
*/
struct idpf_q_vector {
__cacheline_group_begin_aligned(read_mostly);
@@ -434,13 +433,12 @@ struct idpf_q_vector {
__cacheline_group_begin_aligned(cold);
u16 v_idx;
- cpumask_var_t affinity_mask;
__cacheline_group_end_aligned(cold);
};
libeth_cacheline_set_assert(struct idpf_q_vector, 112,
24 + sizeof(struct napi_struct) +
2 * sizeof(struct dim),
- 8 + sizeof(cpumask_var_t));
+ 8);
struct idpf_rx_queue_stats {
u64_stats_t packets;
@@ -934,7 +932,7 @@ static inline int idpf_q_vector_to_mem(const struct idpf_q_vector *q_vector)
if (!q_vector)
return NUMA_NO_NODE;
- cpu = cpumask_first(q_vector->affinity_mask);
+ cpu = cpumask_first(&q_vector->napi.config->affinity_mask);
return cpu < nr_cpu_ids ? cpu_to_mem(cpu) : NUMA_NO_NODE;
}
--
2.47.0