Re: [PATCH net-next V2 2/2] net/mlx5e: Improve remote NUMA preferences used for the IRQ affinity hints

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Peter Zijlstra <peterz@infradead.org>
To: Tariq Toukan <tariqt@nvidia.com>
Cc: "David S. Miller" <davem@davemloft.net>,
	Saeed Mahameed <saeedm@nvidia.com>,
	Jakub Kicinski <kuba@kernel.org>, Ingo Molnar <mingo@redhat.com>,
	Juri Lelli <juri.lelli@redhat.com>,
	Eric Dumazet <edumazet@google.com>,
	Paolo Abeni <pabeni@redhat.com>,
	netdev@vger.kernel.org, Gal Pressman <gal@nvidia.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH net-next V2 2/2] net/mlx5e: Improve remote NUMA preferences used for the IRQ affinity hints
Date: Mon, 18 Jul 2022 15:50:06 +0200	[thread overview]
Message-ID: <YtVlDiLTPxm312u+@worktop.programming.kicks-ass.net> (raw)
In-Reply-To: <20220718124315.16648-3-tariqt@nvidia.com>

On Mon, Jul 18, 2022 at 03:43:15PM +0300, Tariq Toukan wrote:

> Reviewed-by: Gal Pressman <gal@nvidia.com>
> Acked-by: Saeed Mahameed <saeedm@nvidia.com>
> Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/eq.c | 62 +++++++++++++++++++-
>  1 file changed, 59 insertions(+), 3 deletions(-)
> 
> v2:
> Separated the set_cpu operation into two functions, per Saeed's suggestion.
> Added Saeed's Acked-by signature.
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
> index 229728c80233..e72bdaaad84f 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
> @@ -11,6 +11,9 @@
>  #ifdef CONFIG_RFS_ACCEL
>  #include <linux/cpu_rmap.h>
>  #endif
> +#ifdef CONFIG_NUMA
> +#include <linux/sched/topology.h>
> +#endif
>  #include "mlx5_core.h"
>  #include "lib/eq.h"
>  #include "fpga/core.h"
> @@ -806,13 +809,67 @@ static void comp_irqs_release(struct mlx5_core_dev *dev)
>  	kfree(table->comp_irqs);
>  }
>  
> +static void set_cpus_by_local_spread(struct mlx5_core_dev *dev, u16 *cpus,
> +				     int ncomp_eqs)
> +{
> +	int i;
> +
> +	for (i = 0; i < ncomp_eqs; i++)
> +		cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
> +}
> +
> +static bool set_cpus_by_numa_distance(struct mlx5_core_dev *dev, u16 *cpus,
> +				      int ncomp_eqs)
> +{
> +#ifdef CONFIG_NUMA
> +	cpumask_var_t cpumask;
> +	int first;
> +	int i;
> +
> +	if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL)) {
> +		mlx5_core_err(dev, "zalloc_cpumask_var failed\n");
> +		return false;
> +	}
> +	cpumask_copy(cpumask, cpu_online_mask);
> +
> +	first = cpumask_local_spread(0, dev->priv.numa_node);

Arguably you want something like:

	first = cpumask_any(cpumask_of_node(dev->priv.numa_node));

> +
> +	for (i = 0; i < ncomp_eqs; i++) {
> +		int cpu;
> +
> +		cpu = sched_numa_find_closest(cpumask, first);
> +		if (cpu >= nr_cpu_ids) {
> +			mlx5_core_err(dev, "sched_numa_find_closest failed, cpu(%d) >= nr_cpu_ids(%d)\n",
> +				      cpu, nr_cpu_ids);
> +
> +			free_cpumask_var(cpumask);
> +			return false;

So this will fail when ncomp_eqs > cpumask_weight(online_cpus); is that
desired?

> +		}
> +		cpus[i] = cpu;
> +		cpumask_clear_cpu(cpu, cpumask);

Since there is no concurrency on this cpumask, you don't need atomic
ops:

		__cpumask_clear_cpu(..);

> +	}
> +
> +	free_cpumask_var(cpumask);
> +	return true;
> +#else
> +	return false;
> +#endif
> +}
> +
> +static void mlx5_set_eqs_cpus(struct mlx5_core_dev *dev, u16 *cpus, int ncomp_eqs)
> +{
> +	bool success = set_cpus_by_numa_distance(dev, cpus, ncomp_eqs);
> +
> +	if (!success)
> +		set_cpus_by_local_spread(dev, cpus, ncomp_eqs);
> +}
> +
>  static int comp_irqs_request(struct mlx5_core_dev *dev)
>  {
>  	struct mlx5_eq_table *table = dev->priv.eq_table;
>  	int ncomp_eqs = table->num_comp_eqs;
>  	u16 *cpus;
>  	int ret;
> -	int i;
>  
>  	ncomp_eqs = table->num_comp_eqs;
>  	table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
> @@ -830,8 +887,7 @@ static int comp_irqs_request(struct mlx5_core_dev *dev)
>  		ret = -ENOMEM;
>  		goto free_irqs;
>  	}
> -	for (i = 0; i < ncomp_eqs; i++)
> -		cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
> +	mlx5_set_eqs_cpus(dev, cpus, ncomp_eqs);

So you change this for mlx5, what about the other users of
cpumask_local_spread() ?

next prev parent reply	other threads:[~2022-07-18 13:50 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-18 12:43 [PATCH net-next V2 0/2] mlx5: Use NUMA distance metrics Tariq Toukan
2022-07-18 12:43 ` [PATCH net-next V2 1/2] sched/topology: Expose sched_numa_find_closest Tariq Toukan
2022-07-18 13:47   ` Peter Zijlstra
2022-07-18 12:43 ` [PATCH net-next V2 2/2] net/mlx5e: Improve remote NUMA preferences used for the IRQ affinity hints Tariq Toukan
2022-07-18 13:50   ` Peter Zijlstra [this message]
2022-07-18 19:49     ` Tariq Toukan
2022-07-18 21:24       ` Peter Zijlstra
2022-07-18 21:57       ` Jakub Kicinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YtVlDiLTPxm312u+@worktop.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=gal@nvidia.com \
    --cc=juri.lelli@redhat.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    --cc=tariqt@nvidia.com \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.