From: Saeed Mahameed <saeedm@mellanox.com>
To: "David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org, Saeed Mahameed <saeedm@mellanox.com>,
Sagi Grimberg <sagi@grimberg.me>,
Thomas Gleixner <tglx@linutronix.de>,
Jes Sorensen <jsorensen@fb.com>
Subject: [net 02/14] Revert "mlx5: move affinity hints assignments to generic code"
Date: Wed, 20 Dec 2017 00:24:44 +0200 [thread overview]
Message-ID: <20171219222456.29627-3-saeedm@mellanox.com> (raw)
In-Reply-To: <20171219222456.29627-1-saeedm@mellanox.com>
Before the offending commit, mlx5 core did the IRQ affinity itself,
and it seems that the new generic code have some drawbacks and one
of them is the lack for user ability to modify irq affinity after
the initial affinity values got assigned.
The issue is still being discussed and a solution in the new generic code
is required, until then we need to revert this patch.
This fixes the following issue:
echo <new affinity> > /proc/irq/<x>/smp_affinity
fails with -EIO
This reverts commit a435393acafbf0ecff4deb3e3cb554b34f0d0664.
Note: kept mlx5_get_vector_affinity in include/linux/mlx5/driver.h since
it is used in mlx5_ib driver.
Fixes: a435393acafb ("mlx5: move affinity hints assignments to generic code")
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jes Sorensen <jsorensen@fb.com>
Reported-by: Jes Sorensen <jsorensen@fb.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 +
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 45 +++++++-------
drivers/net/ethernet/mellanox/mlx5/core/main.c | 75 +++++++++++++++++++++--
include/linux/mlx5/driver.h | 1 +
4 files changed, 93 insertions(+), 29 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c0872b3284cb..43f9054830e5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -590,6 +590,7 @@ struct mlx5e_channel {
struct mlx5_core_dev *mdev;
struct hwtstamp_config *tstamp;
int ix;
+ int cpu;
};
struct mlx5e_channels {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d2b057a3e512..cbec66bc82f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -71,11 +71,6 @@ struct mlx5e_channel_param {
struct mlx5e_cq_param icosq_cq;
};
-static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
-{
- return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
-}
-
static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
return MLX5_CAP_GEN(mdev, striding_rq) &&
@@ -444,17 +439,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
int mtt_sz = mlx5e_get_wqe_mtt_sz();
int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
- int node = mlx5e_get_node(c->priv, c->ix);
int i;
rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
- GFP_KERNEL, node);
+ GFP_KERNEL, cpu_to_node(c->cpu));
if (!rq->mpwqe.info)
goto err_out;
/* We allocate more than mtt_sz as we will align the pointer */
- rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
- GFP_KERNEL, node);
+ rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
+ cpu_to_node(c->cpu));
if (unlikely(!rq->mpwqe.mtt_no_align))
goto err_free_wqe_info;
@@ -562,7 +556,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
int err;
int i;
- rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+ rqp->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
&rq->wq_ctrl);
@@ -629,8 +623,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
default: /* MLX5_WQ_TYPE_LINKED_LIST */
rq->wqe.frag_info =
kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
- GFP_KERNEL,
- mlx5e_get_node(c->priv, c->ix));
+ GFP_KERNEL, cpu_to_node(c->cpu));
if (!rq->wqe.frag_info) {
err = -ENOMEM;
goto err_rq_wq_destroy;
@@ -1000,13 +993,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
- param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
if (err)
return err;
sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
- err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+ err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
if (err)
goto err_sq_wq_destroy;
@@ -1053,13 +1046,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
sq->channel = c;
sq->uar_map = mdev->mlx5e_res.bfreg.map;
- param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
if (err)
return err;
sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
- err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
+ err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
if (err)
goto err_sq_wq_destroy;
@@ -1126,13 +1119,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
- param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
if (err)
return err;
sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
- err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+ err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
if (err)
goto err_sq_wq_destroy;
@@ -1504,8 +1497,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
struct mlx5_core_dev *mdev = c->priv->mdev;
int err;
- param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
- param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+ param->wq.buf_numa_node = cpu_to_node(c->cpu);
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
param->eq_ix = c->ix;
err = mlx5e_alloc_cq_common(mdev, param, cq);
@@ -1604,6 +1597,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
mlx5e_free_cq(cq);
}
+static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
+{
+ return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
+}
+
static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam)
@@ -1752,12 +1750,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
{
struct mlx5e_cq_moder icocq_moder = {0, 0};
struct net_device *netdev = priv->netdev;
+ int cpu = mlx5e_get_cpu(priv, ix);
struct mlx5e_channel *c;
unsigned int irq;
int err;
int eqn;
- c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
+ c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
if (!c)
return -ENOMEM;
@@ -1765,6 +1764,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->mdev = priv->mdev;
c->tstamp = &priv->tstamp;
c->ix = ix;
+ c->cpu = cpu;
c->pdev = &priv->mdev->pdev->dev;
c->netdev = priv->netdev;
c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
@@ -1853,8 +1853,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
for (tc = 0; tc < c->num_tc; tc++)
mlx5e_activate_txqsq(&c->sq[tc]);
mlx5e_activate_rq(&c->rq);
- netif_set_xps_queue(c->netdev,
- mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
+ netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
}
static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 5f323442cc5a..8a89c7e8cd63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_eq_table *table = &priv->eq_table;
- struct irq_affinity irqdesc = {
- .pre_vectors = MLX5_EQ_VEC_COMP_BASE,
- };
int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
int nvec;
@@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
if (!priv->irq_info)
goto err_free_msix;
- nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
+ nvec = pci_alloc_irq_vectors(dev->pdev,
MLX5_EQ_VEC_COMP_BASE + 1, nvec,
- PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
- &irqdesc);
+ PCI_IRQ_MSIX);
if (nvec < 0)
return nvec;
@@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
return (u64)timer_l | (u64)timer_h1 << 32;
}
+static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ struct mlx5_priv *priv = &mdev->priv;
+ int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+ if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
+ mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+ return -ENOMEM;
+ }
+
+ cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+ priv->irq_info[i].mask);
+
+ if (IS_ENABLED(CONFIG_SMP) &&
+ irq_set_affinity_hint(irq, priv->irq_info[i].mask))
+ mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+ return 0;
+}
+
+static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ struct mlx5_priv *priv = &mdev->priv;
+ int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+ irq_set_affinity_hint(irq, NULL);
+ free_cpumask_var(priv->irq_info[i].mask);
+}
+
+static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
+ err = mlx5_irq_set_affinity_hint(mdev, i);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ for (i--; i >= 0; i--)
+ mlx5_irq_clear_affinity_hint(mdev, i);
+
+ return err;
+}
+
+static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
+ mlx5_irq_clear_affinity_hint(mdev, i);
+}
+
int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
unsigned int *irqn)
{
@@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_stop_eqs;
}
+ err = mlx5_irq_set_affinity_hints(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
+ goto err_affinity_hints;
+ }
+
err = mlx5_init_fs(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1154,6 +1213,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_cleanup_fs(dev);
err_fs:
+ mlx5_irq_clear_affinity_hints(dev);
+
+err_affinity_hints:
free_comp_eqs(dev);
err_stop_eqs:
@@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_sriov_detach(dev);
mlx5_cleanup_fs(dev);
+ mlx5_irq_clear_affinity_hints(dev);
free_comp_eqs(dev);
mlx5_stop_eqs(dev);
mlx5_put_uars_page(dev, priv->uar);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a886b51511ab..40a6f33c4cde 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -556,6 +556,7 @@ struct mlx5_core_sriov {
};
struct mlx5_irq_info {
+ cpumask_var_t mask;
char name[MLX5_MAX_IRQ_NAME];
};
--
2.13.0
next prev parent reply other threads:[~2017-12-19 22:27 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-19 22:24 [pull request][net 00/14] Mellanox, mlx5 fixes 2017-12-19 Saeed Mahameed
2017-12-19 22:24 ` [net 01/14] net/mlx5: FPGA, return -EINVAL if size is zero Saeed Mahameed
2017-12-19 22:24 ` Saeed Mahameed [this message]
2017-12-19 22:42 ` [net 02/14] Revert "mlx5: move affinity hints assignments to generic code" Jes Sorensen
2017-12-25 13:53 ` Sagi Grimberg
2017-12-26 15:58 ` Saeed Mahameed
2017-12-26 17:14 ` Sagi Grimberg
2018-01-04 20:19 ` Saeed Mahameed
2017-12-19 22:24 ` [net 03/14] net/mlx5: Fix rate limit packet pacing naming and struct Saeed Mahameed
2017-12-19 22:24 ` [net 04/14] net/mlx5e: Fix ETS BW check Saeed Mahameed
2017-12-20 7:42 ` Or Gerlitz
2017-12-19 22:24 ` [net 05/14] net/mlx5e: Fix features check of IPv6 traffic Saeed Mahameed
2017-12-20 7:45 ` Or Gerlitz
2017-12-19 22:24 ` [net 06/14] net/mlx5e: Fix defaulting RX ring size when not needed Saeed Mahameed
2017-12-19 22:24 ` [net 07/14] net/mlx5: Fix misspelling in the error message and comment Saeed Mahameed
2017-12-19 22:24 ` [net 08/14] net/mlx5: Fix error flow in CREATE_QP command Saeed Mahameed
2017-12-19 22:24 ` [net 09/14] net/mlx5e: Fix possible deadlock of VXLAN lock Saeed Mahameed
2017-12-19 22:24 ` [net 10/14] net/mlx5e: Add refcount to VXLAN structure Saeed Mahameed
2017-12-19 22:24 ` [net 11/14] net/mlx5e: Prevent possible races in VXLAN control flow Saeed Mahameed
2017-12-19 22:24 ` [net 12/14] net/mlx5: Fix steering memory leak Saeed Mahameed
2017-12-19 22:24 ` [net 13/14] net/mlx5: Cleanup IRQs in case of unload failure Saeed Mahameed
2017-12-19 22:24 ` [net 14/14] net/mlx5: Stay in polling mode when command EQ destroy fails Saeed Mahameed
2017-12-20 18:42 ` [pull request][net 00/14] Mellanox, mlx5 fixes 2017-12-19 David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171219222456.29627-3-saeedm@mellanox.com \
--to=saeedm@mellanox.com \
--cc=davem@davemloft.net \
--cc=jsorensen@fb.com \
--cc=netdev@vger.kernel.org \
--cc=sagi@grimberg.me \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox