Netdev List
 help / color / mirror / Atom feed
* [net-next 13/16] net/mlx5e: Report and recover from CQE with error on RQ
From: Saeed Mahameed @ 2019-08-15 19:10 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
	Saeed Mahameed
In-Reply-To: <20190815190911.12050-1-saeedm@mellanox.com>

From: Aya Levin <ayal@mellanox.com>

Add support for report and recovery from error on completion on RQ by
setting the queue back to ready state. Handle only errors with a
syndrome indicating the RQ might enter error state and could be
recovered.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  3 +
 .../ethernet/mellanox/mlx5/core/en/health.h   |  9 +++
 .../mellanox/mlx5/core/en/reporter_rx.c       | 66 +++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  9 +++
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 11 ++++
 5 files changed, 98 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 5f2a1d14de68..822f7b620640 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -295,6 +295,7 @@ struct mlx5e_dcbx_dp {
 
 enum {
 	MLX5E_RQ_STATE_ENABLED,
+	MLX5E_RQ_STATE_RECOVERING,
 	MLX5E_RQ_STATE_AM,
 	MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
 	MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
@@ -667,6 +668,8 @@ struct mlx5e_rq {
 	struct zero_copy_allocator zca;
 	struct xdp_umem       *umem;
 
+	struct work_struct     recover_work;
+
 	/* control */
 	struct mlx5_wq_ctrl    wq_ctrl;
 	__be32                 mkey_be;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index 52e9ca37cf46..d3693fa547ac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -8,6 +8,14 @@
 
 #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
 
+static inline bool cqe_syndrome_needs_recover(u8 syndrome)
+{
+	return syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR ||
+	       syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+	       syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
+	       syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+}
+
 int mlx5e_reporter_tx_create(struct mlx5e_priv *priv);
 void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv);
 void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq);
@@ -21,6 +29,7 @@ int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg);
 int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
 void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
 void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
+void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
 void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
 
 #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 4f5547ac4bee..b4f7e535dbc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -111,6 +111,72 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
 	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
 }
 
+static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
+{
+	struct net_device *dev = rq->netdev;
+	int err;
+
+	err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
+	if (err) {
+		netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
+		return err;
+	}
+	err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+	if (err) {
+		netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
+		return err;
+	}
+
+	return 0;
+}
+
+static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
+{
+	struct mlx5e_rq *rq = ctx;
+	struct mlx5_core_dev *mdev = rq->mdev;
+	struct net_device *dev = rq->netdev;
+	u8 state;
+	int err;
+
+	err = mlx5e_query_rq_state(mdev, rq->rqn, &state);
+	if (err) {
+		netdev_err(dev, "Failed to query RQ 0x%x state. err = %d\n",
+			   rq->rqn, err);
+		goto out;
+	}
+
+	if (state != MLX5_RQC_STATE_ERR)
+		goto out;
+
+	mlx5e_deactivate_rq(rq);
+	mlx5e_free_rx_descs(rq);
+
+	err = mlx5e_rq_to_ready(rq, MLX5_RQC_STATE_ERR);
+	if (err)
+		goto out;
+
+	clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
+	mlx5e_activate_rq(rq);
+	rq->stats->recover++;
+	return 0;
+out:
+	clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
+	return err;
+}
+
+void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq)
+{
+	struct mlx5e_priv *priv = rq->channel->priv;
+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+	struct mlx5e_err_ctx err_ctx = {};
+
+	err_ctx.ctx = rq;
+	err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover;
+	sprintf(err_str, "ERR CQE on RQ: 0x%x", rq->rqn);
+
+	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
 static int mlx5e_rx_reporter_timeout_recover(void *ctx)
 {
 	struct mlx5e_rq *rq = ctx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 76845bafd708..77f0c8fad9df 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -362,6 +362,13 @@ static void mlx5e_free_di_list(struct mlx5e_rq *rq)
 	kvfree(rq->wqe.di);
 }
 
+static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
+{
+	struct mlx5e_rq *rq = container_of(recover_work, struct mlx5e_rq, recover_work);
+
+	mlx5e_reporter_rq_cqe_err(rq);
+}
+
 static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 			  struct mlx5e_params *params,
 			  struct mlx5e_xsk_param *xsk,
@@ -398,6 +405,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		rq->stats = &c->priv->channel_stats[c->ix].xskrq;
 	else
 		rq->stats = &c->priv->channel_stats[c->ix].rq;
+	INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
 
 	rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL;
 	if (IS_ERR(rq->xdp_prog)) {
@@ -907,6 +915,7 @@ void mlx5e_close_rq(struct mlx5e_rq *rq)
 {
 	cancel_work_sync(&rq->dim.work);
 	cancel_work_sync(&rq->channel->icosq.recover_work);
+	cancel_work_sync(&rq->recover_work);
 	mlx5e_destroy_rq(rq);
 	mlx5e_free_rx_descs(rq);
 	mlx5e_free_rq(rq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 43d790b7d4ec..2fd2760d0bb7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1130,6 +1130,15 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 	return skb;
 }
 
+static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+	struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;
+
+	if (cqe_syndrome_needs_recover(err_cqe->syndrome) &&
+	    !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state))
+		queue_work(rq->channel->priv->wq, &rq->recover_work);
+}
+
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 {
 	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
@@ -1143,6 +1152,7 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
 
 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+		trigger_report(rq, cqe);
 		rq->stats->wqe_err++;
 		goto free_wqe;
 	}
@@ -1328,6 +1338,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	wi->consumed_strides += cstrides;
 
 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+		trigger_report(rq, cqe);
 		rq->stats->wqe_err++;
 		goto mpwrq_cqe_out;
 	}
-- 
2.21.0


^ permalink raw reply related

* [net-next 08/16] net/mlx5e: Add support to rx reporter diagnose
From: Saeed Mahameed @ 2019-08-15 19:10 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
	Saeed Mahameed
In-Reply-To: <20190815190911.12050-1-saeedm@mellanox.com>

From: Aya Levin <ayal@mellanox.com>

Add rx reporter, which supports diagnose call-back. Diagnostics output
include: information common to all RQs: RQ type, RQ size, RQ stride
size, CQ size and CQ stride size. In addition advertise information per
RQ and its related icosq and attached CQ.

$ devlink health diagnose pci/0000:00:0b.0 reporter rx
 Common config:
   RQ:
     type: 2 stride size: 2048 size: 8
   CQ:
     stride size: 64 size: 1024
 RQs:
   channel ix: 0 rqn: 4308 HW state: 1 SW state: 3 posted WQEs: 7 cc: 7 ICOSQ HW state: 1
   CQ:
     cqn: 1032 HW status: 0
   channel ix: 1 rqn: 4313 HW state: 1 SW state: 3 posted WQEs: 7 cc: 7 ICOSQ HW state: 1
   CQ:
     cqn: 1036 HW status: 0
   channel ix: 2 rqn: 4318 HW state: 1 SW state: 3 posted WQEs: 7 cc: 7 ICOSQ HW state: 1
   CQ:
     cqn: 1040 HW status: 0
   channel ix: 3 rqn: 4323 HW state: 1 SW state: 3 posted WQEs: 7 cc: 7 ICOSQ HW state: 1
   CQ:
     cqn: 1044 HW status: 0

$ devlink health diagnose pci/0000:00:0b.0 reporter rx -jp
{
    "Common config": {
        "RQ": {
            "type": 2,
            "stride size": 2048,
            "size": 8
        },
        "CQ": {
            "stride size": 64,
            "size": 1024
        }
    },
    "RQs": [ {
            "channel ix": 0,
            "rqn": 4308,
            "HW state": 1,
            "SW state": 3,
            "posted WQEs": 7,
            "cc": 7,
            "ICOSQ HW state": 1,
            "CQ": {
                "cqn": 1032,
                "HW status": 0
            }
        },{
            "channel ix": 1,
            "rqn": 4313,
            "HW state": 1,
            "SW state": 3,
            "posted WQEs": 7,
            "cc": 7,
            "ICOSQ HW state": 1,
            "CQ": {
                "cqn": 1036,
                "HW status": 0
            }
        },{
            "channel ix": 2,
            "rqn": 4318,
            "HW state": 1,
            "SW state": 3,
            "posted WQEs": 7,
            "cc": 7,
            "ICOSQ HW state": 1,
            "CQ": {
                "cqn": 1040,
                "HW status": 0
            }
        },{
            "channel ix": 3,
            "rqn": 4323,
            "HW state": 1,
            "SW state": 3,
            "posted WQEs": 7,
            "cc": 7,
            "ICOSQ HW state": 1,
            "CQ": {
                "cqn": 1044,
                "HW status": 0
            }
        } ]
}

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  21 ++
 .../ethernet/mellanox/mlx5/core/en/health.c   |  16 +-
 .../ethernet/mellanox/mlx5/core/en/health.h   |   3 +
 .../mellanox/mlx5/core/en/reporter_rx.c       | 195 ++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  20 --
 6 files changed, 236 insertions(+), 23 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 23d566a45a30..a3b9659649a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -24,8 +24,8 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
 		en_selftest.o en/port.o en/monitor_stats.o en/health.o \
-		en/reporter_tx.o en/params.o en/xsk/umem.o en/xsk/setup.o \
-		en/xsk/rx.o en/xsk/tx.o
+		en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \
+		en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o
 
 #
 # Netdev extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 0807992090b8..de234650ba57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -841,6 +841,7 @@ struct mlx5e_priv {
 	struct mlx5e_tls          *tls;
 #endif
 	struct devlink_health_reporter *tx_reporter;
+	struct devlink_health_reporter *rx_reporter;
 	struct mlx5e_xsk           xsk;
 };
 
@@ -882,6 +883,26 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
 void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
 
+static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
+{
+	switch (rq->wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+	default:
+		return mlx5_wq_cyc_get_size(&rq->wqe.wq);
+	}
+}
+
+static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
+{
+	switch (rq->wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		return rq->mpwqe.wq.cur_sz;
+	default:
+		return rq->wqe.wq.cur_sz;
+	}
+}
+
 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
 bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
 				struct mlx5e_params *params);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index c003757fbec0..191d609b0d99 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -98,11 +98,22 @@ int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *
 
 int mlx5e_health_create_reporters(struct mlx5e_priv *priv)
 {
-	return  mlx5e_reporter_tx_create(priv);
+	int err;
+
+	err = mlx5e_reporter_tx_create(priv);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_rx_create(priv);
+	if (err)
+		return err;
+
+	return 0;
 }
 
 void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
 {
+	mlx5e_reporter_rx_destroy(priv);
 	mlx5e_reporter_tx_destroy(priv);
 }
 
@@ -111,6 +122,9 @@ void mlx5e_health_channels_update(struct mlx5e_priv *priv)
 	if (priv->tx_reporter)
 		devlink_health_reporter_state_update(priv->tx_reporter,
 						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+	if (priv->rx_reporter)
+		devlink_health_reporter_state_update(priv->rx_reporter,
+						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
 }
 
 int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index b2c0ccc79b22..a751c5316baf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -16,6 +16,9 @@ int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *
 int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name);
 int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg);
 
+int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
+void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
+
 #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
 
 struct mlx5e_err_ctx {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
new file mode 100644
index 000000000000..66611c50e1c9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "health.h"
+#include "params.h"
+
+static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
+{
+	int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
+	void *out;
+	void *rqc;
+	int err;
+
+	out = kvzalloc(outlen, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	err = mlx5_core_query_rq(dev, rqn, out);
+	if (err)
+		goto out;
+
+	rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
+	*state = MLX5_GET(rqc, rqc, state);
+
+out:
+	kvfree(out);
+	return err;
+}
+
+static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
+						   struct devlink_fmsg *fmsg)
+{
+	struct mlx5e_priv *priv = rq->channel->priv;
+	struct mlx5e_params *params = &priv->channels.params;
+	struct mlx5e_icosq *icosq = &rq->channel->icosq;
+	u8 icosq_hw_state;
+	int wqes_sz;
+	u8 hw_state;
+	u16 wq_head;
+	int err;
+
+	err = mlx5e_query_rq_state(priv->mdev, rq->rqn, &hw_state);
+	if (err)
+		return err;
+
+	err = mlx5_core_query_sq_state(priv->mdev, icosq->sqn, &icosq_hw_state);
+	if (err)
+		return err;
+
+	wqes_sz = mlx5e_rqwq_get_cur_sz(rq);
+	wq_head = params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
+		  rq->mpwqe.wq.head : mlx5_wq_cyc_get_head(&rq->wqe.wq);
+
+	err = devlink_fmsg_obj_nest_start(fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->channel->ix);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u8_pair_put(fmsg, "SW state", rq->state);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "cc", wq_head);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u8_pair_put(fmsg, "ICOSQ HW state", icosq_hw_state);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_cq_diagnose(&rq->cq, fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
+				      struct devlink_fmsg *fmsg)
+{
+	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+	struct mlx5e_params *params = &priv->channels.params;
+	struct mlx5e_rq *generic_rq;
+	u32 rq_stride, rq_sz;
+	int i, err = 0;
+
+	mutex_lock(&priv->state_lock);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		goto unlock;
+
+	generic_rq = &priv->channels.c[0]->rq;
+	rq_sz = mlx5e_rqwq_get_size(generic_rq);
+	rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL));
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common config");
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "RQ");
+	if (err)
+		goto unlock;
+
+	err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type);
+	if (err)
+		goto unlock;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride);
+	if (err)
+		goto unlock;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz);
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_cq_common_diagnose(&generic_rq->cq, fmsg);
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		goto unlock;
+
+	err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");
+	if (err)
+		goto unlock;
+
+	for (i = 0; i < priv->channels.num; i++) {
+		struct mlx5e_rq *rq = &priv->channels.c[i]->rq;
+
+		err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg);
+		if (err)
+			goto unlock;
+	}
+	err = devlink_fmsg_arr_pair_nest_end(fmsg);
+	if (err)
+		goto unlock;
+unlock:
+	mutex_unlock(&priv->state_lock);
+	return err;
+}
+
+static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
+	.name = "rx",
+	.diagnose = mlx5e_rx_reporter_diagnose,
+};
+
+int mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
+{
+	struct devlink_health_reporter *reporter;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct devlink *devlink = priv_to_devlink(mdev);
+
+	reporter = devlink_health_reporter_create(devlink,
+						  &mlx5_rx_reporter_ops,
+						  0, false, priv);
+	if (IS_ERR(reporter)) {
+		netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
+			    PTR_ERR(reporter));
+		return PTR_ERR(reporter);
+	}
+	priv->rx_reporter = reporter;
+	return 0;
+}
+
+void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv)
+{
+	if (!priv->rx_reporter)
+		return;
+
+	devlink_health_reporter_destroy(priv->rx_reporter);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 656e9be4f301..006e33e718d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -247,26 +247,6 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
 	ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
 }
 
-static u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
-{
-	switch (rq->wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
-	default:
-		return mlx5_wq_cyc_get_size(&rq->wqe.wq);
-	}
-}
-
-static u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
-{
-	switch (rq->wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		return rq->mpwqe.wq.cur_sz;
-	default:
-		return rq->wqe.wq.cur_sz;
-	}
-}
-
 static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 				     struct mlx5e_channel *c)
 {
-- 
2.21.0


^ permalink raw reply related

* [net-next 10/16] net/mlx5e: Report and recover from CQE error on ICOSQ
From: Saeed Mahameed @ 2019-08-15 19:10 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
	Saeed Mahameed
In-Reply-To: <20190815190911.12050-1-saeedm@mellanox.com>

From: Aya Levin <ayal@mellanox.com>

Add support for report and recovery from error on completion on ICOSQ.
Deactivate RQ and flush, then deactivate ICOSQ. Set the queue back to
ready state (firmware) and reset the ICOSQ and the RQ (software
resources). Finally, activate the ICOSQ and the RQ.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   8 ++
 .../ethernet/mellanox/mlx5/core/en/health.h   |   1 +
 .../mellanox/mlx5/core/en/reporter_rx.c       | 105 +++++++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  22 +++-
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   |   2 +
 .../ethernet/mellanox/mlx5/core/en_stats.c    |   3 +
 .../ethernet/mellanox/mlx5/core/en_stats.h    |   2 +
 7 files changed, 136 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index de234650ba57..5f2a1d14de68 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -546,6 +546,8 @@ struct mlx5e_icosq {
 	/* control path */
 	struct mlx5_wq_ctrl        wq_ctrl;
 	struct mlx5e_channel      *channel;
+
+	struct work_struct         recover_work;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_wqe_frag_info {
@@ -1021,6 +1023,12 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
 void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
 void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 			       struct mlx5e_params *params);
+int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
+void mlx5e_activate_rq(struct mlx5e_rq *rq);
+void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
+void mlx5e_activate_icosq(struct mlx5e_icosq *icosq);
+void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq);
 
 int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
 		    struct mlx5e_modify_sq_param *p);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index a751c5316baf..8acd9dc520cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -18,6 +18,7 @@ int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg);
 
 int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
 void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
+void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
 
 #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 66611c50e1c9..1eec17a36d00 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -27,6 +27,105 @@ static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
 	return err;
 }
 
+static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq)
+{
+	unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+
+	while (time_before(jiffies, exp_time)) {
+		if (icosq->cc == icosq->pc)
+			return 0;
+
+		msleep(20);
+	}
+
+	netdev_err(icosq->channel->netdev,
+		   "Wait for ICOSQ 0x%x flush timeout (cc = 0x%x, pc = 0x%x)\n",
+		   icosq->sqn, icosq->cc, icosq->pc);
+
+	return -ETIMEDOUT;
+}
+
+static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq)
+{
+	WARN_ONCE(icosq->cc != icosq->pc, "ICOSQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+		  icosq->sqn, icosq->cc, icosq->pc);
+	icosq->cc = 0;
+	icosq->pc = 0;
+}
+
+static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
+{
+	struct mlx5e_icosq *icosq = ctx;
+	struct mlx5_core_dev *mdev = icosq->channel->mdev;
+	struct net_device *dev = icosq->channel->netdev;
+	struct mlx5e_rq *rq = &icosq->channel->rq;
+	u8 state;
+	int err;
+
+	err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state);
+	if (err) {
+		netdev_err(dev, "Failed to query ICOSQ 0x%x state. err = %d\n",
+			   icosq->sqn, err);
+		goto out;
+	}
+
+	if (state != MLX5_SQC_STATE_ERR)
+		goto out;
+
+	mlx5e_deactivate_rq(rq);
+	err = mlx5e_wait_for_icosq_flush(icosq);
+	if (err)
+		goto out;
+
+	mlx5e_deactivate_icosq(icosq);
+
+	/* At this point, both the rq and the icosq are disabled */
+
+	err = mlx5e_health_sq_to_ready(icosq->channel, icosq->sqn);
+	if (err)
+		goto out;
+
+	mlx5e_reset_icosq_cc_pc(icosq);
+	mlx5e_free_rx_descs(rq);
+	clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+	mlx5e_activate_icosq(icosq);
+	mlx5e_activate_rq(rq);
+
+	rq->stats->recover++;
+	return 0;
+out:
+	clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+	return err;
+}
+
+void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
+{
+	struct mlx5e_priv *priv = icosq->channel->priv;
+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+	struct mlx5e_err_ctx err_ctx = {};
+
+	err_ctx.ctx = icosq;
+	err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover;
+	sprintf(err_str, "ERR CQE on ICOSQ: 0x%x", icosq->sqn);
+
+	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
+static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
+{
+	return err_ctx->recover(err_ctx->ctx);
+}
+
+static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter,
+				     void *context)
+{
+	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+	struct mlx5e_err_ctx *err_ctx = context;
+
+	return err_ctx ? mlx5e_rx_reporter_recover_from_ctx(err_ctx) :
+			 mlx5e_health_recover_channels(priv);
+}
+
 static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
 						   struct devlink_fmsg *fmsg)
 {
@@ -165,9 +264,12 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
 
 static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
 	.name = "rx",
+	.recover = mlx5e_rx_reporter_recover,
 	.diagnose = mlx5e_rx_reporter_diagnose,
 };
 
+#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500
+
 int mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
 {
 	struct devlink_health_reporter *reporter;
@@ -176,7 +278,8 @@ int mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
 
 	reporter = devlink_health_reporter_create(devlink,
 						  &mlx5_rx_reporter_ops,
-						  0, false, priv);
+						  MLX5E_REPORTER_RX_GRACEFUL_PERIOD,
+						  true, priv);
 	if (IS_ERR(reporter)) {
 		netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
 			    PTR_ERR(reporter));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b9a7b6563ae6..db2bcc48cfcc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -700,8 +700,7 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq,
 	return err;
 }
 
-static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state,
-				 int next_state)
+int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
 {
 	struct mlx5_core_dev *mdev = rq->mdev;
 
@@ -812,7 +811,7 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
 	return -ETIMEDOUT;
 }
 
-static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
 {
 	__be16 wqe_ix_be;
 	u16 wqe_ix;
@@ -891,7 +890,7 @@ int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
 	return err;
 }
 
-static void mlx5e_activate_rq(struct mlx5e_rq *rq)
+void mlx5e_activate_rq(struct mlx5e_rq *rq)
 {
 	set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
 	mlx5e_trigger_irq(&rq->channel->icosq);
@@ -906,6 +905,7 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
 void mlx5e_close_rq(struct mlx5e_rq *rq)
 {
 	cancel_work_sync(&rq->dim.work);
+	cancel_work_sync(&rq->channel->icosq.recover_work);
 	mlx5e_destroy_rq(rq);
 	mlx5e_free_rx_descs(rq);
 	mlx5e_free_rq(rq);
@@ -1022,6 +1022,14 @@ static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
 	return 0;
 }
 
+static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work)
+{
+	struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
+					      recover_work);
+
+	mlx5e_reporter_icosq_cqe_err(sq);
+}
+
 static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
 			     struct mlx5e_sq_param *param,
 			     struct mlx5e_icosq *sq)
@@ -1044,6 +1052,8 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
 	if (err)
 		goto err_sq_wq_destroy;
 
+	INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work);
+
 	return 0;
 
 err_sq_wq_destroy:
@@ -1389,12 +1399,12 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
 	return err;
 }
 
-static void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
+void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
 {
 	set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
 }
 
-static void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
+void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
 {
 	struct mlx5e_channel *c = icosq->channel;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 60570b442fff..0f6033ea475d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -615,6 +615,8 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
 			netdev_WARN_ONCE(cq->channel->netdev,
 					 "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe));
+			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
+				queue_work(cq->channel->priv->wq, &sq->recover_work);
 			break;
 		}
 		do {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 94a32c76c182..18e4c162256a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -109,6 +109,7 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_events) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) },
@@ -220,6 +221,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 		s->rx_cache_waive += rq_stats->cache_waive;
 		s->rx_congst_umr  += rq_stats->congst_umr;
 		s->rx_arfs_err    += rq_stats->arfs_err;
+		s->rx_recover     += rq_stats->recover;
 		s->ch_events      += ch_stats->events;
 		s->ch_poll        += ch_stats->poll;
 		s->ch_arm         += ch_stats->arm;
@@ -1298,6 +1300,7 @@ static const struct counter_desc rq_stats_desc[] = {
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) },
 };
 
 static const struct counter_desc sq_stats_desc[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index bf645d42c833..c281e567711d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -116,6 +116,7 @@ struct mlx5e_sw_stats {
 	u64 rx_cache_waive;
 	u64 rx_congst_umr;
 	u64 rx_arfs_err;
+	u64 rx_recover;
 	u64 ch_events;
 	u64 ch_poll;
 	u64 ch_arm;
@@ -249,6 +250,7 @@ struct mlx5e_rq_stats {
 	u64 cache_waive;
 	u64 congst_umr;
 	u64 arfs_err;
+	u64 recover;
 };
 
 struct mlx5e_sq_stats {
-- 
2.21.0


^ permalink raw reply related

* [net-next 09/16] net/mlx5e: Split open/close ICOSQ into stages
From: Saeed Mahameed @ 2019-08-15 19:10 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
	Saeed Mahameed
In-Reply-To: <20190815190911.12050-1-saeedm@mellanox.com>

From: Aya Levin <ayal@mellanox.com>

Align ICOSQ open/close behaviour with RQ and SQ. Split open flow into
open and activate where open handles creation and activate enables the
queue. Do a symmetric thing in close flow: split into close and
deactivate.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/en/xsk/setup.c         |  2 ++
 .../ethernet/mellanox/mlx5/core/en/xsk/tx.c   |  7 +++++++
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 19 +++++++++++++++----
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index f701e4f3c076..18d59ab2cf86 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -150,6 +150,7 @@ void mlx5e_close_xsk(struct mlx5e_channel *c)
 
 void mlx5e_activate_xsk(struct mlx5e_channel *c)
 {
+	mlx5e_activate_icosq(&c->xskicosq);
 	set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
 	/* TX queue is created active. */
 	mlx5e_trigger_irq(&c->xskicosq);
@@ -159,6 +160,7 @@ void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
 {
 	mlx5e_deactivate_rq(&c->xskrq);
 	/* TX queue is disabled on close. */
+	mlx5e_deactivate_icosq(&c->xskicosq);
 }
 
 static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 35e188cf4ea4..fd2c75b4b519 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -26,6 +26,13 @@ int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid)
 		return -ENXIO;
 
 	if (!napi_if_scheduled_mark_missed(&c->napi)) {
+		/* To avoid WQE overrun, don't post a NOP if XSKICOSQ is not
+		 * active and not polled by NAPI. Return 0, because the upcoming
+		 * activate will trigger the IRQ for us.
+		 */
+		if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->xskicosq.state)))
+			return 0;
+
 		spin_lock(&c->xskicosq_lock);
 		mlx5e_trigger_irq(&c->xskicosq);
 		spin_unlock(&c->xskicosq_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 006e33e718d9..b9a7b6563ae6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1376,7 +1376,6 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
 	csp.cqn             = sq->cq.mcq.cqn;
 	csp.wq_ctrl         = &sq->wq_ctrl;
 	csp.min_inline_mode = params->tx_min_inline_mode;
-	set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
 	err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
 	if (err)
 		goto err_free_icosq;
@@ -1390,12 +1389,22 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
 	return err;
 }
 
-void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+static void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
 {
-	struct mlx5e_channel *c = sq->channel;
+	set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
+}
 
-	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+static void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
+{
+	struct mlx5e_channel *c = icosq->channel;
+
+	clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
 	napi_synchronize(&c->napi);
+}
+
+void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+{
+	struct mlx5e_channel *c = sq->channel;
 
 	mlx5e_destroy_sq(c->mdev, sq->sqn);
 	mlx5e_free_icosq(sq);
@@ -1972,6 +1981,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
 
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_activate_txqsq(&c->sq[tc]);
+	mlx5e_activate_icosq(&c->icosq);
 	mlx5e_activate_rq(&c->rq);
 	netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix);
 
@@ -1987,6 +1997,7 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
 		mlx5e_deactivate_xsk(c);
 
 	mlx5e_deactivate_rq(&c->rq);
+	mlx5e_deactivate_icosq(&c->icosq);
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_deactivate_txqsq(&c->sq[tc]);
 }
-- 
2.21.0


^ permalink raw reply related

* [net-next 06/16] net/mlx5e: Add cq info to tx reporter diagnose
From: Saeed Mahameed @ 2019-08-15 19:09 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
	Saeed Mahameed
In-Reply-To: <20190815190911.12050-1-saeedm@mellanox.com>

From: Aya Levin <ayal@mellanox.com>

Add cq information to general diagnose output: CQ size and stride size.
Per SQ add information about the related CQ: cqn and CQ's HW status.

$ devlink health diagnose pci/0000:00:0b.0 reporter tx
 Common Config:
   SQ:
     stride size: 64 size: 1024
   CQ:
     stride size: 64 size: 1024
 SQs:
   channel ix: 0 tc: 0 txq ix: 0 sqn: 4307 HW state: 1 stopped: false cc: 0 pc: 0
   CQ:
     cqn: 1030 HW status: 0
   channel ix: 1 tc: 0 txq ix: 1 sqn: 4312 HW state: 1 stopped: false cc: 0 pc: 0
   CQ:
     cqn: 1034 HW status: 0
   channel ix: 2 tc: 0 txq ix: 2 sqn: 4317 HW state: 1 stopped: false cc: 0 pc: 0
   CQ:
     cqn: 1038 HW status: 0
   channel ix: 3 tc: 0 txq ix: 3 sqn: 4322 HW state: 1 stopped: false cc: 0 pc: 0
   CQ:
     cqn: 1042 HW status: 0

$ devlink health diagnose pci/0000:00:0b.0 reporter tx -jp
{
    "Common Config": {
        "SQ": {
            "stride size": 64,
            "size": 1024
        },
        "CQ": {
            "stride size": 64,
            "size": 1024
        }
    },
    "SQs": [ {
            "channel ix": 0,
            "tc": 0,
            "txq ix": 0,
            "sqn": 4307,
            "HW state": 1,
            "stopped": false,
            "cc": 0,
            "pc": 0,
            "CQ": {
                "cqn": 1030,
                "HW status": 0
            }
        },{
            "channel ix": 1,
            "tc": 0,
            "txq ix": 1,
            "sqn": 4312,
            "HW state": 1,
            "stopped": false,
            "cc": 0,
            "pc": 0,
            "CQ": {
                "cqn": 1034,
                "HW status": 0
            }
        },{
            "channel ix": 2,
            "tc": 0,
            "txq ix": 2,
            "sqn": 4317,
            "HW state": 1,
            "stopped": false,
            "cc": 0,
            "pc": 0,
            "CQ": {
                "cqn": 1038,
                "HW status": 0
            }
        },{
            "channel ix": 3,
            "tc": 0,
            "txq ix": 3,
            "sqn": 4322,
            "HW state": 1,
            "stopped": false,
            "cc": 0,
            "pc": 0,
            "CQ": {
                "cqn": 1042,
                "HW status": 0
        } ]
}

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en/health.c   | 62 +++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/en/health.h   |  2 +
 .../mellanox/mlx5/core/en/reporter_tx.c       |  8 +++
 drivers/net/ethernet/mellanox/mlx5/core/wq.c  |  5 ++
 drivers/net/ethernet/mellanox/mlx5/core/wq.h  |  1 +
 5 files changed, 78 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index 1d9a138f02ab..63bf94cd5b67 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -34,6 +34,68 @@ int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg)
 	return 0;
 }
 
+int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
+{
+	struct mlx5e_priv *priv = cq->channel->priv;
+	u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
+	u8 hw_status;
+	void *cqc;
+	int err;
+
+	err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out, sizeof(out));
+	if (err)
+		return err;
+
+	cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
+	hw_status = MLX5_GET(cqc, cqc, status);
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
+{
+	u8 cq_log_stride;
+	u32 cq_sz;
+	int err;
+
+	cq_sz = mlx5_cqwq_get_size(&cq->wq);
+	cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn)
 {
 	struct mlx5_core_dev *mdev = channel->mdev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index 112771ad516c..6725d417aaf5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -11,6 +11,8 @@ void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv);
 void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq);
 int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq);
 
+int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
+int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
 int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name);
 int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 1fe6da1dea5b..f37c8a88cb74 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -185,6 +185,10 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
 	if (err)
 		return err;
 
+	err = mlx5e_reporter_cq_diagnose(&sq->cq, fmsg);
+	if (err)
+		return err;
+
 	err = devlink_fmsg_obj_nest_end(fmsg);
 	if (err)
 		return err;
@@ -229,6 +233,10 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
 	if (err)
 		goto unlock;
 
+	err = mlx5e_reporter_cq_common_diagnose(&generic_sq->cq, fmsg);
+	if (err)
+		goto unlock;
+
 	err = mlx5e_reporter_named_obj_nest_end(fmsg);
 	if (err)
 		goto unlock;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 953cc8efba69..dd2315ce4441 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -44,6 +44,11 @@ u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
 	return wq->fbc.sz_m1 + 1;
 }
 
+u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq)
+{
+	return wq->fbc.log_stride;
+}
+
 u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
 {
 	return (u32)wq->fbc.sz_m1 + 1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index f1ec58c9e9e3..55791f71a778 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -89,6 +89,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		     void *cqc, struct mlx5_cqwq *wq,
 		     struct mlx5_wq_ctrl *wq_ctrl);
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq);
+u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq);
 
 int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *wqc, struct mlx5_wq_ll *wq,
-- 
2.21.0


^ permalink raw reply related

* [net-next 07/16] net/mlx5e: Add helper functions for reporter's basics
From: Saeed Mahameed @ 2019-08-15 19:10 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
	Saeed Mahameed
In-Reply-To: <20190815190911.12050-1-saeedm@mellanox.com>

From: Aya Levin <ayal@mellanox.com>

Introduce helper functions for create and destroy reporters and update
channels. In the following patch, rx reporter is added and it will use
these helpers too.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/health.c | 17 +++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en/health.h |  4 ++++
 .../net/ethernet/mellanox/mlx5/core/en_main.c   |  9 +++------
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index 63bf94cd5b67..c003757fbec0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -96,6 +96,23 @@ int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *
 	return 0;
 }
 
+int mlx5e_health_create_reporters(struct mlx5e_priv *priv)
+{
+	return  mlx5e_reporter_tx_create(priv);
+}
+
+void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
+{
+	mlx5e_reporter_tx_destroy(priv);
+}
+
+void mlx5e_health_channels_update(struct mlx5e_priv *priv)
+{
+	if (priv->tx_reporter)
+		devlink_health_reporter_state_update(priv->tx_reporter,
+						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+}
+
 int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn)
 {
 	struct mlx5_core_dev *mdev = channel->mdev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index 6725d417aaf5..b2c0ccc79b22 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -29,5 +29,9 @@ int mlx5e_health_recover_channels(struct mlx5e_priv *priv);
 int mlx5e_health_report(struct mlx5e_priv *priv,
 			struct devlink_health_reporter *reporter, char *err_str,
 			struct mlx5e_err_ctx *err_ctx);
+int mlx5e_health_create_reporters(struct mlx5e_priv *priv);
+void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv);
+void mlx5e_health_channels_update(struct mlx5e_priv *priv);
+
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index aa71530a043c..656e9be4f301 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2324,10 +2324,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
 			goto err_close_channels;
 	}
 
-	if (priv->tx_reporter)
-		devlink_health_reporter_state_update(priv->tx_reporter,
-						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
-
+	mlx5e_health_channels_update(priv);
 	kvfree(cparam);
 	return 0;
 
@@ -3202,7 +3199,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
 {
 	int tc;
 
-	mlx5e_reporter_tx_destroy(priv);
 	for (tc = 0; tc < priv->profile->max_tc; tc++)
 		mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
 }
@@ -4970,12 +4966,14 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
 		mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
 	mlx5e_build_nic_netdev(netdev);
 	mlx5e_build_tc2txq_maps(priv);
+	mlx5e_health_create_reporters(priv);
 
 	return 0;
 }
 
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
+	mlx5e_health_destroy_reporters(priv);
 	mlx5e_tls_cleanup(priv);
 	mlx5e_ipsec_cleanup(priv);
 	mlx5e_netdev_cleanup(priv->netdev, priv);
@@ -5078,7 +5076,6 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_dcbnl_initialize(priv);
 #endif
-	mlx5e_reporter_tx_create(priv);
 	return 0;
 }
 
-- 
2.21.0


^ permalink raw reply related

* [net-next 04/16] net/mlx5e: Extend tx diagnose function
From: Saeed Mahameed @ 2019-08-15 19:09 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
	Saeed Mahameed
In-Reply-To: <20190815190911.12050-1-saeedm@mellanox.com>

From: Aya Levin <ayal@mellanox.com>

The following patches in the set enhance the diagnostics info of tx
reporter. Therefore, it is better to pass a pointer to the SQ for
further data extraction.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/en/reporter_tx.c       | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 5731ea3c1600..411813a457a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -138,15 +138,22 @@ static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
 
 static int
 mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
-					u32 sqn, u8 state, bool stopped)
+					struct mlx5e_txqsq *sq)
 {
+	struct mlx5e_priv *priv = sq->channel->priv;
+	bool stopped = netif_xmit_stopped(sq->txq);
+	u8 state;
 	int err;
 
+	err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
+	if (err)
+		return err;
+
 	err = devlink_fmsg_obj_nest_start(fmsg);
 	if (err)
 		return err;
 
-	err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sqn);
+	err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
 	if (err)
 		return err;
 
@@ -183,15 +190,8 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
 	for (i = 0; i < priv->channels.num * priv->channels.params.num_tc;
 	     i++) {
 		struct mlx5e_txqsq *sq = priv->txq2sq[i];
-		u8 state;
-
-		err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
-		if (err)
-			goto unlock;
 
-		err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq->sqn,
-							      state,
-							      netif_xmit_stopped(sq->txq));
+		err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq);
 		if (err)
 			goto unlock;
 	}
-- 
2.21.0


^ permalink raw reply related

* Re: [pull request][net 0/2] Mellanox, mlx5 fixes 2019-08-15
From: David Miller @ 2019-08-15 19:10 UTC (permalink / raw)
  To: saeedm; +Cc: netdev
In-Reply-To: <20190815184639.8206-1-saeedm@mellanox.com>

From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 15 Aug 2019 18:47:04 +0000

> This series introduces two fixes to mlx5 driver.
> 
> 1) Eran fixes a compatibility issue with ethtool flash.
> 2) Maxim fixes a race in XSK wakeup flow.
> 
> Please pull and let me know if there is any problem.

Pulled, thank you.

^ permalink raw reply

* [net-next 05/16] net/mlx5e: Extend tx reporter diagnostics output
From: Saeed Mahameed @ 2019-08-15 19:09 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
	Saeed Mahameed
In-Reply-To: <20190815190911.12050-1-saeedm@mellanox.com>

From: Aya Levin <ayal@mellanox.com>

Enhance tx reporter's diagnostics output to include: information common
to all SQs: SQ size, SQ stride size.
In addition add channel ix, tc, txq ix, cc and pc.

$ devlink health diagnose pci/0000:00:0b.0 reporter tx
 Common config:
   SQ:
     stride size: 64 size: 1024
 SQs:
   channel ix: 0 tc: 0 txq ix: 0 sqn: 4307 HW state: 1 stopped: false cc: 0 pc: 0
   channel ix: 1 tc: 0 txq ix: 1 sqn: 4312 HW state: 1 stopped: false cc: 0 pc: 0
   channel ix: 2 tc: 0 txq ix: 2 sqn: 4317 HW state: 1 stopped: false cc: 0 pc: 0
   channel ix: 3 tc: 0 txq ix: 3 sqn: 4322 HW state: 1 stopped: false cc: 0 pc: 0

$ devlink health diagnose pci/0000:00:0b.0 reporter tx -jp
{
    "Common config": {
        "SQ": {
            "stride size": 64,
            "size": 1024
        }
    },
    "SQs": [ {
            "channel ix": 0,
            "tc": 0,
            "txq ix": 0,
            "sqn": 4307,
            "HW state": 1,
            "stopped": false,
            "cc": 0,
            "pc": 0
        },{
            "channel ix": 1,
            "tc": 0,
            "txq ix": 1,
            "sqn": 4312,
            "HW state": 1,
            "stopped": false,
            "cc": 0,
            "pc": 0
        },{
            "channel ix": 2,
            "tc": 0,
            "txq ix": 2,
            "sqn": 4317,
            "HW state": 1,
            "stopped": false,
            "cc": 0,
            "pc": 0
        },{
            "channel ix": 3,
            "tc": 0,
            "txq ix": 3,
            "sqn": 4322,
            "HW state": 1,
            "stopped": false,
            "cc": 0,
            "pc": 0
         } ]
}

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en/health.c   | 30 ++++++++
 .../ethernet/mellanox/mlx5/core/en/health.h   |  3 +
 .../mellanox/mlx5/core/en/reporter_tx.c       | 69 ++++++++++++++++---
 3 files changed, 94 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index 60166e5432ae..1d9a138f02ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -4,6 +4,36 @@
 #include "health.h"
 #include "lib/eq.h"
 
+int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_obj_nest_start(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg)
+{
+	int err;
+
+	err = devlink_fmsg_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn)
 {
 	struct mlx5_core_dev *mdev = channel->mdev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index 386bda6104aa..112771ad516c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -11,6 +11,9 @@ void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv);
 void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq);
 int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq);
 
+int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name);
+int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg);
+
 #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
 
 struct mlx5e_err_ctx {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 411813a457a7..1fe6da1dea5b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -138,7 +138,7 @@ static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
 
 static int
 mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
-					struct mlx5e_txqsq *sq)
+					struct mlx5e_txqsq *sq, int tc)
 {
 	struct mlx5e_priv *priv = sq->channel->priv;
 	bool stopped = netif_xmit_stopped(sq->txq);
@@ -153,6 +153,18 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
 	if (err)
 		return err;
 
+	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
+	if (err)
+		return err;
+
 	err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
 	if (err)
 		return err;
@@ -165,6 +177,14 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
 	if (err)
 		return err;
 
+	err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc);
+	if (err)
+		return err;
+
 	err = devlink_fmsg_obj_nest_end(fmsg);
 	if (err)
 		return err;
@@ -176,24 +196,57 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
 				      struct devlink_fmsg *fmsg)
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
-	int i, err = 0;
+	struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
+	u32 sq_stride, sq_sz;
+
+	int i, tc, err = 0;
 
 	mutex_lock(&priv->state_lock);
 
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 		goto unlock;
 
+	sq_sz = mlx5_wq_cyc_get_size(&generic_sq->wq);
+	sq_stride = MLX5_SEND_WQE_BB;
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common Config");
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ");
+	if (err)
+		goto unlock;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
+	if (err)
+		goto unlock;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		goto unlock;
+
 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
 	if (err)
 		goto unlock;
 
-	for (i = 0; i < priv->channels.num * priv->channels.params.num_tc;
-	     i++) {
-		struct mlx5e_txqsq *sq = priv->txq2sq[i];
+	for (i = 0; i < priv->channels.num; i++) {
+		struct mlx5e_channel *c = priv->channels.c[i];
+
+		for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+			struct mlx5e_txqsq *sq = &c->sq[tc];
 
-		err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq);
-		if (err)
-			goto unlock;
+			err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
+			if (err)
+				goto unlock;
+		}
 	}
 	err = devlink_fmsg_arr_pair_nest_end(fmsg);
 	if (err)
-- 
2.21.0


^ permalink raw reply related

* [net-next 03/16] net/mlx5e: Generalize tx reporter's functionality
From: Saeed Mahameed @ 2019-08-15 19:09 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
	Saeed Mahameed
In-Reply-To: <20190815190911.12050-1-saeedm@mellanox.com>

From: Aya Levin <ayal@mellanox.com>

Prepare for code sharing with rx reporter, which is added in the
following patches in the set. Introduce a generic error_ctx for
agnostic recovery despatch.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   5 +-
 .../ethernet/mellanox/mlx5/core/en/health.c   |  82 ++++++++++++
 .../ethernet/mellanox/mlx5/core/en/health.h   |  14 ++
 .../mellanox/mlx5/core/en/reporter_tx.c       | 124 ++++--------------
 4 files changed, 124 insertions(+), 101 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/health.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 57d2cc666fe3..23d566a45a30 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -23,8 +23,9 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 #
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
-		en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o \
-		en/params.o en/xsk/umem.o en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o
+		en_selftest.o en/port.o en/monitor_stats.o en/health.o \
+		en/reporter_tx.o en/params.o en/xsk/umem.o en/xsk/setup.o \
+		en/xsk/rx.o en/xsk/tx.o
 
 #
 # Netdev extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
new file mode 100644
index 000000000000..60166e5432ae
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "health.h"
+#include "lib/eq.h"
+
+int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn)
+{
+	struct mlx5_core_dev *mdev = channel->mdev;
+	struct net_device *dev = channel->netdev;
+	struct mlx5e_modify_sq_param msp = {0};
+	int err;
+
+	msp.curr_state = MLX5_SQC_STATE_ERR;
+	msp.next_state = MLX5_SQC_STATE_RST;
+
+	err = mlx5e_modify_sq(mdev, sqn, &msp);
+	if (err) {
+		netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
+		return err;
+	}
+
+	memset(&msp, 0, sizeof(msp));
+	msp.curr_state = MLX5_SQC_STATE_RST;
+	msp.next_state = MLX5_SQC_STATE_RDY;
+
+	err = mlx5e_modify_sq(mdev, sqn, &msp);
+	if (err) {
+		netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);
+		return err;
+	}
+
+	return 0;
+}
+
+int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
+{
+	int err = 0;
+
+	rtnl_lock();
+	mutex_lock(&priv->state_lock);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		goto out;
+
+	err = mlx5e_safe_reopen_channels(priv);
+
+out:
+	mutex_unlock(&priv->state_lock);
+	rtnl_unlock();
+
+	return err;
+}
+
+int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel)
+{
+	u32 eqe_count;
+
+	netdev_err(channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
+		   eq->core.eqn, eq->core.cons_index, eq->core.irqn);
+
+	eqe_count = mlx5_eq_poll_irq_disabled(eq);
+	if (!eqe_count)
+		return -EIO;
+
+	netdev_err(channel->netdev, "Recovered %d eqes on EQ 0x%x\n",
+		   eqe_count, eq->core.eqn);
+
+	channel->stats->eq_rearm++;
+	return 0;
+}
+
+int mlx5e_health_report(struct mlx5e_priv *priv,
+			struct devlink_health_reporter *reporter, char *err_str,
+			struct mlx5e_err_ctx *err_ctx)
+{
+	if (!reporter) {
+		netdev_err(priv->netdev, err_str);
+		return err_ctx->recover(&err_ctx->ctx);
+	}
+	return devlink_health_report(reporter, err_str, err_ctx);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index c7a5a149011e..386bda6104aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -11,4 +11,18 @@ void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv);
 void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq);
 int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq);
 
+#define MLX5E_REPORTER_PER_Q_MAX_LEN 256
+
+struct mlx5e_err_ctx {
+	int (*recover)(void *ctx);
+	void *ctx;
+};
+
+int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn);
+int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel);
+int mlx5e_health_recover_channels(struct mlx5e_priv *priv);
+int mlx5e_health_report(struct mlx5e_priv *priv,
+			struct devlink_health_reporter *reporter, char *err_str,
+			struct mlx5e_err_ctx *err_ctx);
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index fc308efa2e3c..5731ea3c1600 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -2,14 +2,6 @@
 /* Copyright (c) 2019 Mellanox Technologies. */
 
 #include "health.h"
-#include "lib/eq.h"
-
-#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256
-
-struct mlx5e_tx_err_ctx {
-	int (*recover)(struct mlx5e_txqsq *sq);
-	struct mlx5e_txqsq *sq;
-};
 
 static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
 {
@@ -39,37 +31,9 @@ static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
 	sq->pc = 0;
 }
 
-static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
-{
-	struct mlx5_core_dev *mdev = sq->channel->mdev;
-	struct net_device *dev = sq->channel->netdev;
-	struct mlx5e_modify_sq_param msp = {0};
-	int err;
-
-	msp.curr_state = curr_state;
-	msp.next_state = MLX5_SQC_STATE_RST;
-
-	err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
-	if (err) {
-		netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
-		return err;
-	}
-
-	memset(&msp, 0, sizeof(msp));
-	msp.curr_state = MLX5_SQC_STATE_RST;
-	msp.next_state = MLX5_SQC_STATE_RDY;
-
-	err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
-	if (err) {
-		netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
-		return err;
-	}
-
-	return 0;
-}
-
-static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq)
+static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
 {
+	struct mlx5e_txqsq *sq = ctx;
 	struct mlx5_core_dev *mdev = sq->channel->mdev;
 	struct net_device *dev = sq->channel->netdev;
 	u8 state;
@@ -101,7 +65,7 @@ static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq)
 	 * pending WQEs. SQ can safely reset the SQ.
 	 */
 
-	err = mlx5e_sq_to_ready(sq, state);
+	err = mlx5e_health_sq_to_ready(sq->channel, sq->sqn);
 	if (err)
 		return err;
 
@@ -112,102 +76,64 @@ static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq)
 	return 0;
 }
 
-static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter,
-				 char *err_str,
-				 struct mlx5e_tx_err_ctx *err_ctx)
-{
-	if (!tx_reporter) {
-		netdev_err(err_ctx->sq->channel->netdev, err_str);
-		return err_ctx->recover(err_ctx->sq);
-	}
-
-	return devlink_health_report(tx_reporter, err_str, err_ctx);
-}
-
 void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
 {
-	char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
-	struct mlx5e_tx_err_ctx err_ctx = {0};
+	struct mlx5e_priv *priv = sq->channel->priv;
+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+	struct mlx5e_err_ctx err_ctx = {0};
 
-	err_ctx.sq       = sq;
-	err_ctx.recover  = mlx5e_tx_reporter_err_cqe_recover;
+	err_ctx.ctx = sq;
+	err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
 	sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);
 
-	mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str,
-			      &err_ctx);
+	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
 }
 
-static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq)
+static int mlx5e_tx_reporter_timeout_recover(void *ctx)
 {
+	struct mlx5e_txqsq *sq = ctx;
 	struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
-	u32 eqe_count;
-
-	netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
-		   eq->core.eqn, eq->core.cons_index, eq->core.irqn);
+	int err;
 
-	eqe_count = mlx5_eq_poll_irq_disabled(eq);
-	if (!eqe_count) {
+	err = mlx5e_health_channel_eq_recover(eq, sq->channel);
+	if (err)
 		clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
-		return -EIO;
-	}
 
-	netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n",
-		   eqe_count, eq->core.eqn);
-	sq->channel->stats->eq_rearm++;
-	return 0;
+	return err;
 }
 
 int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
 {
-	char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
-	struct mlx5e_tx_err_ctx err_ctx;
+	struct mlx5e_priv *priv = sq->channel->priv;
+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+	struct mlx5e_err_ctx err_ctx;
 
-	err_ctx.sq       = sq;
-	err_ctx.recover  = mlx5e_tx_reporter_timeout_recover;
+	err_ctx.ctx = sq;
+	err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
 	sprintf(err_str,
 		"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
 		sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
 		jiffies_to_usecs(jiffies - sq->txq->trans_start));
 
-	return mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str,
-				     &err_ctx);
+	return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
 }
 
 /* state lock cannot be grabbed within this function.
  * It can cause a dead lock or a read-after-free.
  */
-static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx)
-{
-	return err_ctx->recover(err_ctx->sq);
-}
-
-static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv)
+static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
 {
-	int err = 0;
-
-	rtnl_lock();
-	mutex_lock(&priv->state_lock);
-
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
-		goto out;
-
-	err = mlx5e_safe_reopen_channels(priv);
-
-out:
-	mutex_unlock(&priv->state_lock);
-	rtnl_unlock();
-
-	return err;
+	return err_ctx->recover(err_ctx->ctx);
 }
 
 static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
 				     void *context)
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
-	struct mlx5e_tx_err_ctx *err_ctx = context;
+	struct mlx5e_err_ctx *err_ctx = context;
 
 	return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) :
-			 mlx5e_tx_reporter_recover_all(priv);
+			 mlx5e_health_recover_channels(priv);
 }
 
 static int
-- 
2.21.0


^ permalink raw reply related

* [net-next 02/16] net/mlx5e: Change naming convention for reporter's functions
From: Saeed Mahameed @ 2019-08-15 19:09 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
	Saeed Mahameed
In-Reply-To: <20190815190911.12050-1-saeedm@mellanox.com>

From: Aya Levin <ayal@mellanox.com>

Change from mlx5e_tx_reporter_* to mlx5e_reporter_tx_*. In the following
patches in the set rx reporter is added, the new naming convention is
more uniformed.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/health.h      | 8 ++++----
 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 8 ++++----
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c        | 8 ++++----
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index cee840e40a05..c7a5a149011e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -6,9 +6,9 @@
 
 #include "en.h"
 
-int mlx5e_tx_reporter_create(struct mlx5e_priv *priv);
-void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv);
-void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq);
-int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq);
+int mlx5e_reporter_tx_create(struct mlx5e_priv *priv);
+void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv);
+void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq);
+int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq);
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 7aac9cc92181..fc308efa2e3c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -124,7 +124,7 @@ static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter,
 	return devlink_health_report(tx_reporter, err_str, err_ctx);
 }
 
-void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq)
+void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
 {
 	char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
 	struct mlx5e_tx_err_ctx err_ctx = {0};
@@ -157,7 +157,7 @@ static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq)
 	return 0;
 }
 
-int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq)
+int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
 {
 	char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
 	struct mlx5e_tx_err_ctx err_ctx;
@@ -286,7 +286,7 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
 
 #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
 
-int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
+int mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
 {
 	struct devlink_health_reporter *reporter;
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -306,7 +306,7 @@ int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
 	return 0;
 }
 
-void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv)
+void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
 {
 	if (!priv->tx_reporter)
 		return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 9fc7bc93d607..aa71530a043c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1380,7 +1380,7 @@ static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
 	struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
 					      recover_work);
 
-	mlx5e_tx_reporter_err_cqe(sq);
+	mlx5e_reporter_tx_err_cqe(sq);
 }
 
 int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
@@ -3202,7 +3202,7 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
 {
 	int tc;
 
-	mlx5e_tx_reporter_destroy(priv);
+	mlx5e_reporter_tx_destroy(priv);
 	for (tc = 0; tc < priv->profile->max_tc; tc++)
 		mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
 }
@@ -4272,7 +4272,7 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
 		if (!netif_xmit_stopped(dev_queue))
 			continue;
 
-		if (mlx5e_tx_reporter_timeout(sq))
+		if (mlx5e_reporter_tx_timeout(sq))
 			report_failed = true;
 	}
 
@@ -5078,7 +5078,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_dcbnl_initialize(priv);
 #endif
-	mlx5e_tx_reporter_create(priv);
+	mlx5e_reporter_tx_create(priv);
 	return 0;
 }
 
-- 
2.21.0


^ permalink raw reply related

* [net-next 01/16] net/mlx5e: Rename reporter header file
From: Saeed Mahameed @ 2019-08-15 19:09 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
	Saeed Mahameed
In-Reply-To: <20190815190911.12050-1-saeedm@mellanox.com>

From: Aya Levin <ayal@mellanox.com>

Rename reporter.h -> health.h so patches in the set can use it for
health related functionality.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en/{reporter.h => health.h}   | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c      | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c             | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)
 rename drivers/net/ethernet/mellanox/mlx5/core/en/{reporter.h => health.h} (84%)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
similarity index 84%
rename from drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
rename to drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index ed7a3881d2c5..cee840e40a05 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (c) 2019 Mellanox Technologies. */
 
-#ifndef __MLX5E_EN_REPORTER_H
-#define __MLX5E_EN_REPORTER_H
+#ifndef __MLX5E_EN_HEALTH_H
+#define __MLX5E_EN_HEALTH_H
 
 #include "en.h"
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 6e54fefea410..7aac9cc92181 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (c) 2019 Mellanox Technologies. */
 
-#include "reporter.h"
+#include "health.h"
 #include "lib/eq.h"
 
 #define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 9a2fcef6e7f0..9fc7bc93d607 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -56,7 +56,7 @@
 #include "en/xdp.h"
 #include "lib/eq.h"
 #include "en/monitor_stats.h"
-#include "en/reporter.h"
+#include "en/health.h"
 #include "en/params.h"
 #include "en/xsk/umem.h"
 #include "en/xsk/setup.h"
-- 
2.21.0


^ permalink raw reply related

* [pull request][net-next 00/16] Mellanox, mlx5 devlink RX health reporters
From: Saeed Mahameed @ 2019-08-15 19:09 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev@vger.kernel.org, Saeed Mahameed

Hi Dave,

This series is adding a new devlink health reporter for RX related
errors from Aya.

Last two patches from Vlad and Gavi, are trivial fixes for previously
submitted patches on this release cycle.

For more information please see tag log below.

Please pull and let me know if there is any problem.

Thanks,
Saeed.

---
The following changes since commit 873343e7d4964fa70cac22e2f7653f510bfaaa11:

  page_pool: remove unnecessary variable init (2019-08-15 11:50:37 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git tags/mlx5-updates-2019-08-15

for you to fetch changes up to 7ba590ca7d0c634f6706ba02c8bff0a5f41305e9:

  net/mlx5: Fix the order of fc_stats cleanup (2019-08-15 12:02:13 -0700)

----------------------------------------------------------------
mlx5-updates-2019-08-15

This patchset introduces changes in mlx5 devlink health reporters.
The highlight of these changes is adding a new reporter: RX reporter

mlx5 RX reporter: reports and recovers from timeouts and RX completion
error.

1) Perform TX reporter cleanup. In order to maintain the
code flow as similar as possible between RX and TX reporters, start the
set with cleanup.

2) Prepare for code sharing, generalize and move shared
functionality.

3) Refactor and extend TX reporter diagnostics information
to align the TX reporter diagnostics output with the RX reporter's
diagnostics output.

4) Add helper functions Patch 11: Add RX reporter, initially
supports only the diagnostics call back.

5) Change ICOSQ (Internal Operations Send Queue) open/close flow to
avoid race between interface down and completion error recovery.

6) Introduce recovery flows for RX ring population timeout on ICOSQ,
and for completion errors on ICOSQ and on RQ (Regular receive queues).

8) Include RX reporters in mlx5 documentation.

Last two patches of this series, are trivial fixes for previously
submitted patches on this release cycle.

----------------------------------------------------------------
Aya Levin (13):
      net/mlx5e: Rename reporter header file
      net/mlx5e: Change naming convention for reporter's functions
      net/mlx5e: Generalize tx reporter's functionality
      net/mlx5e: Extend tx diagnose function
      net/mlx5e: Extend tx reporter diagnostics output
      net/mlx5e: Add cq info to tx reporter diagnose
      net/mlx5e: Add helper functions for reporter's basics
      net/mlx5e: Add support to rx reporter diagnose
      net/mlx5e: Split open/close ICOSQ into stages
      net/mlx5e: Report and recover from CQE error on ICOSQ
      net/mlx5e: Report and recover from rx timeout
      net/mlx5e: Report and recover from CQE with error on RQ
      Documentation: net: mlx5: Devlink health documentation updates

Gavi Teitz (1):
      net/mlx5: Fix the order of fc_stats cleanup

Saeed Mahameed (1):
      net/mlx5e: RX, Handle CQE with error at the earliest stage

Vlad Buslov (1):
      net/mlx5e: Fix deallocation of non-fully init encap entries

 .../networking/device_drivers/mellanox/mlx5.rst    |  33 +-
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   5 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  32 ++
 .../net/ethernet/mellanox/mlx5/core/en/health.c    | 205 +++++++++++
 .../net/ethernet/mellanox/mlx5/core/en/health.h    |  53 +++
 .../net/ethernet/mellanox/mlx5/core/en/reporter.h  |  14 -
 .../ethernet/mellanox/mlx5/core/en/reporter_rx.c   | 393 +++++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/en/reporter_tx.c   | 225 ++++++------
 .../net/ethernet/mellanox/mlx5/core/en/xsk/setup.c |   2 +
 .../net/ethernet/mellanox/mlx5/core/en/xsk/tx.c    |   7 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  82 +++--
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c    |  62 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |   2 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  12 +-
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  |   9 +-
 drivers/net/ethernet/mellanox/mlx5/core/wq.c       |   5 +
 drivers/net/ethernet/mellanox/mlx5/core/wq.h       |   1 +
 18 files changed, 941 insertions(+), 204 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/health.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/health.h
 delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c

^ permalink raw reply

* Re: [PATCH net-next v2 0/4] net/rds: Fixes from internal Oracle repo
From: David Miller @ 2019-08-15 19:04 UTC (permalink / raw)
  To: gerd.rausch; +Cc: santosh.shilimkar, netdev, linux-rdma, rds-devel
In-Reply-To: <ee77e550-2231-be7f-861f-31d609631e9f@oracle.com>

From: Gerd Rausch <gerd.rausch@oracle.com>
Date: Thu, 15 Aug 2019 07:40:22 -0700

> This is the first set of (mostly old) patches from our internal repository
> in an effort to synchronize what Oracle had been using internally
> with what is shipped with the Linux kernel.

Series applied.

^ permalink raw reply

* Re: [patch net-next v4 0/2] netdevsim: implement support for devlink region and snapshots
From: David Miller @ 2019-08-15 19:02 UTC (permalink / raw)
  To: jiri; +Cc: netdev, jakub.kicinski, mlxsw
In-Reply-To: <20190815134634.9858-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 15 Aug 2019 15:46:32 +0200

> From: Jiri Pirko <jiri@mellanox.com>
> 
> Implement devlink region support for netdevsim and test it.
> 
> ---
> Note the selftest patch depends on "[patch net-next] selftests:
> netdevsim: add devlink params tests" patch sent earlier today.

Series applied.

^ permalink raw reply

* Re: [patch net-next v3 0/2] selftests: netdevsim: add devlink paramstests
From: David Miller @ 2019-08-15 19:01 UTC (permalink / raw)
  To: jiri; +Cc: netdev, jakub.kicinski, mlxsw
In-Reply-To: <20190815134229.8884-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 15 Aug 2019 15:42:27 +0200

> From: Jiri Pirko <jiri@mellanox.com>
> 
> The first patch is just a helper addition as a dependency of the actual
> test in patch number two.

Series applied.

^ permalink raw reply

* Re: [PATCH net-next] page_pool: remove unnecessary variable init
From: David Miller @ 2019-08-15 18:50 UTC (permalink / raw)
  To: linyunsheng; +Cc: hawk, ilias.apalodimas, netdev, linux-kernel
In-Reply-To: <1565872860-63524-1-git-send-email-linyunsheng@huawei.com>

From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Thu, 15 Aug 2019 20:41:00 +0800

> Remove variable initializations in functions that
> are followed by assignments before use
> 
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next] r8169: sync EEE handling for RTL8168h with vendor driver
From: David Miller @ 2019-08-15 18:49 UTC (permalink / raw)
  To: hkallweit1; +Cc: nic_swsd, netdev
In-Reply-To: <79a1db61-3aab-065b-9e18-0094c5023300@gmail.com>

From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 15 Aug 2019 14:21:30 +0200

> Sync EEE init for RTL8168h with vendor driver and add two writes to
> vendor-specific registers.
> 
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next v2 0/2] net: phy: realtek: map vendor-specific EEE registers to standard MMD registers
From: David Miller @ 2019-08-15 18:48 UTC (permalink / raw)
  To: hkallweit1; +Cc: andrew, f.fainelli, netdev
In-Reply-To: <88a71ee7-a17d-ac9c-c998-d0ea35e5c566@gmail.com>

From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 15 Aug 2019 14:12:10 +0200

> EEE-related registers on newer integrated PHY's have the standard
> layout, but are accessible not via MMD but via vendor-specific
> registers. Emulating the standard MMD registers allows to use the
> generic functions for EEE control and to significantly simplify
> the r8169 driver.
> 
> v2:
> - rebase patch 2

Series applied, thanks Heiner.

^ permalink raw reply

* [net 2/2] net/mlx5e: Fix compatibility issue with ethtool flash device
From: Saeed Mahameed @ 2019-08-15 18:47 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Eran Ben Elisha, Jiri Pirko,
	Saeed Mahameed
In-Reply-To: <20190815184639.8206-1-saeedm@mellanox.com>

From: Eran Ben Elisha <eranbe@mellanox.com>

Cited patch deleted ethtool flash device support, as ethtool core can
fallback into devlink flash callback. However, this is supported only if
there is a devlink port registered over the corresponding netdevice.

As mlx5e do not have devlink port support over native netdevice, it broke
the ability to flash device via ethtool.

This patch re-add the ethtool callback to avoid user functionality breakage
when trying to flash device via ethtool.

Fixes: 9c8bca2637b8 ("mlx5: Move firmware flash implementation to devlink")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  2 ++
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  | 35 +++++++++++++++++++
 .../mellanox/mlx5/core/ipoib/ethtool.c        |  9 +++++
 3 files changed, 46 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index f6b64a03cd06..65bec19a438f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1105,6 +1105,8 @@ u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv);
 u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv);
 int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
 			      struct ethtool_ts_info *info);
+int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+			       struct ethtool_flash *flash);
 void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
 				  struct ethtool_pauseparam *pauseparam);
 int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index e89dba790a2d..20e628c907e5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1690,6 +1690,40 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,
 	return 0;
 }
 
+int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+			       struct ethtool_flash *flash)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct net_device *dev = priv->netdev;
+	const struct firmware *fw;
+	int err;
+
+	if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
+		return -EOPNOTSUPP;
+
+	err = request_firmware_direct(&fw, flash->data, &dev->dev);
+	if (err)
+		return err;
+
+	dev_hold(dev);
+	rtnl_unlock();
+
+	err = mlx5_firmware_flash(mdev, fw, NULL);
+	release_firmware(fw);
+
+	rtnl_lock();
+	dev_put(dev);
+	return err;
+}
+
+static int mlx5e_flash_device(struct net_device *dev,
+			      struct ethtool_flash *flash)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	return mlx5e_ethtool_flash_device(priv, flash);
+}
+
 static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
 				     bool is_rx_cq)
 {
@@ -1972,6 +2006,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
 	.set_wol	   = mlx5e_set_wol,
 	.get_module_info   = mlx5e_get_module_info,
 	.get_module_eeprom = mlx5e_get_module_eeprom,
+	.flash_device      = mlx5e_flash_device,
 	.get_priv_flags    = mlx5e_get_priv_flags,
 	.set_priv_flags    = mlx5e_set_priv_flags,
 	.self_test         = mlx5e_self_test,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index ebd81f6b556e..90cb50fe17fd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -122,6 +122,14 @@ static int mlx5i_get_ts_info(struct net_device *netdev,
 	return mlx5e_ethtool_get_ts_info(priv, info);
 }
 
+static int mlx5i_flash_device(struct net_device *netdev,
+			      struct ethtool_flash *flash)
+{
+	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+	return mlx5e_ethtool_flash_device(priv, flash);
+}
+
 enum mlx5_ptys_width {
 	MLX5_PTYS_WIDTH_1X	= 1 << 0,
 	MLX5_PTYS_WIDTH_2X	= 1 << 1,
@@ -233,6 +241,7 @@ const struct ethtool_ops mlx5i_ethtool_ops = {
 	.get_ethtool_stats  = mlx5i_get_ethtool_stats,
 	.get_ringparam      = mlx5i_get_ringparam,
 	.set_ringparam      = mlx5i_set_ringparam,
+	.flash_device       = mlx5i_flash_device,
 	.get_channels       = mlx5i_get_channels,
 	.set_channels       = mlx5i_set_channels,
 	.get_coalesce       = mlx5i_get_coalesce,
-- 
2.21.0


^ permalink raw reply related

* [net 1/2] net/mlx5e: Fix a race with XSKICOSQ in XSK wakeup flow
From: Saeed Mahameed @ 2019-08-15 18:47 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Maxim Mikityanskiy, Tariq Toukan,
	Saeed Mahameed
In-Reply-To: <20190815184639.8206-1-saeedm@mellanox.com>

From: Maxim Mikityanskiy <maximmi@mellanox.com>

Add a missing spinlock around XSKICOSQ usage at the activation stage,
because there is a race between a configuration change and the
application calling sendto().

Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support")
Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index aaffa6f68dc0..7f78c004d12f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -143,7 +143,10 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c)
 {
 	set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
 	/* TX queue is created active. */
+
+	spin_lock(&c->xskicosq_lock);
 	mlx5e_trigger_irq(&c->xskicosq);
+	spin_unlock(&c->xskicosq_lock);
 }
 
 void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
-- 
2.21.0


^ permalink raw reply related

* [pull request][net 0/2] Mellanox, mlx5 fixes 2019-08-15
From: Saeed Mahameed @ 2019-08-15 18:47 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev@vger.kernel.org, Saeed Mahameed

Hi Dave,

This series introduces two fixes to mlx5 driver.

1) Eran fixes a compatibility issue with ethtool flash.
2) Maxim fixes a race in XSK wakeup flow.

Please pull and let me know if there is any problem.

Thanks,
Saeed.

---
The following changes since commit 2aafdf5a5786ebbd8ccfe132ed6267c6962c5c3c:

  selftests: net: tcp_fastopen_backup_key.sh: fix shellcheck issue (2019-08-15 11:34:32 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git tags/mlx5-fixes-2019-08-15

for you to fetch changes up to f43d48d10a42787c1de1d3facd7db210c91db1da:

  net/mlx5e: Fix compatibility issue with ethtool flash device (2019-08-15 11:43:57 -0700)

----------------------------------------------------------------
mlx5-fixes-2019-08-15

----------------------------------------------------------------
Eran Ben Elisha (1):
      net/mlx5e: Fix compatibility issue with ethtool flash device

Maxim Mikityanskiy (1):
      net/mlx5e: Fix a race with XSKICOSQ in XSK wakeup flow

 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  2 ++
 .../net/ethernet/mellanox/mlx5/core/en/xsk/setup.c |  3 ++
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 35 ++++++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/ipoib/ethtool.c    |  9 ++++++
 4 files changed, 49 insertions(+)

^ permalink raw reply

* Re: [PATCH net-next 2/3] tcp: ulp: add functions to dump ulp-specific information
From: Eric Dumazet @ 2019-08-15 18:46 UTC (permalink / raw)
  To: Davide Caratti, Boris Pismenny, Jakub Kicinski, John Fastabend,
	Dave Watson, Aviad Yehezkel, David S. Miller, netdev
In-Reply-To: <f9b5663d28547b0d1c187d874c7b5e5ece8fe8fa.1565882584.git.dcaratti@redhat.com>



On 8/15/19 6:00 PM, Davide Caratti wrote:

>  
> +	if (net_admin) {
> +		const struct tcp_ulp_ops *ulp_ops;
> +
> +		rcu_read_lock();
> +		ulp_ops = icsk->icsk_ulp_ops;
> +		if (ulp_ops)
> +			err = tcp_diag_put_ulp(skb, sk, ulp_ops);
> +		rcu_read_unlock();
> +		if (err)
> +			return err;
> +	}
>  	return 0;


Why is rcu_read_lock() and rcu_read_unlock() used at all ?

icsk->icsk_ulp_ops does not seem to be rcu protected ?

If this was, then an rcu_dereference() would be appropriate.


^ permalink raw reply

* Re: [PATCH] wimax/i2400m: fix a memory leak bug
From: Liam R. Howlett @ 2019-08-15 18:45 UTC (permalink / raw)
  To: Wenwen Wang
  Cc: Inaky Perez-Gonzalez,
	supporter:INTEL WIRELESS WIMAX CONNECTION 2400, David S. Miller,
	open list:NETWORKING DRIVERS, open list
In-Reply-To: <1565892301-2812-1-git-send-email-wenwen@cs.uga.edu>

* Wenwen Wang <wenwen@cs.uga.edu> [190815 14:05]:
> In i2400m_barker_db_init(), 'options_orig' is allocated through kstrdup()
> to hold the original command line options. Then, the options are parsed.
> However, if an error occurs during the parsing process, 'options_orig' is
> not deallocated, leading to a memory leak bug. To fix this issue, free
> 'options_orig' before returning the error.
> 
> Signed-off-by: Wenwen Wang <wenwen@cs.uga.edu>
> ---
>  drivers/net/wimax/i2400m/fw.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c
> index e9fc168..6b36f6d 100644
> --- a/drivers/net/wimax/i2400m/fw.c
> +++ b/drivers/net/wimax/i2400m/fw.c
> @@ -342,6 +342,7 @@ int i2400m_barker_db_init(const char *_options)
>  				       "a 32-bit number\n",
>  				       __func__, token);
>  				result = -EINVAL;
> +				kfree(options_orig);
>  				goto error_parse;
>  			}
>  			if (barker == 0) {
> @@ -350,8 +351,10 @@ int i2400m_barker_db_init(const char *_options)
>  				continue;
>  			}
>  			result = i2400m_barker_db_add(barker);
> -			if (result < 0)
> +			if (result < 0) {
> +				kfree(options_orig);
>  				goto error_add;

I know that you didn't add this error_add label, but it seems like the
incorrect goto label.  Although looking at the caller indicates an add
failed, this label is used prior to and after the memory leak you are
trying to fix.  It might be better to change this label to something
like error_parse_add and move the kfree to the unwinding.  If a new
label is used, it becomes more clear as to what is being undone and
there aren't two jumps into an unwind from two very different stages of
the function.  Adding a new label also has the benefit of moving the
kfree to the unwind of error_parse.

Thanks,
Liam


> +			}
>  		}
>  		kfree(options_orig);
>  	}
> -- 
> 2.7.4
> 

^ permalink raw reply

* Re: [PATCH net-next] net: phy: swphy: emulate register MII_ESTATUS
From: David Miller @ 2019-08-15 18:44 UTC (permalink / raw)
  To: hkallweit1; +Cc: andrew, f.fainelli, netdev
In-Reply-To: <25690798-5122-d5a2-7d2b-c166b8649a2e@gmail.com>

From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 15 Aug 2019 13:19:22 +0200

> When the genphy driver binds to a swphy it will call
> genphy_read_abilites that will try to read MII_ESTATUS if BMSR_ESTATEN
> is set in MII_BMSR. So far this would read the default value 0xffff
> and 1000FD and 1000HD are reported as supported just by chance.
> Better add explicit support for emulating MII_ESTATUS.
> 
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
> Reviewed-by: Andrew Lunn <andrew@lunn.ch>

Applied.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox