Netdev List
 help / color / mirror / Atom feed
* [for-next 08/12] net/mlx5i: Use compilation flag in IPOIB header
From: Saeed Mahameed @ 2018-05-26  0:02 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Tariq Toukan, Saeed Mahameed
In-Reply-To: <20180526000207.19568-1-saeedm@mellanox.com>

From: Tariq Toukan <tariqt@mellanox.com>

If CONFIG_MLX5_CORE_IPOIB is not set, compile-out the
IPOIB related headers.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index 45a11864e544..08eac92fc26c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -33,6 +33,8 @@
 #ifndef __MLX5E_IPOB_H__
 #define __MLX5E_IPOB_H__
 
+#ifdef CONFIG_MLX5_CORE_IPOIB
+
 #include <linux/mlx5/fs.h>
 #include "en.h"
 
@@ -120,4 +122,5 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 			  struct mlx5_av *av, u32 dqpn, u32 dqkey);
 void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 
+#endif /* CONFIG_MLX5_CORE_IPOIB */
 #endif /* __MLX5E_IPOB_H__ */
-- 
2.17.0

^ permalink raw reply related

* [for-next 09/12] net/mlx5: Use order-0 allocations for all WQ types
From: Saeed Mahameed @ 2018-05-26  0:02 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Tariq Toukan, Saeed Mahameed
In-Reply-To: <20180526000207.19568-1-saeedm@mellanox.com>

From: Tariq Toukan <tariqt@mellanox.com>

Complete the transition of all WQ types to use fragmented
order-0 coherent memory instead of high-order allocations.

CQ-WQ already uses order-0.
Here we do the same for cyclic and linked-list WQs.

This allows the driver to load cleanly on systems with highly
fragmented coherent memory.

Performance tests:
ConnectX-5 100Gbps, CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz
Packet rate of 64B packets, single transmit ring, size 8K.

No degradation was observed.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  2 +-
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 15 +--
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 17 ++--
 .../net/ethernet/mellanox/mlx5/core/en_tx.c   | 24 ++---
 .../ethernet/mellanox/mlx5/core/fpga/conn.c   | 14 +--
 .../ethernet/mellanox/mlx5/core/fpga/conn.h   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/wq.c  | 94 ++++++++++++-------
 drivers/net/ethernet/mellanox/mlx5/core/wq.h  | 33 +++----
 include/linux/mlx5/driver.h                   | 16 +++-
 9 files changed, 123 insertions(+), 94 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 3c0f0a0343fd..9396db54973f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -314,7 +314,7 @@ struct mlx5e_cq {
 
 	/* control */
 	struct mlx5_core_dev      *mdev;
-	struct mlx5_frag_wq_ctrl   wq_ctrl;
+	struct mlx5_wq_ctrl        wq_ctrl;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_tx_wqe_info {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a8b1e43384ca..0c167e5fc346 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -646,8 +646,8 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq,
 						MLX5_ADAPTER_PAGE_SHIFT);
 	MLX5_SET64(wq, wq,  dbr_addr,		rq->wq_ctrl.db.dma);
 
-	mlx5_fill_page_array(&rq->wq_ctrl.buf,
-			     (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+	mlx5_fill_page_frag_array(&rq->wq_ctrl.buf,
+				  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
 
 	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
 
@@ -1096,7 +1096,8 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
 					  MLX5_ADAPTER_PAGE_SHIFT);
 	MLX5_SET64(wq, wq, dbr_addr,      csp->wq_ctrl->db.dma);
 
-	mlx5_fill_page_array(&csp->wq_ctrl->buf, (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+	mlx5_fill_page_frag_array(&csp->wq_ctrl->buf,
+				  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
 
 	err = mlx5_core_create_sq(mdev, in, inlen, sqn);
 
@@ -1538,7 +1539,7 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
 
 static void mlx5e_free_cq(struct mlx5e_cq *cq)
 {
-	mlx5_cqwq_destroy(&cq->wq_ctrl);
+	mlx5_wq_destroy(&cq->wq_ctrl);
 }
 
 static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
@@ -1554,7 +1555,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
-		sizeof(u64) * cq->wq_ctrl.frag_buf.npages;
+		sizeof(u64) * cq->wq_ctrl.buf.npages;
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
@@ -1563,7 +1564,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 
 	memcpy(cqc, param->cqc, sizeof(param->cqc));
 
-	mlx5_fill_page_frag_array(&cq->wq_ctrl.frag_buf,
+	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
 				  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
 
 	mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
@@ -1571,7 +1572,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 	MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
 	MLX5_SET(cqc,   cqc, c_eqn,         eqn);
 	MLX5_SET(cqc,   cqc, uar_page,      mdev->priv.uar->index);
-	MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.frag_buf.page_shift -
+	MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
 					    MLX5_ADAPTER_PAGE_SHIFT);
 	MLX5_SET64(cqc, cqc, dbr_addr,      cq->wq_ctrl.db.dma);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index f4d2c8886492..ac54380d41e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -383,16 +383,16 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
 	return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
 }
 
-static inline void mlx5e_fill_icosq_edge(struct mlx5e_icosq *sq,
-					 struct mlx5_wq_cyc *wq,
-					 u16 pi)
+static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
+					      struct mlx5_wq_cyc *wq,
+					      u16 pi, u16 frag_pi)
 {
 	struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
-	u8 nnops = mlx5_wq_cyc_get_size(wq) - pi;
+	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
 
 	edge_wi = wi + nnops;
 
-	/* fill sq edge with nops to avoid wqe wrapping two pages */
+	/* fill sq frag edge with nops to avoid wqe wrapping two pages */
 	for (; wi < edge_wi; wi++) {
 		wi->opcode = MLX5_OPCODE_NOP;
 		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
@@ -407,14 +407,15 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5e_umr_wqe *umr_wqe;
 	u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
-	u16 pi;
+	u16 pi, frag_pi;
 	int err;
 	int i;
 
 	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
 
-	if (unlikely(pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_size(wq))) {
-		mlx5e_fill_icosq_edge(sq, wq, pi);
+	if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
+		mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
 		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index fc68e72b0b2b..d37566be06e1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -296,16 +296,16 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	return -ENOMEM;
 }
 
-static inline void mlx5e_fill_sq_edge(struct mlx5e_txqsq *sq,
-				      struct mlx5_wq_cyc *wq,
-				      u16 pi)
+static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
+					   struct mlx5_wq_cyc *wq,
+					   u16 pi, u16 frag_pi)
 {
 	struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-	u8 nnops = mlx5_wq_cyc_get_size(wq) - pi;
+	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
 
 	edge_wi = wi + nnops;
 
-	/* fill sq edge with nops to avoid wqe wrap around */
+	/* fill sq frag edge with nops to avoid wqe wrapping two pages */
 	for (; wi < edge_wi; wi++) {
 		wi->skb        = NULL;
 		wi->num_wqebbs = 1;
@@ -358,8 +358,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	unsigned char *skb_data = skb->data;
 	unsigned int skb_len = skb->len;
 	u16 ds_cnt, ds_cnt_inl = 0;
+	u16 headlen, ihs, frag_pi;
 	u8 num_wqebbs, opcode;
-	u16 headlen, ihs;
 	u32 num_bytes;
 	int num_dma;
 	__be16 mss;
@@ -395,8 +395,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	}
 
 	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	if (unlikely(pi + num_wqebbs > mlx5_wq_cyc_get_size(wq))) {
-		mlx5e_fill_sq_edge(sq, wq, pi);
+	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
+	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
+		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
 		mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
 	}
 
@@ -642,9 +643,9 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 
 	unsigned char *skb_data = skb->data;
 	unsigned int skb_len = skb->len;
+	u16 headlen, ihs, pi, frag_pi;
 	u16 ds_cnt, ds_cnt_inl = 0;
 	u8 num_wqebbs, opcode;
-	u16 headlen, ihs, pi;
 	u32 num_bytes;
 	int num_dma;
 	__be16 mss;
@@ -680,8 +681,9 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	}
 
 	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	if (unlikely(pi + num_wqebbs > mlx5_wq_cyc_get_size(wq))) {
-		mlx5e_fill_sq_edge(sq, wq, pi);
+	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
+	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
+		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
 		mlx5i_sq_fetch_wqe(sq, &wqe, &pi);
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index de7fe087d6fe..4e5a5cf25f17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -454,7 +454,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
 	}
 
 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
-		sizeof(u64) * conn->cq.wq_ctrl.frag_buf.npages;
+		sizeof(u64) * conn->cq.wq_ctrl.buf.npages;
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		err = -ENOMEM;
@@ -469,12 +469,12 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
 	MLX5_SET(cqc, cqc, c_eqn, eqn);
 	MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
-	MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.frag_buf.page_shift -
+	MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift -
 			   MLX5_ADAPTER_PAGE_SHIFT);
 	MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma);
 
 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
-	mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.frag_buf, pas);
+	mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas);
 
 	err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen);
 	kvfree(in);
@@ -500,7 +500,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
 	goto out;
 
 err_cqwq:
-	mlx5_cqwq_destroy(&conn->cq.wq_ctrl);
+	mlx5_wq_destroy(&conn->cq.wq_ctrl);
 out:
 	return err;
 }
@@ -510,7 +510,7 @@ static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn)
 	tasklet_disable(&conn->cq.tasklet);
 	tasklet_kill(&conn->cq.tasklet);
 	mlx5_core_destroy_cq(conn->fdev->mdev, &conn->cq.mcq);
-	mlx5_cqwq_destroy(&conn->cq.wq_ctrl);
+	mlx5_wq_destroy(&conn->cq.wq_ctrl);
 }
 
 static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc)
@@ -591,8 +591,8 @@ static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
 	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
 		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
 
-	mlx5_fill_page_array(&conn->qp.wq_ctrl.buf,
-			     (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas));
+	mlx5_fill_page_frag_array(&conn->qp.wq_ctrl.buf,
+				  (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas));
 
 	err = mlx5_core_create_qp(mdev, &conn->qp.mqp, in, inlen);
 	if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
index 44bd9eccc711..634ae10e287b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
@@ -54,7 +54,7 @@ struct mlx5_fpga_conn {
 	/* CQ */
 	struct {
 		struct mlx5_cqwq wq;
-		struct mlx5_frag_wq_ctrl wq_ctrl;
+		struct mlx5_wq_ctrl wq_ctrl;
 		struct mlx5_core_cq mcq;
 		struct tasklet_struct tasklet;
 	} cq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index ea66448ba365..5b8b35392025 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -36,7 +36,12 @@
 
 u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
 {
-	return (u32)wq->sz_m1 + 1;
+	return (u32)wq->fbc.sz_m1 + 1;
+}
+
+u32 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
+{
+	return (u32)wq->fbc.frag_sz_m1 + 1;
 }
 
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
@@ -46,12 +51,12 @@ u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
 
 u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
 {
-	return (u32)wq->sz_m1 + 1;
+	return (u32)wq->fbc.sz_m1 + 1;
 }
 
 static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq)
 {
-	return mlx5_wq_cyc_get_size(wq) << wq->log_stride;
+	return mlx5_wq_cyc_get_size(wq) << wq->fbc.log_stride;
 }
 
 static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
@@ -67,17 +72,19 @@ static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
 
 static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq)
 {
-	return mlx5_wq_ll_get_size(wq) << wq->log_stride;
+	return mlx5_wq_ll_get_size(wq) << wq->fbc.log_stride;
 }
 
 int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		       void *wqc, struct mlx5_wq_cyc *wq,
 		       struct mlx5_wq_ctrl *wq_ctrl)
 {
+	struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
 	int err;
 
-	wq->log_stride = MLX5_GET(wq, wqc, log_wq_stride);
-	wq->sz_m1 = (1 << MLX5_GET(wq, wqc, log_wq_sz)) - 1;
+	mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
+		      MLX5_GET(wq, wqc, log_wq_sz),
+		      fbc);
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
@@ -85,14 +92,14 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		return err;
 	}
 
-	err = mlx5_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq),
-				  &wq_ctrl->buf, param->buf_numa_node);
+	err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq),
+				       &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
-	wq->buf = wq_ctrl->buf.frags->buf;
+	fbc->frag_buf = wq_ctrl->buf;
 	wq->db  = wq_ctrl->db.db;
 
 	wq_ctrl->mdev = mdev;
@@ -105,17 +112,35 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 	return err;
 }
 
+/* Carve buf into per-WQ views: RQ first, then SQ; each sized by its own WQ */
+static void mlx5e_qp_set_frag_buf(struct mlx5_frag_buf *buf,
+				  struct mlx5_wq_qp *qp)
+{
+	struct mlx5_frag_buf *rqb = &qp->rq.fbc.frag_buf;
+	struct mlx5_frag_buf *sqb = &qp->sq.fbc.frag_buf;
+
+	*rqb = *buf;
+	rqb->size   = mlx5_wq_cyc_get_byte_size(&qp->rq);
+	rqb->npages = 1 << get_order(rqb->size);
+
+	*sqb = *buf;
+	sqb->size   = mlx5_wq_cyc_get_byte_size(&qp->sq);
+	sqb->npages = 1 << get_order(sqb->size);
+	sqb->frags += rqb->npages; /* first part is for the rq */
+}
+
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
 		      struct mlx5_wq_ctrl *wq_ctrl)
 {
 	int err;
 
-	wq->rq.log_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4;
-	wq->rq.sz_m1 = (1 << MLX5_GET(qpc, qpc, log_rq_size)) - 1;
-
-	wq->sq.log_stride = ilog2(MLX5_SEND_WQE_BB);
-	wq->sq.sz_m1 = (1 << MLX5_GET(qpc, qpc, log_sq_size)) - 1;
+	mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4,
+		      MLX5_GET(qpc, qpc, log_rq_size),
+		      &wq->rq.fbc);
+	mlx5_fill_fbc(ilog2(MLX5_SEND_WQE_BB),
+		      MLX5_GET(qpc, qpc, log_sq_size),
+		      &wq->sq.fbc);
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
@@ -123,15 +148,15 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		return err;
 	}
 
-	err = mlx5_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
-				  &wq_ctrl->buf, param->buf_numa_node);
+	err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
+				       &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
-	wq->rq.buf = wq_ctrl->buf.frags->buf;
-	wq->sq.buf = wq->rq.buf + mlx5_wq_cyc_get_byte_size(&wq->rq);
+	mlx5e_qp_set_frag_buf(&wq_ctrl->buf, wq);
+
 	wq->rq.db  = &wq_ctrl->db.db[MLX5_RCV_DBR];
 	wq->sq.db  = &wq_ctrl->db.db[MLX5_SND_DBR];
 
@@ -147,7 +172,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		     void *cqc, struct mlx5_cqwq *wq,
-		     struct mlx5_frag_wq_ctrl *wq_ctrl)
+		     struct mlx5_wq_ctrl *wq_ctrl)
 {
 	int err;
 
@@ -160,7 +185,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 	}
 
 	err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
-				       &wq_ctrl->frag_buf,
+				       &wq_ctrl->buf,
 				       param->buf_numa_node);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n",
@@ -168,7 +193,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		goto err_db_free;
 	}
 
-	wq->fbc.frag_buf = wq_ctrl->frag_buf;
+	wq->fbc.frag_buf = wq_ctrl->buf;
 	wq->db  = wq_ctrl->db.db;
 
 	wq_ctrl->mdev = mdev;
@@ -185,12 +210,14 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *wqc, struct mlx5_wq_ll *wq,
 		      struct mlx5_wq_ctrl *wq_ctrl)
 {
+	struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
 	struct mlx5_wqe_srq_next_seg *next_seg;
 	int err;
 	int i;
 
-	wq->log_stride = MLX5_GET(wq, wqc, log_wq_stride);
-	wq->sz_m1 = (1 << MLX5_GET(wq, wqc, log_wq_sz)) - 1;
+	mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
+		      MLX5_GET(wq, wqc, log_wq_sz),
+		      fbc);
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
@@ -198,17 +225,17 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		return err;
 	}
 
-	err = mlx5_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq),
-				  &wq_ctrl->buf, param->buf_numa_node);
+	err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq),
+				       &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
-	wq->buf = wq_ctrl->buf.frags->buf;
+	wq->fbc.frag_buf = wq_ctrl->buf;
 	wq->db  = wq_ctrl->db.db;
 
-	for (i = 0; i < wq->sz_m1; i++) {
+	for (i = 0; i < fbc->sz_m1; i++) {
 		next_seg = mlx5_wq_ll_get_wqe(wq, i);
 		next_seg->next_wqe_index = cpu_to_be16(i + 1);
 	}
@@ -227,12 +254,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 
 void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl)
 {
-	mlx5_buf_free(wq_ctrl->mdev, &wq_ctrl->buf);
+	mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->buf);
 	mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db);
 }
 
-void mlx5_cqwq_destroy(struct mlx5_frag_wq_ctrl *wq_ctrl)
-{
-	mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->frag_buf);
-	mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index a3572e148f09..b9d7c01fc7cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -48,17 +48,9 @@ struct mlx5_wq_ctrl {
 	struct mlx5_db		db;
 };
 
-struct mlx5_frag_wq_ctrl {
-	struct mlx5_core_dev	*mdev;
-	struct mlx5_frag_buf	frag_buf;
-	struct mlx5_db		db;
-};
-
 struct mlx5_wq_cyc {
-	void			*buf;
+	struct mlx5_frag_buf_ctrl fbc;
 	__be32			*db;
-	u16			sz_m1;
-	u8			log_stride;
 };
 
 struct mlx5_wq_qp {
@@ -73,20 +65,19 @@ struct mlx5_cqwq {
 };
 
 struct mlx5_wq_ll {
-	void			*buf;
+	struct mlx5_frag_buf_ctrl fbc;
 	__be32			*db;
 	__be16			*tail_next;
-	u16			sz_m1;
 	u16			head;
 	u16			wqe_ctr;
 	u16			cur_sz;
-	u8			log_stride;
 };
 
 int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		       void *wqc, struct mlx5_wq_cyc *wq,
 		       struct mlx5_wq_ctrl *wq_ctrl);
 u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
+u32 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
 
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
@@ -94,7 +85,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		     void *cqc, struct mlx5_cqwq *wq,
-		     struct mlx5_frag_wq_ctrl *wq_ctrl);
+		     struct mlx5_wq_ctrl *wq_ctrl);
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq);
 
 int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
@@ -103,16 +94,20 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq);
 
 void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl);
-void mlx5_cqwq_destroy(struct mlx5_frag_wq_ctrl *wq_ctrl);
 
 static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
 {
-	return ctr & wq->sz_m1;
+	return ctr & wq->fbc.sz_m1;
+}
+
+static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr)
+{
+	return ctr & wq->fbc.frag_sz_m1;
 }
 
 static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
 {
-	return wq->buf + (ix << wq->log_stride);
+	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
 }
 
 static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
@@ -176,7 +171,7 @@ static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq)
 
 static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq)
 {
-	return wq->cur_sz == wq->sz_m1;
+	return wq->cur_sz == wq->fbc.sz_m1;
 }
 
 static inline int mlx5_wq_ll_is_empty(struct mlx5_wq_ll *wq)
@@ -186,12 +181,12 @@ static inline int mlx5_wq_ll_is_empty(struct mlx5_wq_ll *wq)
 
 static inline u16 mlx5_wq_ll_ctr2ix(struct mlx5_wq_ll *wq, u16 ctr)
 {
-	return ctr & wq->sz_m1;
+	return ctr & wq->fbc.sz_m1;
 }
 
 static inline void *mlx5_wq_ll_get_wqe(struct mlx5_wq_ll *wq, u16 ix)
 {
-	return wq->buf + (ix << wq->log_stride);
+	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
 }
 
 static inline void mlx5_wq_ll_push(struct mlx5_wq_ll *wq, u16 head_next)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 92d292454351..80cbb7fdce4a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -983,16 +983,24 @@ static inline u32 mlx5_base_mkey(const u32 key)
 	return key & 0xffffff00u;
 }
 
-static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
-					      void *cqc)
+static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz,
+				 struct mlx5_frag_buf_ctrl *fbc)
 {
-	fbc->log_stride	= 6 + MLX5_GET(cqc, cqc, cqe_sz);
-	fbc->log_sz	= MLX5_GET(cqc, cqc, log_cq_size);
+	fbc->log_stride = log_stride;
+	fbc->log_sz     = log_sz;
 	fbc->sz_m1	= (1 << fbc->log_sz) - 1;
 	fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride;
 	fbc->frag_sz_m1	= (1 << fbc->log_frag_strides) - 1;
 }
 
+static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
+					      void *cqc)
+{
+	mlx5_fill_fbc(6 + MLX5_GET(cqc, cqc, cqe_sz),
+		      MLX5_GET(cqc, cqc, log_cq_size),
+		      fbc);
+}
+
 static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
 					  u32 ix)
 {
-- 
2.17.0

^ permalink raw reply related

* [for-next 10/12] net/mlx5e: Move phy link down events counter out of SW stats
From: Saeed Mahameed @ 2018-05-26  0:02 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Saeed Mahameed
In-Reply-To: <20180526000207.19568-1-saeedm@mellanox.com>

The PHY link down events counter belongs to the phy_counters group.
Although it has special handling, that doesn't mean it can't be there.

Move it to phy_counters_grp handler.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en_stats.c    | 37 +++++++++++--------
 .../ethernet/mellanox/mlx5/core/en_stats.h    |  3 --
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index e17919c0af08..973939ed8bb5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -81,7 +81,6 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) },
 };
 
 #define NUM_SW_COUNTERS			ARRAY_SIZE(sw_stats_desc)
@@ -175,9 +174,6 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 		}
 	}
 
-	s->link_down_events_phy = MLX5_GET(ppcnt_reg,
-				priv->stats.pport.phy_counters,
-				counter_set.phys_layer_cntrs.link_down_events);
 	memcpy(&priv->stats.sw, s, sizeof(*s));
 }
 
@@ -580,12 +576,13 @@ static const struct counter_desc pport_phy_statistical_stats_desc[] = {
 	{ "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) },
 };
 
-#define NUM_PPORT_PHY_COUNTERS		ARRAY_SIZE(pport_phy_statistical_stats_desc)
+#define NUM_PPORT_PHY_STATISTICAL_COUNTERS ARRAY_SIZE(pport_phy_statistical_stats_desc)
 
 static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv)
 {
+	/* "1" for link_down_events special counter */
 	return MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group) ?
-		NUM_PPORT_PHY_COUNTERS : 0;
+		NUM_PPORT_PHY_STATISTICAL_COUNTERS + 1 : 1;
 }
 
 static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data,
@@ -593,10 +590,14 @@ static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data,
 {
 	int i;
 
-	if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
-		for (i = 0; i < NUM_PPORT_PHY_COUNTERS; i++)
-			strcpy(data + (idx++) * ETH_GSTRING_LEN,
-			       pport_phy_statistical_stats_desc[i].format);
+	strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy");
+
+	if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+		return idx;
+
+	for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN,
+		       pport_phy_statistical_stats_desc[i].format);
 	return idx;
 }
 
@@ -604,11 +605,17 @@ static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
 {
 	int i;
 
-	if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
-		for (i = 0; i < NUM_PPORT_PHY_COUNTERS; i++)
-			data[idx++] =
-				MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
-						    pport_phy_statistical_stats_desc, i);
+	/* link_down_events_phy has special handling since it is not stored in __be64 format */
+	data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters,
+			       counter_set.phys_layer_cntrs.link_down_events);
+
+	if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+		return idx;
+
+	for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
+		data[idx++] =
+			MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
+					    pport_phy_statistical_stats_desc, i);
 	return idx;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index a36e6a87066b..39ced559929a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -97,9 +97,6 @@ struct mlx5e_sw_stats {
 	u64 tx_tls_ooo;
 	u64 tx_tls_resync_bytes;
 #endif
-
-	/* Special handling counters */
-	u64 link_down_events_phy;
 };
 
 struct mlx5e_qcounter_stats {
-- 
2.17.0

^ permalink raw reply related

* [for-next 11/12] net/mlx5e: Introducing new statistics rwlock
From: Saeed Mahameed @ 2018-05-26  0:02 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Shalom Lagziel, Saeed Mahameed
In-Reply-To: <20180526000207.19568-1-saeedm@mellanox.com>

From: Shalom Lagziel <shaloml@mellanox.com>

Introduce a new read/write lock that protects statistics gathering from
netdev channel configuration changes.
E.g. when channels are being replaced (increasing/decreasing the number
of rings), prevent statistics gathering (ndo_get_stats64) from reading
the statistics of inactive channels (channels that are being closed).

In addition, update the channels' software statistics on the fly when
ndo_get_stats64 is called, and remove that update from the periodic
stats work.

Fixes: 9218b44dcc05 ("net/mlx5e: Statistics handling refactoring")
Signed-off-by: Shalom Lagziel <shaloml@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h     |  2 ++
 .../net/ethernet/mellanox/mlx5/core/en_main.c    |  8 ++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 16 +++++++++-------
 .../net/ethernet/mellanox/mlx5/core/en_stats.c   |  8 ++++++--
 .../net/ethernet/mellanox/mlx5/core/en_stats.h   |  2 ++
 5 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9396db54973f..c3c79f2835d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -771,6 +771,8 @@ struct mlx5e_priv {
 	struct mutex               state_lock; /* Protects Interface state */
 	struct mlx5e_rq            drop_rq;
 
+	rwlock_t                   stats_lock; /* Protects channels SW stats updates */
+	bool                       channels_active;
 	struct mlx5e_channels      channels;
 	u32                        tisn[MLX5E_MAX_NUM_TC];
 	struct mlx5e_rqt           indir_rqt;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0c167e5fc346..0e9c64580abb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2658,6 +2658,9 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
 
 	mlx5e_build_channels_tx_maps(priv);
 	mlx5e_activate_channels(&priv->channels);
+	write_lock(&priv->stats_lock);
+	priv->channels_active = true;
+	write_unlock(&priv->stats_lock);
 	netif_tx_start_all_queues(priv->netdev);
 
 	if (MLX5_VPORT_MANAGER(priv->mdev))
@@ -2679,6 +2682,9 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
 	 */
 	netif_tx_stop_all_queues(priv->netdev);
 	netif_tx_disable(priv->netdev);
+	write_lock(&priv->stats_lock);
+	priv->channels_active = false;
+	write_unlock(&priv->stats_lock);
 	mlx5e_deactivate_channels(&priv->channels);
 }
 
@@ -3223,6 +3229,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 		stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
 		stats->tx_bytes   = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
 	} else {
+		mlx5e_grp_sw_update_stats(priv);
 		stats->rx_packets = sstats->rx_packets;
 		stats->rx_bytes   = sstats->rx_bytes;
 		stats->tx_packets = sstats->tx_packets;
@@ -4248,6 +4255,7 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
 			       profile->max_nch(mdev), netdev->mtu);
 
 	mutex_init(&priv->state_lock);
+	rwlock_init(&priv->stats_lock);
 
 	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
 	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index c3034f58aa33..1a3f9e091385 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -130,6 +130,10 @@ static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
 	struct mlx5e_sq_stats *sq_stats;
 	int i, j;
 
+	read_lock(&priv->stats_lock);
+	if (!priv->channels_active)
+	        goto out;
+
 	memset(s, 0, sizeof(*s));
 	for (i = 0; i < priv->channels.num; i++) {
 		struct mlx5e_channel *c = priv->channels.c[i];
@@ -146,12 +150,8 @@ static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
 			s->tx_bytes		+= sq_stats->bytes;
 		}
 	}
-}
-
-static void mlx5e_rep_update_stats(struct mlx5e_priv *priv)
-{
-	mlx5e_rep_update_sw_counters(priv);
-	mlx5e_rep_update_hw_counters(priv);
+out:
+	read_unlock(&priv->stats_lock);
 }
 
 static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
@@ -871,6 +871,8 @@ mlx5e_get_sw_stats64(const struct net_device *dev,
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5e_sw_stats *sstats = &priv->stats.sw;
 
+	mlx5e_rep_update_sw_counters(priv);
+
 	stats->rx_packets = sstats->rx_packets;
 	stats->rx_bytes   = sstats->rx_bytes;
 	stats->tx_packets = sstats->tx_packets;
@@ -1046,7 +1048,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = {
 	.cleanup_rx		= mlx5e_cleanup_rep_rx,
 	.init_tx		= mlx5e_init_rep_tx,
 	.cleanup_tx		= mlx5e_cleanup_nic_tx,
-	.update_stats           = mlx5e_rep_update_stats,
+	.update_stats           = mlx5e_rep_update_hw_counters,
 	.max_nch		= mlx5e_get_rep_max_num_channels,
 	.update_carrier		= NULL,
 	.rx_handlers.handle_rx_cqe       = mlx5e_handle_rx_cqe_rep,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 973939ed8bb5..323f2af4200b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -108,7 +108,7 @@ static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
 	return idx;
 }
 
-static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
+void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 {
 	struct mlx5e_sw_stats temp, *s = &temp;
 	struct mlx5e_rq_stats *rq_stats;
@@ -117,6 +117,9 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 	int i, j;
 
 	memset(s, 0, sizeof(*s));
+	read_lock(&priv->stats_lock);
+	if (!priv->channels_active)
+		goto out;
 	for (i = 0; i < priv->channels.num; i++) {
 		struct mlx5e_channel *c = priv->channels.c[i];
 
@@ -175,6 +178,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 	}
 
 	memcpy(&priv->stats.sw, s, sizeof(*s));
+out:
+	read_unlock(&priv->stats_lock);
 }
 
 static const struct counter_desc q_stats_desc[] = {
@@ -1224,7 +1229,6 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = {
 		.get_num_stats = mlx5e_grp_sw_get_num_stats,
 		.fill_strings = mlx5e_grp_sw_fill_strings,
 		.fill_stats = mlx5e_grp_sw_fill_stats,
-		.update_stats_mask = MLX5E_NDO_UPDATE_STATS,
 		.update_stats = mlx5e_grp_sw_update_stats,
 	},
 	{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 39ced559929a..390c7afa5188 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -239,4 +239,6 @@ struct mlx5e_stats_grp {
 extern const struct mlx5e_stats_grp mlx5e_stats_grps[];
 extern const int mlx5e_num_stats_grps;
 
+void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv);
+
 #endif /* __MLX5_EN_STATS_H__ */
-- 
2.17.0

^ permalink raw reply related

* [for-next 12/12] net/mlx5e: Avoid reset netdev stats on configuration changes
From: Saeed Mahameed @ 2018-05-26  0:02 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Eran Ben Elisha, Qing Huang, Saeed Mahameed
In-Reply-To: <20180526000207.19568-1-saeedm@mellanox.com>

From: Eran Ben Elisha <eranbe@mellanox.com>

Move all RQ, SQ and channel counters from the channel objects into the
priv structure.  With this change, counters will not be reset upon
channel configuration changes.

Channel statistics for SQs which are associated with TCs higher than
zero will be presented in ethtool -S only for SQs which were opened at
least once since the module was loaded (regardless of their current
open/closed status).  This is done in order to decrease the total amount
of statistics presented and calculated for the common out-of-the-box use
case (no QoS).

mlx5e_channel_stats groups the CH, RQ and SQ stats of a channel in one
struct, in order to create locality for the NAPI when handling TX and RX
of the same channel.

Align the new per-ring statistics struct to the cache line, so that
several channels do not update the same cache line at the same time.
Packet rate was tested; no degradation was observed.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
CC: Qing Huang <qing.huang@oracle.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 14 +++-
 .../mellanox/mlx5/core/en_accel/tls_rxtx.c    |  4 +-
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 28 ++++---
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  |  4 +-
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 75 +++++++++++--------
 .../ethernet/mellanox/mlx5/core/en_stats.c    | 56 +++++++-------
 .../net/ethernet/mellanox/mlx5/core/en_tx.c   | 49 ++++++------
 .../net/ethernet/mellanox/mlx5/core/en_txrx.c |  6 +-
 8 files changed, 136 insertions(+), 100 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c3c79f2835d2..1c04df043e07 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -358,7 +358,6 @@ struct mlx5e_txqsq {
 	/* dirtied @xmit */
 	u16                        pc ____cacheline_aligned_in_smp;
 	u32                        dma_fifo_pc;
-	struct mlx5e_sq_stats      stats;
 
 	struct mlx5e_cq            cq;
 
@@ -371,6 +370,7 @@ struct mlx5e_txqsq {
 	/* read only */
 	struct mlx5_wq_cyc         wq;
 	u32                        dma_fifo_mask;
+	struct mlx5e_sq_stats     *stats;
 	void __iomem              *uar_map;
 	struct netdev_queue       *txq;
 	u32                        sqn;
@@ -526,7 +526,7 @@ struct mlx5e_rq {
 	struct mlx5e_channel  *channel;
 	struct device         *pdev;
 	struct net_device     *netdev;
-	struct mlx5e_rq_stats  stats;
+	struct mlx5e_rq_stats *stats;
 	struct mlx5e_cq        cq;
 	struct mlx5e_page_cache page_cache;
 	struct hwtstamp_config *tstamp;
@@ -574,7 +574,7 @@ struct mlx5e_channel {
 
 	/* data path - accessed per napi poll */
 	struct irq_desc *irq_desc;
-	struct mlx5e_ch_stats      stats;
+	struct mlx5e_ch_stats     *stats;
 
 	/* control */
 	struct mlx5e_priv         *priv;
@@ -590,6 +590,12 @@ struct mlx5e_channels {
 	struct mlx5e_params    params;
 };
 
+struct mlx5e_channel_stats {
+	struct mlx5e_ch_stats ch;
+	struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
+	struct mlx5e_rq_stats rq;
+} ____cacheline_aligned_in_smp;
+
 enum mlx5e_traffic_types {
 	MLX5E_TT_IPV4_TCP,
 	MLX5E_TT_IPV6_TCP,
@@ -793,6 +799,8 @@ struct mlx5e_priv {
 	struct mlx5_core_dev      *mdev;
 	struct net_device         *netdev;
 	struct mlx5e_stats         stats;
+	struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS];
+	u8                         max_opened_tc;
 	struct hwtstamp_config     tstamp;
 	u16                        q_counter;
 	u16                        drop_rq_q_counter;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index ad2790fb5966..15aef71d1957 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -174,7 +174,7 @@ mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context *context,
 	int headln;
 	int i;
 
-	sq->stats.tls_ooo++;
+	sq->stats->tls_ooo++;
 
 	if (mlx5e_tls_get_sync_data(context, tcp_seq, &info)) {
 		/* We might get here if a retransmission reaches the driver
@@ -220,7 +220,7 @@ mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context *context,
 	skb_shinfo(nskb)->nr_frags = info.nr_frags;
 	nskb->data_len = info.sync_len;
 	nskb->len += info.sync_len;
-	sq->stats.tls_resync_bytes += nskb->len;
+	sq->stats->tls_resync_bytes += nskb->len;
 	mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
 				    cpu_to_be64(info.rcd_sn));
 	mlx5e_sq_xmit(sq, nskb, *wqe, *pi);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0e9c64580abb..9b19863b059d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -423,6 +423,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	rq->ix      = c->ix;
 	rq->mdev    = mdev;
 	rq->hw_mtu  = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+	rq->stats   = &c->priv->channel_stats[c->ix].rq;
 
 	rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL;
 	if (IS_ERR(rq->xdp_prog)) {
@@ -1003,7 +1004,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 			     int txq_ix,
 			     struct mlx5e_params *params,
 			     struct mlx5e_sq_param *param,
-			     struct mlx5e_txqsq *sq)
+			     struct mlx5e_txqsq *sq,
+			     int tc)
 {
 	void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
 	struct mlx5_core_dev *mdev = c->mdev;
@@ -1018,6 +1020,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 	sq->txq_ix    = txq_ix;
 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
+	sq->stats     = &c->priv->channel_stats[c->ix].sq[tc];
 	INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover);
 	if (MLX5_IPSEC_DEV(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
@@ -1176,13 +1179,14 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c,
 			    int txq_ix,
 			    struct mlx5e_params *params,
 			    struct mlx5e_sq_param *param,
-			    struct mlx5e_txqsq *sq)
+			    struct mlx5e_txqsq *sq,
+			    int tc)
 {
 	struct mlx5e_create_sq_param csp = {};
 	u32 tx_rate;
 	int err;
 
-	err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq);
+	err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq, tc);
 	if (err)
 		return err;
 
@@ -1370,7 +1374,7 @@ static void mlx5e_sq_recover(struct work_struct *work)
 		return;
 
 	mlx5e_reset_txqsq_cc_pc(sq);
-	sq->stats.recover++;
+	sq->stats->recover++;
 	recover->last_recover = jiffies;
 	mlx5e_activate_txqsq(sq);
 }
@@ -1665,14 +1669,14 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c,
 			  struct mlx5e_params *params,
 			  struct mlx5e_channel_param *cparam)
 {
-	int err;
-	int tc;
+	struct mlx5e_priv *priv = c->priv;
+	int err, tc, max_nch = priv->profile->max_nch(priv->mdev);
 
 	for (tc = 0; tc < params->num_tc; tc++) {
-		int txq_ix = c->ix + tc * params->num_channels;
+		int txq_ix = c->ix + tc * max_nch;
 
 		err = mlx5e_open_txqsq(c, c->priv->tisn[tc], txq_ix,
-				       params, &cparam->sq, &c->sq[tc]);
+				       params, &cparam->sq, &c->sq[tc], tc);
 		if (err)
 			goto err_close_sqs;
 	}
@@ -1802,6 +1806,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
 	c->num_tc   = params->num_tc;
 	c->xdp      = !!params->xdp_prog;
+	c->stats    = &priv->channel_stats[ix].ch;
 
 	mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
 	c->irq_desc = irq_to_desc(irq);
@@ -2634,7 +2639,7 @@ static void mlx5e_build_channels_tx_maps(struct mlx5e_priv *priv)
 	struct mlx5e_txqsq *sq;
 	int i, tc;
 
-	for (i = 0; i < priv->channels.num; i++)
+	for (i = 0; i < priv->profile->max_nch(priv->mdev); i++)
 		for (tc = 0; tc < priv->profile->max_tc; tc++)
 			priv->channel_tc2txq[i][tc] = i + tc * priv->channels.num;
 
@@ -3139,6 +3144,8 @@ static int mlx5e_setup_tc_mqprio(struct net_device *netdev,
 	if (err)
 		goto out;
 
+	priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
+				    new_channels.params.num_tc);
 	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
 out:
 	mutex_unlock(&priv->state_lock);
@@ -3826,7 +3833,7 @@ static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
 		return false;
 
 	netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn);
-	sq->channel->stats.eq_rearm++;
+	sq->channel->stats->eq_rearm++;
 	return true;
 }
 
@@ -4250,6 +4257,7 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
 	priv->profile     = profile;
 	priv->ppriv       = ppriv;
 	priv->msglevel    = MLX5E_MSG_LEVEL;
+	priv->max_opened_tc = 1;
 
 	mlx5e_build_nic_params(mdev, &priv->channels.params,
 			       profile->max_nch(mdev), netdev->mtu);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 1a3f9e091385..de6364125f0f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -138,13 +138,13 @@ static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
 	for (i = 0; i < priv->channels.num; i++) {
 		struct mlx5e_channel *c = priv->channels.c[i];
 
-		rq_stats = &c->rq.stats;
+		rq_stats = c->rq.stats;
 
 		s->rx_packets	+= rq_stats->packets;
 		s->rx_bytes	+= rq_stats->bytes;
 
 		for (j = 0; j < priv->channels.params.num_tc; j++) {
-			sq_stats = &c->sq[j].stats;
+			sq_stats = c->sq[j].stats;
 
 			s->tx_packets		+= sq_stats->packets;
 			s->tx_bytes		+= sq_stats->bytes;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index ac54380d41e4..bfef73b37fbc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -65,7 +65,7 @@ static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
 	mlx5e_read_cqe_slot(cq, cqcc, &cq->title);
 	cq->decmprs_left        = be32_to_cpu(cq->title.byte_cnt);
 	cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter);
-	rq->stats.cqe_compress_blks++;
+	rq->stats->cqe_compress_blks++;
 }
 
 static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
@@ -146,7 +146,7 @@ static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
 	mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc);
 	cq->wq.cc = cqcc;
 	cq->decmprs_left -= cqe_count;
-	rq->stats.cqe_compress_pkts += cqe_count;
+	rq->stats->cqe_compress_pkts += cqe_count;
 
 	return cqe_count;
 }
@@ -176,14 +176,15 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
 {
 	struct mlx5e_page_cache *cache = &rq->page_cache;
 	u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
+	struct mlx5e_rq_stats *stats = rq->stats;
 
 	if (tail_next == cache->head) {
-		rq->stats.cache_full++;
+		stats->cache_full++;
 		return false;
 	}
 
 	if (unlikely(mlx5e_page_is_reserved(dma_info->page))) {
-		rq->stats.cache_waive++;
+		stats->cache_waive++;
 		return false;
 	}
 
@@ -196,20 +197,21 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
 				      struct mlx5e_dma_info *dma_info)
 {
 	struct mlx5e_page_cache *cache = &rq->page_cache;
+	struct mlx5e_rq_stats *stats = rq->stats;
 
 	if (unlikely(cache->head == cache->tail)) {
-		rq->stats.cache_empty++;
+		stats->cache_empty++;
 		return false;
 	}
 
 	if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
-		rq->stats.cache_busy++;
+		stats->cache_busy++;
 		return false;
 	}
 
 	*dma_info = cache->page_cache[cache->head];
 	cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
-	rq->stats.cache_reuse++;
+	stats->cache_reuse++;
 
 	dma_sync_single_for_device(rq->pdev, dma_info->addr,
 				   RQ_PAGE_SIZE(rq),
@@ -294,7 +296,7 @@ static inline void mlx5e_free_rx_wqe_reuse(struct mlx5e_rq *rq,
 					   struct mlx5e_wqe_frag_info *wi)
 {
 	if (mlx5e_page_reuse(rq, wi)) {
-		rq->stats.page_reuse++;
+		rq->stats->page_reuse++;
 		return;
 	}
 
@@ -452,7 +454,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 		dma_info--;
 		mlx5e_page_release(rq, dma_info, true);
 	}
-	rq->stats.buff_alloc_err++;
+	rq->stats->buff_alloc_err++;
 
 	return err;
 }
@@ -480,7 +482,7 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
 
 		err = mlx5e_alloc_rx_wqe(rq, wqe, wq->head);
 		if (unlikely(err)) {
-			rq->stats.buff_alloc_err++;
+			rq->stats->buff_alloc_err++;
 			break;
 		}
 
@@ -652,6 +654,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 				     struct sk_buff *skb,
 				     bool   lro)
 {
+	struct mlx5e_rq_stats *stats = rq->stats;
 	int network_depth = 0;
 
 	if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
@@ -659,7 +662,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 
 	if (lro) {
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		rq->stats.csum_unnecessary++;
+		stats->csum_unnecessary++;
 		return;
 	}
 
@@ -674,7 +677,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 			skb->csum = csum_partial(skb->data + ETH_HLEN,
 						 network_depth - ETH_HLEN,
 						 skb->csum);
-		rq->stats.csum_complete++;
+		stats->csum_complete++;
 		return;
 	}
 
@@ -684,15 +687,15 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 		if (cqe_is_tunneled(cqe)) {
 			skb->csum_level = 1;
 			skb->encapsulation = 1;
-			rq->stats.csum_unnecessary_inner++;
+			stats->csum_unnecessary_inner++;
 			return;
 		}
-		rq->stats.csum_unnecessary++;
+		stats->csum_unnecessary++;
 		return;
 	}
 csum_none:
 	skb->ip_summed = CHECKSUM_NONE;
-	rq->stats.csum_none++;
+	stats->csum_none++;
 }
 
 static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
@@ -701,6 +704,7 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
 				      struct sk_buff *skb)
 {
 	u8 lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
+	struct mlx5e_rq_stats *stats = rq->stats;
 	struct net_device *netdev = rq->netdev;
 
 	skb->mac_len = ETH_HLEN;
@@ -710,9 +714,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
 		/* Subtract one since we already counted this as one
 		 * "regular" packet in mlx5e_complete_rx_cqe()
 		 */
-		rq->stats.packets += lro_num_seg - 1;
-		rq->stats.lro_packets++;
-		rq->stats.lro_bytes += cqe_bcnt;
+		stats->packets += lro_num_seg - 1;
+		stats->lro_packets++;
+		stats->lro_bytes += cqe_bcnt;
 	}
 
 	if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp)))
@@ -727,7 +731,7 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
 	if (cqe_has_vlan(cqe)) {
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
 				       be16_to_cpu(cqe->vlan_info));
-		rq->stats.removed_vlan_packets++;
+		stats->removed_vlan_packets++;
 	}
 
 	skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
@@ -741,8 +745,10 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
 					 u32 cqe_bcnt,
 					 struct sk_buff *skb)
 {
-	rq->stats.packets++;
-	rq->stats.bytes += cqe_bcnt;
+	struct mlx5e_rq_stats *stats = rq->stats;
+
+	stats->packets++;
+	stats->bytes += cqe_bcnt;
 	mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
 }
 
@@ -774,10 +780,12 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
 	dma_addr_t dma_addr  = di->addr + data_offset;
 	unsigned int dma_len = xdp->data_end - xdp->data;
 
+	struct mlx5e_rq_stats *stats = rq->stats;
+
 	prefetchw(wqe);
 
 	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || rq->hw_mtu < dma_len)) {
-		rq->stats.xdp_drop++;
+		stats->xdp_drop++;
 		return false;
 	}
 
@@ -787,7 +795,7 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
 			mlx5e_xmit_xdp_doorbell(sq);
 			sq->db.doorbell = false;
 		}
-		rq->stats.xdp_tx_full++;
+		stats->xdp_tx_full++;
 		return false;
 	}
 
@@ -821,7 +829,7 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
 
 	sq->db.doorbell = true;
 
-	rq->stats.xdp_tx++;
+	stats->xdp_tx++;
 	return true;
 }
 
@@ -868,7 +876,7 @@ static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
 	case XDP_ABORTED:
 		trace_xdp_exception(rq->netdev, prog, act);
 	case XDP_DROP:
-		rq->stats.xdp_drop++;
+		rq->stats->xdp_drop++;
 		return true;
 	}
 }
@@ -881,7 +889,7 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
 	struct sk_buff *skb = build_skb(va, frag_size);
 
 	if (unlikely(!skb)) {
-		rq->stats.buff_alloc_err++;
+		rq->stats->buff_alloc_err++;
 		return NULL;
 	}
 
@@ -913,7 +921,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 	wi->offset += frag_size;
 
 	if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
-		rq->stats.wqe_err++;
+		rq->stats->wqe_err++;
 		return NULL;
 	}
 
@@ -1030,7 +1038,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 	skb = napi_alloc_skb(rq->cq.napi,
 			     ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, sizeof(long)));
 	if (unlikely(!skb)) {
-		rq->stats.buff_alloc_err++;
+		rq->stats->buff_alloc_err++;
 		return NULL;
 	}
 
@@ -1116,12 +1124,12 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	wi->consumed_strides += cstrides;
 
 	if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
-		rq->stats.wqe_err++;
+		rq->stats->wqe_err++;
 		goto mpwrq_cqe_out;
 	}
 
 	if (unlikely(mpwrq_is_filler_cqe(cqe))) {
-		rq->stats.mpwqe_filler++;
+		rq->stats->mpwqe_filler++;
 		goto mpwrq_cqe_out;
 	}
 
@@ -1276,6 +1284,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 					 u32 cqe_bcnt,
 					 struct sk_buff *skb)
 {
+	struct mlx5e_rq_stats *stats = rq->stats;
 	struct hwtstamp_config *tstamp;
 	struct net_device *netdev;
 	struct mlx5e_priv *priv;
@@ -1337,9 +1346,9 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 
 	skb->dev = netdev;
 
-	rq->stats.csum_complete++;
-	rq->stats.packets++;
-	rq->stats.bytes += cqe_bcnt;
+	stats->csum_complete++;
+	stats->packets++;
+	stats->bytes += cqe_bcnt;
 }
 
 void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 323f2af4200b..776b4d68e156 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -111,20 +111,19 @@ static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
 void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 {
 	struct mlx5e_sw_stats temp, *s = &temp;
-	struct mlx5e_rq_stats *rq_stats;
-	struct mlx5e_sq_stats *sq_stats;
-	struct mlx5e_ch_stats *ch_stats;
-	int i, j;
+	int i;
 
 	memset(s, 0, sizeof(*s));
 	read_lock(&priv->stats_lock);
 	if (!priv->channels_active)
 		goto out;
-	for (i = 0; i < priv->channels.num; i++) {
-		struct mlx5e_channel *c = priv->channels.c[i];
 
-		rq_stats = &c->rq.stats;
-		ch_stats = &c->stats;
+	for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) {
+		struct mlx5e_channel_stats *channel_stats =
+			&priv->channel_stats[i];
+		struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
+		struct mlx5e_ch_stats *ch_stats = &channel_stats->ch;
+		int j;
 
 		s->rx_packets	+= rq_stats->packets;
 		s->rx_bytes	+= rq_stats->bytes;
@@ -151,8 +150,8 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 		s->rx_cache_waive += rq_stats->cache_waive;
 		s->ch_eq_rearm += ch_stats->eq_rearm;
 
-		for (j = 0; j < priv->channels.params.num_tc; j++) {
-			sq_stats = &c->sq[j].stats;
+		for (j = 0; j < priv->max_opened_tc; j++) {
+			struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
 
 			s->tx_packets		+= sq_stats->packets;
 			s->tx_bytes		+= sq_stats->bytes;
@@ -1160,30 +1159,37 @@ static const struct counter_desc ch_stats_desc[] = {
 
 static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
 {
-	return (NUM_RQ_STATS * priv->channels.num) +
-		(NUM_CH_STATS * priv->channels.num) +
-		(NUM_SQ_STATS * priv->channels.num * priv->channels.params.num_tc);
+	int max_nch = priv->profile->max_nch(priv->mdev);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		return 0;
+
+	return (NUM_RQ_STATS * max_nch) +
+	       (NUM_CH_STATS * max_nch) +
+	       (NUM_SQ_STATS * max_nch * priv->max_opened_tc);
 }
 
 static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
 					   int idx)
 {
+	int max_nch = priv->profile->max_nch(priv->mdev);
 	int i, j, tc;
 
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 		return idx;
 
-	for (i = 0; i < priv->channels.num; i++)
+	for (i = 0; i < max_nch; i++)
 		for (j = 0; j < NUM_CH_STATS; j++)
 			sprintf(data + (idx++) * ETH_GSTRING_LEN,
 				ch_stats_desc[j].format, i);
 
-	for (i = 0; i < priv->channels.num; i++)
+	for (i = 0; i < max_nch; i++)
 		for (j = 0; j < NUM_RQ_STATS; j++)
 			sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_stats_desc[j].format, i);
 
-	for (tc = 0; tc < priv->channels.params.num_tc; tc++)
-		for (i = 0; i < priv->channels.num; i++)
+	/* priv->channel_tc2txq[i][tc] is valid only when device is open */
+	for (tc = 0; tc < priv->max_opened_tc; tc++)
+		for (i = 0; i < max_nch; i++)
 			for (j = 0; j < NUM_SQ_STATS; j++)
 				sprintf(data + (idx++) * ETH_GSTRING_LEN,
 					sq_stats_desc[j].format,
@@ -1195,29 +1201,29 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
 static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
 					 int idx)
 {
-	struct mlx5e_channels *channels = &priv->channels;
+	int max_nch = priv->profile->max_nch(priv->mdev);
 	int i, j, tc;
 
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 		return idx;
 
-	for (i = 0; i < channels->num; i++)
+	for (i = 0; i < max_nch; i++)
 		for (j = 0; j < NUM_CH_STATS; j++)
 			data[idx++] =
-				MLX5E_READ_CTR64_CPU(&channels->c[i]->stats,
+				MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].ch,
 						     ch_stats_desc, j);
 
-	for (i = 0; i < channels->num; i++)
+	for (i = 0; i < max_nch; i++)
 		for (j = 0; j < NUM_RQ_STATS; j++)
 			data[idx++] =
-				MLX5E_READ_CTR64_CPU(&channels->c[i]->rq.stats,
+				MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq,
 						     rq_stats_desc, j);
 
-	for (tc = 0; tc < priv->channels.params.num_tc; tc++)
-		for (i = 0; i < channels->num; i++)
+	for (tc = 0; tc < priv->max_opened_tc; tc++)
+		for (i = 0; i < max_nch; i++)
 			for (j = 0; j < NUM_SQ_STATS; j++)
 				data[idx++] =
-					MLX5E_READ_CTR64_CPU(&channels->c[i]->sq[tc].stats,
+					MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc],
 							     sq_stats_desc, j);
 
 	return idx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index d37566be06e1..aafd75257fd0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -220,28 +220,29 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct
 		if (skb->encapsulation) {
 			eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM |
 					  MLX5_ETH_WQE_L4_INNER_CSUM;
-			sq->stats.csum_partial_inner++;
+			sq->stats->csum_partial_inner++;
 		} else {
 			eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
-			sq->stats.csum_partial++;
+			sq->stats->csum_partial++;
 		}
 	} else
-		sq->stats.csum_none++;
+		sq->stats->csum_none++;
 }
 
 static inline u16
 mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb)
 {
+	struct mlx5e_sq_stats *stats = sq->stats;
 	u16 ihs;
 
 	if (skb->encapsulation) {
 		ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
-		sq->stats.tso_inner_packets++;
-		sq->stats.tso_inner_bytes += skb->len - ihs;
+		stats->tso_inner_packets++;
+		stats->tso_inner_bytes += skb->len - ihs;
 	} else {
 		ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
-		sq->stats.tso_packets++;
-		sq->stats.tso_bytes += skb->len - ihs;
+		stats->tso_packets++;
+		stats->tso_bytes += skb->len - ihs;
 	}
 
 	return ihs;
@@ -311,7 +312,7 @@ static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
 		wi->num_wqebbs = 1;
 		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
 	}
-	sq->stats.nop += nnops;
+	sq->stats->nop += nnops;
 }
 
 static inline void
@@ -337,7 +338,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	sq->pc += wi->num_wqebbs;
 	if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) {
 		netif_tx_stop_queue(sq->txq);
-		sq->stats.stopped++;
+		sq->stats->stopped++;
 	}
 
 	if (!skb->xmit_more || netif_xmit_stopped(sq->txq))
@@ -355,6 +356,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	struct mlx5_wqe_data_seg *dseg;
 	struct mlx5e_tx_wqe_info *wi;
 
+	struct mlx5e_sq_stats *stats = sq->stats;
 	unsigned char *skb_data = skb->data;
 	unsigned int skb_len = skb->len;
 	u16 ds_cnt, ds_cnt_inl = 0;
@@ -371,17 +373,17 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 		mss       = cpu_to_be16(skb_shinfo(skb)->gso_size);
 		ihs       = mlx5e_tx_get_gso_ihs(sq, skb);
 		num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
-		sq->stats.packets += skb_shinfo(skb)->gso_segs;
+		stats->packets += skb_shinfo(skb)->gso_segs;
 	} else {
 		opcode    = MLX5_OPCODE_SEND;
 		mss       = 0;
 		ihs       = mlx5e_calc_min_inline(sq->min_inline_mode, skb);
 		num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
-		sq->stats.packets++;
+		stats->packets++;
 	}
 
-	sq->stats.bytes     += num_bytes;
-	sq->stats.xmit_more += skb->xmit_more;
+	stats->bytes     += num_bytes;
+	stats->xmit_more += skb->xmit_more;
 
 	headlen = skb_len - ihs - skb->data_len;
 	ds_cnt += !!headlen;
@@ -415,7 +417,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 		if (skb_vlan_tag_present(skb)) {
 			mlx5e_insert_vlan(eseg->inline_hdr.start, skb,
 					  ihs - VLAN_HLEN, &skb_data, &skb_len);
-			sq->stats.added_vlan_packets++;
+			stats->added_vlan_packets++;
 		} else {
 			memcpy(eseg->inline_hdr.start, skb_data, ihs);
 			mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs);
@@ -427,7 +429,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 		if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
 			eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN);
 		eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb));
-		sq->stats.added_vlan_packets++;
+		stats->added_vlan_packets++;
 	}
 
 	num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, dseg);
@@ -440,7 +442,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	return NETDEV_TX_OK;
 
 err_drop:
-	sq->stats.dropped++;
+	stats->dropped++;
 	dev_kfree_skb_any(skb);
 
 	return NETDEV_TX_OK;
@@ -524,7 +526,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 				queue_work(cq->channel->priv->wq,
 					   &sq->recover.recover_work);
 			}
-			sq->stats.cqe_err++;
+			sq->stats->cqe_err++;
 		}
 
 		do {
@@ -584,7 +586,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 				   MLX5E_SQ_STOP_ROOM) &&
 	    !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
 		netif_tx_wake_queue(sq->txq);
-		sq->stats.wake++;
+		sq->stats->wake++;
 	}
 
 	return (i == MLX5E_TX_CQ_POLL_BUDGET);
@@ -641,6 +643,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	struct mlx5_wqe_data_seg *dseg;
 	struct mlx5e_tx_wqe_info *wi;
 
+	struct mlx5e_sq_stats *stats = sq->stats;
 	unsigned char *skb_data = skb->data;
 	unsigned int skb_len = skb->len;
 	u16 headlen, ihs, pi, frag_pi;
@@ -659,17 +662,17 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 		mss       = cpu_to_be16(skb_shinfo(skb)->gso_size);
 		ihs       = mlx5e_tx_get_gso_ihs(sq, skb);
 		num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
-		sq->stats.packets += skb_shinfo(skb)->gso_segs;
+		stats->packets += skb_shinfo(skb)->gso_segs;
 	} else {
 		opcode    = MLX5_OPCODE_SEND;
 		mss       = 0;
 		ihs       = mlx5e_calc_min_inline(sq->min_inline_mode, skb);
 		num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
-		sq->stats.packets++;
+		stats->packets++;
 	}
 
-	sq->stats.bytes     += num_bytes;
-	sq->stats.xmit_more += skb->xmit_more;
+	stats->bytes     += num_bytes;
+	stats->xmit_more += skb->xmit_more;
 
 	headlen = skb_len - ihs - skb->data_len;
 	ds_cnt += !!headlen;
@@ -716,7 +719,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	return NETDEV_TX_OK;
 
 err_drop:
-	sq->stats.dropped++;
+	stats->dropped++;
 	dev_kfree_skb_any(skb);
 
 	return NETDEV_TX_OK;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 5d6f9ce2bf80..1b17f682693b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -46,24 +46,26 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
 
 static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
 {
+	struct mlx5e_sq_stats *stats = sq->stats;
 	struct net_dim_sample dim_sample;
 
 	if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state)))
 		return;
 
-	net_dim_sample(sq->cq.event_ctr, sq->stats.packets, sq->stats.bytes,
+	net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes,
 		       &dim_sample);
 	net_dim(&sq->dim, dim_sample);
 }
 
 static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
 {
+	struct mlx5e_rq_stats *stats = rq->stats;
 	struct net_dim_sample dim_sample;
 
 	if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state)))
 		return;
 
-	net_dim_sample(rq->cq.event_ctr, rq->stats.packets, rq->stats.bytes,
+	net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes,
 		       &dim_sample);
 	net_dim(&rq->dim, dim_sample);
 }
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH] IB: Revert "remove redundant INFINIBAND kconfig dependencies"
From: Greg Thelen @ 2018-05-26  0:32 UTC (permalink / raw)
  To: arnd
  Cc: Doug Ledford, Jason Gunthorpe, Keith Busch, Jens Axboe,
	Christoph Hellwig, Sagi Grimberg, oleg.drokin, andreas.dilger,
	jsimmons, gregkh, Steve French, ericvh, rminnich, lucho,
	David S. Miller, santosh.shilimkar, trond.myklebust,
	anna.schumaker, bfields, jlayton, Bart Van Assche, linux-rdma,
	LKML, linux-nvme, lustre-devel, devel, linux-cifs
In-Reply-To: <20180525213123.2113748-1-arnd@arndb.de>

On Fri, May 25, 2018 at 2:32 PM Arnd Bergmann <arnd@arndb.de> wrote:

> Several subsystems depend on INFINIBAND_ADDR_TRANS, which in turn depends
> on INFINIBAND. However, with CONFIG_INFINIBAND=m, this leads to a
> link error when another driver using it is built-in. The
> INFINIBAND_ADDR_TRANS dependency is insufficient here as this is
> a 'bool' symbol that does not force anything to be a module in turn.

> fs/cifs/smbdirect.o: In function `smbd_disconnect_rdma_work':
> smbdirect.c:(.text+0x1e4): undefined reference to `rdma_disconnect'
> net/9p/trans_rdma.o: In function `rdma_request':
> trans_rdma.c:(.text+0x7bc): undefined reference to `rdma_disconnect'
> net/9p/trans_rdma.o: In function `rdma_destroy_trans':
> trans_rdma.c:(.text+0x830): undefined reference to `ib_destroy_qp'
> trans_rdma.c:(.text+0x858): undefined reference to `ib_dealloc_pd'

> Fixes: 9533b292a7ac ("IB: remove redundant INFINIBAND kconfig
dependencies")
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

Acked-by: Greg Thelen <gthelen@google.com>

Sorry for the 9533b292a7ac problem.
At this point in the release cycle, I think Arnd's revert is best.

If there is interest, I've put a little thought into an alternative fix:
making INFINIBAND_ADDR_TRANS tristate.  But it's nontrivial.
So I prefer this simple revert for now.

Doug: do you need anything from me on this?

> ---
> The patch that introduced the problem has been queued in the
> rdma-fixes/for-rc tree. Please revert the patch before sending
> the branch to Linus.
> ---
>     drivers/infiniband/ulp/srpt/Kconfig | 2 +-
>     drivers/nvme/host/Kconfig           | 2 +-
>     drivers/nvme/target/Kconfig         | 2 +-
>     drivers/staging/lustre/lnet/Kconfig | 2 +-
>     fs/cifs/Kconfig                     | 2 +-
>     net/9p/Kconfig                      | 2 +-
>     net/rds/Kconfig                     | 2 +-
>     net/sunrpc/Kconfig                  | 2 +-
>     8 files changed, 8 insertions(+), 8 deletions(-)

> diff --git a/drivers/infiniband/ulp/srpt/Kconfig
b/drivers/infiniband/ulp/srpt/Kconfig
> index 25bf6955b6d0..fb8b7182f05e 100644
> --- a/drivers/infiniband/ulp/srpt/Kconfig
> +++ b/drivers/infiniband/ulp/srpt/Kconfig
> @@ -1,6 +1,6 @@
>     config INFINIBAND_SRPT
>            tristate "InfiniBand SCSI RDMA Protocol target support"
> -       depends on INFINIBAND_ADDR_TRANS && TARGET_CORE
> +       depends on INFINIBAND && INFINIBAND_ADDR_TRANS && TARGET_CORE
>            ---help---

>              Support for the SCSI RDMA Protocol (SRP) Target driver. The
> diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
> index dbb7464c018c..88a8b5916624 100644
> --- a/drivers/nvme/host/Kconfig
> +++ b/drivers/nvme/host/Kconfig
> @@ -27,7 +27,7 @@ config NVME_FABRICS

>     config NVME_RDMA
>            tristate "NVM Express over Fabrics RDMA host driver"
> -       depends on INFINIBAND_ADDR_TRANS && BLOCK
> +       depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK
>            select NVME_CORE
>            select NVME_FABRICS
>            select SG_POOL
> diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
> index 7595664ee753..3c7b61ddb0d1 100644
> --- a/drivers/nvme/target/Kconfig
> +++ b/drivers/nvme/target/Kconfig
> @@ -27,7 +27,7 @@ config NVME_TARGET_LOOP

>     config NVME_TARGET_RDMA
>            tristate "NVMe over Fabrics RDMA target support"
> -       depends on INFINIBAND_ADDR_TRANS
> +       depends on INFINIBAND && INFINIBAND_ADDR_TRANS
>            depends on NVME_TARGET
>            select SGL_ALLOC
>            help
> diff --git a/drivers/staging/lustre/lnet/Kconfig
b/drivers/staging/lustre/lnet/Kconfig
> index f3b1ad4bd3dc..ad049e6f24e4 100644
> --- a/drivers/staging/lustre/lnet/Kconfig
> +++ b/drivers/staging/lustre/lnet/Kconfig
> @@ -34,7 +34,7 @@ config LNET_SELFTEST

>     config LNET_XPRT_IB
>            tristate "LNET infiniband support"
> -       depends on LNET && PCI && INFINIBAND_ADDR_TRANS
> +       depends on LNET && PCI && INFINIBAND && INFINIBAND_ADDR_TRANS
>            default LNET && INFINIBAND
>            help
>              This option allows the LNET users to use infiniband as an
> diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
> index d61e2de8d0eb..5f132d59dfc2 100644
> --- a/fs/cifs/Kconfig
> +++ b/fs/cifs/Kconfig
> @@ -197,7 +197,7 @@ config CIFS_SMB311

>     config CIFS_SMB_DIRECT
>            bool "SMB Direct support (Experimental)"
> -       depends on CIFS=m && INFINIBAND_ADDR_TRANS || CIFS=y &&
INFINIBAND_ADDR_TRANS=y
> +       depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS ||
CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y
>            help
>              Enables SMB Direct experimental support for SMB 3.0, 3.02 and
3.1.1.
>              SMB Direct allows transferring SMB packets over RDMA. If
unsure,
> diff --git a/net/9p/Kconfig b/net/9p/Kconfig
> index 46c39f7da444..e6014e0e51f7 100644
> --- a/net/9p/Kconfig
> +++ b/net/9p/Kconfig
> @@ -32,7 +32,7 @@ config NET_9P_XEN


>     config NET_9P_RDMA
> -       depends on INET && INFINIBAND_ADDR_TRANS
> +       depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS
>            tristate "9P RDMA Transport (Experimental)"
>            help
>              This builds support for an RDMA transport.
> diff --git a/net/rds/Kconfig b/net/rds/Kconfig
> index 1a31502ee7db..bffde4b46c5d 100644
> --- a/net/rds/Kconfig
> +++ b/net/rds/Kconfig
> @@ -8,7 +8,7 @@ config RDS

>     config RDS_RDMA
>            tristate "RDS over Infiniband"
> -       depends on RDS && INFINIBAND_ADDR_TRANS
> +       depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS
>            ---help---
>              Allow RDS to use Infiniband as a transport.
>              This transport supports RDMA operations.
> diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
> index 6358e5271070..ac09ca803296 100644
> --- a/net/sunrpc/Kconfig
> +++ b/net/sunrpc/Kconfig
> @@ -50,7 +50,7 @@ config SUNRPC_DEBUG

>     config SUNRPC_XPRT_RDMA
>            tristate "RPC-over-RDMA transport"
> -       depends on SUNRPC && INFINIBAND_ADDR_TRANS
> +       depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
>            default SUNRPC && INFINIBAND
>            select SG_POOL
>            help
> --
> 2.9.0

^ permalink raw reply

* Re: [PATCH net-next] bpfilter: fix a build err
From: YueHaibing @ 2018-05-26  2:25 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: davem, ast, netdev, linux-kernel
In-Reply-To: <20180525161925.crdamzqjgs5wg77e@ast-mbp>

On 2018/5/26 0:19, Alexei Starovoitov wrote:
> On Fri, May 25, 2018 at 06:17:57PM +0800, YueHaibing wrote:
>> gcc-7.3.0 report following err:
>>
>>   HOSTCC  net/bpfilter/main.o
>> In file included from net/bpfilter/main.c:9:0:
>> ./include/uapi/linux/bpf.h:12:10: fatal error: linux/bpf_common.h: No such file or directory
>>  #include <linux/bpf_common.h>
>>
>> remove it by adding a include path.
>> Fixes: d2ba09c17a06 ("net: add skeleton of bpfilter kernel module")
>>
>> Signed-off-by: YueHaibing <yuehaibing@huawei.com>
>> ---
>>  net/bpfilter/Makefile | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
>> index 2af752c..3f3cb87 100644
>> --- a/net/bpfilter/Makefile
>> +++ b/net/bpfilter/Makefile
>> @@ -5,7 +5,7 @@
>>  
>>  hostprogs-y := bpfilter_umh
>>  bpfilter_umh-objs := main.o
>> -HOSTCFLAGS += -I. -Itools/include/
>> +HOSTCFLAGS += -I. -Itools/include/ -Itools/include/uapi
> 
> Strangely I don't see this error with gcc 7.3
> I've tried this patch and it doesn't hurt,
> but before it gets applied could you please try
> the top two patches from this tree:
> https://git.kernel.org/pub/scm/linux/kernel/git/ast/bpf.git/?h=ipt_bpf
> in your environment?
> These two patches add the actual meat of bpfilter and I'd like
> to make sure the build setup is good for everyone before
> we proceed too far.

After applying these two patches on net-next, the error is still there:
 bpfilter: rough bpfilter codegen example hack
 bpfilter: add iptable get/set parsing

  HOSTCC  net/bpfilter/main.o
In file included from net/bpfilter/main.c:13:0:
./include/uapi/linux/bpf.h:12:10: fatal error: linux/bpf_common.h: No such file or directory
 #include <linux/bpf_common.h>
          ^~~~~~~~~~~~~~~~~~~~
compilation terminated.
make[2]: *** [net/bpfilter/main.o] Error 1
make[1]: *** [net/bpfilter] Error 2
make: *** [net] Error 2

I also compiled your tree; the error is the same.

My gcc version info is as follows:
[root@localhost net-next]# gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/home/yuehb/gcc-7.3.0-tools/libexec/gcc/x86_64-pc-linux-gnu/7.3.0/lto-wrapper
Target: x86_64-pc-linux-gnu
Configured with: ../gcc-7.3.0/configure --enable-checking=release --enable-languages=c,c++
--disable-multilib --prefix=/home/yuehb/gcc-7.3.0-tools
Thread model: posix
gcc version 7.3.0 (GCC)

> 
> 
> .
> 

^ permalink raw reply

* Re: [PATCH net-next 0/8] nfp: offload LAG for tc flower egress
From: Jakub Kicinski @ 2018-05-26  2:47 UTC (permalink / raw)
  To: Jiri Pirko, John Hurley
  Cc: davem, netdev, oss-drivers, Jay Vosburgh, Veaceslav Falico,
	Andy Gospodarek
In-Reply-To: <20180525064809.GG2295@nanopsycho>

On Fri, 25 May 2018 08:48:09 +0200, Jiri Pirko wrote:
> Thu, May 24, 2018 at 04:22:47AM CEST, jakub.kicinski@netronome.com wrote:
> >Hi!
> >
> >This series from John adds bond offload to the nfp driver.  Patch 5
> >exposes the hash type for NETDEV_LAG_TX_TYPE_HASH to make sure nfp
> >hashing matches that of the software LAG.  This may be unnecessarily
> >conservative, let's see what LAG maintainers think :)  
> 
> So you need to restrict offload to only certain hash algo? In mlxsw, we
> just ignore the lag setting and do some hw default hashing. Would not be
> enough? Note that there's a good reason for it, as you see, in team, the
> hashing is done in a BPF function and could be totally arbitrary.
> Your patchset effectively disables team offload for nfp.

My understanding is that the project requirements only called for L3/L4
hash algorithm offload, hence the temptation to err on the side of
caution and not offload all the bond configurations.  John can provide
more details.  Not being able to offload team is unfortunate indeed.

^ permalink raw reply

* [PATCH] PCI: reset driver SR-IOV state after remove
From: Jakub Kicinski @ 2018-05-26  3:00 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: linux-pci, netdev, Sathya Perla, Felix Manlunas, alexander.duyck,
	Jacob Keller, Donald Dutile, oss-drivers, Christoph Hellwig,
	Jakub Kicinski
In-Reply-To: <20180525214525.GB92995@bhelgaas-glaptop.roam.corp.google.com>

Bjorn points out that currently core and most of the drivers don't
clean up dev->sriov->driver_max_VFs settings on .remove().  This
means that if a different driver is bound afterwards it will
inherit the old setting:

  - load PF driver 1
  - driver calls pci_sriov_set_totalvfs() to reduce driver_max_VFs
  - unload PF driver 1
  - load PF driver 2

Reset driver_max_VFs back to total_VFs after device remove.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
I gave in to the temptation and also added a warning about SR-IOV
being on after remove :)  Please let me know if this is anywhere
close to what you had in mind!

 drivers/pci/iov.c        | 16 ++++++++++++++++
 drivers/pci/pci-driver.c |  1 +
 drivers/pci/pci.h        |  4 ++++
 3 files changed, 21 insertions(+)

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index db86fd26f8e1..5d0f560a1e28 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -574,6 +574,22 @@ void pci_iov_release(struct pci_dev *dev)
 		sriov_release(dev);
 }
 
+/**
+ * pci_sriov_drv_cleanup - clean up SR-IOV state after PF driver is detached
+ * @dev: the PCI device
+ */
+void pci_sriov_drv_cleanup(struct pci_dev *dev)
+{
+	struct pci_sriov *iov = dev->sriov;
+
+	if (!dev->is_physfn)
+		return;
+	iov->driver_max_VFs = iov->total_VFs;
+	if (iov->num_VFs)
+		dev_warn(&dev->dev,
+			 "driver left SR-IOV enabled after remove\n");
+}
+
 /**
  * pci_iov_update_resource - update a VF BAR
  * @dev: the PCI device
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index b9a131137e64..932a1acf7b1b 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -443,6 +443,7 @@ static int pci_device_remove(struct device *dev)
 		}
 		pcibios_free_irq(pci_dev);
 		pci_dev->driver = NULL;
+		pci_sriov_drv_cleanup(pci_dev);
 	}
 
 	/* Undo the runtime PM settings in local_pci_probe() */
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 023f7cf25bff..5fa6d19762bd 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -311,6 +311,7 @@ static inline void pci_restore_ats_state(struct pci_dev *dev)
 #ifdef CONFIG_PCI_IOV
 int pci_iov_init(struct pci_dev *dev);
 void pci_iov_release(struct pci_dev *dev);
+void pci_sriov_drv_cleanup(struct pci_dev *dev);
 void pci_iov_update_resource(struct pci_dev *dev, int resno);
 resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno);
 void pci_restore_iov_state(struct pci_dev *dev);
@@ -323,6 +324,9 @@ static inline int pci_iov_init(struct pci_dev *dev)
 }
 static inline void pci_iov_release(struct pci_dev *dev)
 
+{
+}
+static inline void pci_sriov_drv_cleanup(struct pci_dev *dev)
 {
 }
 static inline void pci_restore_iov_state(struct pci_dev *dev)
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH] net: netsec: reduce DMA mask to 40 bits
From: Jassi Brar @ 2018-05-26  3:26 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Ard Biesheuvel, netdev, David S. Miller, Masahisa Kojima,
	Ilias Apalodimas, nd
In-Reply-To: <20180525203730.20e8ec72@m750>

On 26 May 2018 at 01:07, Robin Murphy <robin.murphy@arm.com> wrote:
> On Sat, 26 May 2018 00:33:05 +0530
> Jassi Brar <jaswinder.singh@linaro.org> wrote:
>
>> On 25 May 2018 at 18:20, Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> wrote:
>> > The netsec network controller IP can drive 64 address bits for DMA,
>> > and the DMA mask is set accordingly in the driver. However, the
>> > SynQuacer SoC, which is the only silicon incorporating this IP at
>> > the moment, integrates this IP in a manner that leaves address bits
>> > [63:40] unconnected.
>> >
>> > Up until now, this has not resulted in any problems, given that the
>> > DDR controller doesn't decode those bits to begin with. However,
>> > recent firmware updates for platforms incorporating this SoC allow
>> > the IOMMU to be enabled, which does decode address bits [47:40],
>> > and allocates top down from the IOVA space, producing DMA addresses
>> > that have bits set that have been left unconnected.
>> >
>> > Both the DT and ACPI (IORT) descriptions of the platform take this
>> > into account, and only describe a DMA address space of 40 bits
>> > (using either dma-ranges DT properties, or DMA address limits in
>> > IORT named component nodes). However, even though our IOMMU and bus
>> > layers may take such limitations into account by setting a narrower
>> > DMA mask when creating the platform device, the netsec probe()
>> > entrypoint follows the common practice of setting the DMA mask
>> > unconditionally, according to the capabilities of the IP block itself
>> > rather than to its integration into the chip.
>> >
>> > It is currently unclear what the correct fix is here. We could hack
>> > around it by only setting the DMA mask if it deviates from its
>> > default value of DMA_BIT_MASK(32). However, this makes it
>> > impossible for the bus layer to use DMA_BIT_MASK(32) as the bus
>> > limit, and so it appears that a more comprehensive approach is
>> > required to take DMA limits imposed by the SoC as a whole into
>> > account.
>> >
>> > In the mean time, let's limit the DMA mask to 40 bits. Given that
>> > there is currently only one SoC that incorporates this IP, this is
>> > a reasonable approach that can be backported to -stable and buys us
>> > some time to come up with a proper fix going forward.
>> >
>> I am sure you already thought about it, but why not let the platform
>> specify the bit mask for the driver (via some "bus-width" property),
>> to override the default 64 bit mask?
>
> Because lack of a property to describe the integration is not the
> problem. There are already at least two ways: the general DT/IORT
> properties for describing DMA addressing - which it would be a bit
> ungainly for a driver to parse for this reason, but not impossible -
....


> and inferring it from a SoC-specific compatible - which is more
> appropriate, and what we happen to be able to do here.
>
Sorry, I am not sure I follow. This patch changes from 64-bits default
to 40-bits capability without checking for the parent SoC. If the next
generation implements the full 64-bit or just 32-bit bus, we'll be
back in the pit again. No?

Thanks.

^ permalink raw reply

* Re: [PATCH] net: netsec: reduce DMA mask to 40 bits
From: Jassi Brar @ 2018-05-26  3:44 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Ard Biesheuvel, netdev, David S. Miller, Masahisa Kojima,
	Ilias Apalodimas, nd
In-Reply-To: <CAJe_ZhecUdx5oE8yXAoNRvpUr3WWFbN0jBy8zMsXyRfcGMyfjQ@mail.gmail.com>

On 26 May 2018 at 08:56, Jassi Brar <jaswinder.singh@linaro.org> wrote:
> On 26 May 2018 at 01:07, Robin Murphy <robin.murphy@arm.com> wrote:
>> On Sat, 26 May 2018 00:33:05 +0530
>> Jassi Brar <jaswinder.singh@linaro.org> wrote:
>>
>>> On 25 May 2018 at 18:20, Ard Biesheuvel <ard.biesheuvel@linaro.org>
>>> wrote:
>>> > The netsec network controller IP can drive 64 address bits for DMA,
>>> > and the DMA mask is set accordingly in the driver. However, the
>>> > SynQuacer SoC, which is the only silicon incorporating this IP at
>>> > the moment, integrates this IP in a manner that leaves address bits
>>> > [63:40] unconnected.
>>> >
>>> > Up until now, this has not resulted in any problems, given that the
>>> > DDR controller doesn't decode those bits to begin with. However,
>>> > recent firmware updates for platforms incorporating this SoC allow
>>> > the IOMMU to be enabled, which does decode address bits [47:40],
>>> > and allocates top down from the IOVA space, producing DMA addresses
>>> > that have bits set that have been left unconnected.
>>> >
>>> > Both the DT and ACPI (IORT) descriptions of the platform take this
>>> > into account, and only describe a DMA address space of 40 bits
>>> > (using either dma-ranges DT properties, or DMA address limits in
>>> > IORT named component nodes). However, even though our IOMMU and bus
>>> > layers may take such limitations into account by setting a narrower
>>> > DMA mask when creating the platform device, the netsec probe()
>>> > entrypoint follows the common practice of setting the DMA mask
>>> > unconditionally, according to the capabilities of the IP block itself
>>> > rather than to its integration into the chip.
>>> >
>>> > It is currently unclear what the correct fix is here. We could hack
>>> > around it by only setting the DMA mask if it deviates from its
>>> > default value of DMA_BIT_MASK(32). However, this makes it
>>> > impossible for the bus layer to use DMA_BIT_MASK(32) as the bus
>>> > limit, and so it appears that a more comprehensive approach is
>>> > required to take DMA limits imposed by the SoC as a whole into
>>> > account.
>>> >
>>> > In the mean time, let's limit the DMA mask to 40 bits. Given that
>>> > there is currently only one SoC that incorporates this IP, this is
>>> > a reasonable approach that can be backported to -stable and buys us
>>> > some time to come up with a proper fix going forward.
>>> >
>>> I am sure you already thought about it, but why not let the platform
>>> specify the bit mask for the driver (via some "bus-width" property),
>>> to override the default 64 bit mask?
>>
>> Because lack of a property to describe the integration is not the
>> problem. There are already at least two ways: the general DT/IORT
>> properties for describing DMA addressing - which it would be a bit
>> ungainly for a driver to parse for this reason, but not impossible -
> ....
>
>
>> and inferring it from a SoC-specific compatible - which is more
>> appropriate, and what we happen to be able to do here.
>>
> Sorry, I am not sure I follow. This patch changes from 64-bits default
> to 40-bits capability without checking for the parent SoC. If the next
> generation implements the full 64-bit or just 32-bit bus, we'll be
> back in the pit again. No?
>
Probably you meant we'll change the ethernet compatible string for
a differently capable SoC. OK, but here it is more of an integration issue
than controller version.

Which makes me realise the extant compatible property for netsec is
not so correct (it embeds the platform name). So I am ok either way.

Thanks.

^ permalink raw reply

* [PATCH net-next 00/14]  nfp: abm: RED/MQ qdisc offload
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski

Hi!

This is the second batch of advanced buffer management nfp driver
changes.  This series adds the qdisc offload.  Support for
a very simple subset of RED qdisc offload is added as needed
for DCTCP ECN marking (min and max thresholds set to the same
value).

The first two patches fix glitches introduced by the previous
series.  We have to be careful about phys_port_name handling,
because VFs share the same code path, and some user space may
get confused by the names we chose.

Since unlike previous offloads we can report the queue backlog
both in bytes and packets we need to adjust how statistics are
added up in the core (patch 6).

There are some extra statistics we want to expose which don't
fit into TC stats, namely counts of packets which have been fast-
forwarded without getting enqueued because there was no
contention and number of packets that were ever queued (sum of
all momentary backlogs).  We expose those through ethtool stats
(patches 8 and 9).

Remaining 5 patches add MQ offload - to be able to set different
configurations on different queues.  Representors are made multi-
queue and we add offload support to MQ.  MQ stats are added up
before calling ->dump qdiscs on the children, and therefore don't
include updated offload values.  To avoid clearly incorrect stats
MQ is made to also request stats update from offloads.  This way
we can correct the diff at the driver level.


Jakub Kicinski (14):
  nfp: return -EOPNOTSUPP from .ndo_get_phys_port_name for VFs
  nfp: prefix vNIC phys_port_name with 'n'
  nfp: abm: enable advanced queuing on demand
  nfp: abm: add helpers for configuring queue marking levels
  nfp: abm: add simple RED offload
  net: sched: add qstats.qlen to qlen
  nfp: abm: report statistics from RED offload
  nfp: allow apps to add extra stats to ports
  nfp: abm: expose the internal stats in ethtool
  nfp: abm: expose all PF queues
  net: sched: mq: add simple offload notification
  nfp: abm: multi-queue RED offload
  net: sched: mq: request stats from offloads
  nfp: abm: report correct MQ stats

 drivers/net/ethernet/netronome/nfp/abm/ctrl.c | 275 +++++++++++++
 drivers/net/ethernet/netronome/nfp/abm/main.c | 374 +++++++++++++++++-
 drivers/net/ethernet/netronome/nfp/abm/main.h |  67 ++++
 drivers/net/ethernet/netronome/nfp/nfp_abi.h  |  14 +
 drivers/net/ethernet/netronome/nfp/nfp_app.c  |  22 ++
 drivers/net/ethernet/netronome/nfp/nfp_app.h  |  13 +
 .../ethernet/netronome/nfp/nfp_net_common.c   |  11 +-
 .../ethernet/netronome/nfp/nfp_net_ethtool.c  |  10 +-
 .../net/ethernet/netronome/nfp/nfp_net_repr.c |   5 +-
 .../net/ethernet/netronome/nfp/nfp_net_repr.h |   7 +-
 drivers/net/ethernet/netronome/nfp/nfp_port.h |   2 +
 .../ethernet/netronome/nfp/nfpcore/nfp_cpp.h  |   5 +
 include/linux/netdevice.h                     |   1 +
 include/net/pkt_cls.h                         |  12 +
 include/net/sch_generic.h                     |   4 +-
 net/sched/sch_mq.c                            |  37 ++
 16 files changed, 843 insertions(+), 16 deletions(-)

-- 
2.17.0

^ permalink raw reply

* [PATCH net-next 01/14] nfp: return -EOPNOTSUPP from .ndo_get_phys_port_name for VFs
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

After a recent change we started returning 0 from
ndo_get_phys_port_name for VFs.  The name parameter for
ndo_get_phys_port_name is not initialized by the stack so
this can lead to a crash.  We should have kept returning
-EOPNOTSUPP in the first place.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index eea11e881bf5..1f572896d1ee 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3286,11 +3286,12 @@ nfp_net_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
 	if (nn->port)
 		return nfp_port_get_phys_port_name(netdev, name, len);
 
-	if (!nn->dp.is_vf) {
-		n = snprintf(name, len, "%d", nn->id);
-		if (n >= len)
-			return -EINVAL;
-	}
+	if (nn->dp.is_vf)
+		return -EOPNOTSUPP;
+
+	n = snprintf(name, len, "%d", nn->id);
+	if (n >= len)
+		return -EINVAL;
 
 	return 0;
 }
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 02/14] nfp: prefix vNIC phys_port_name with 'n'
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

Some drivers are using a bare number inside phys_port_name
as VF id and OpenStack's regexps will pick it up.  We can't
use a bare number for our vNICs, so prefix the names with 'n'.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 1f572896d1ee..75110c8d6a90 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3289,7 +3289,7 @@ nfp_net_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
 	if (nn->dp.is_vf)
 		return -EOPNOTSUPP;
 
-	n = snprintf(name, len, "%d", nn->id);
+	n = snprintf(name, len, "n%d", nn->id);
 	if (n >= len)
 		return -EINVAL;
 
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 03/14] nfp: abm: enable advanced queuing on demand
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

ABM NIC FW has a cut-through mode where the PCIe queuing
is bypassed, thus working like our standard NIC FWs.  Use this
mode by default and only enable queuing in switchdev mode where
users can configure it.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/abm/ctrl.c | 13 +++++++++++++
 drivers/net/ethernet/netronome/nfp/abm/main.c | 11 +++++++++++
 drivers/net/ethernet/netronome/nfp/abm/main.h |  2 ++
 drivers/net/ethernet/netronome/nfp/nfp_abi.h  | 14 ++++++++++++++
 4 files changed, 40 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
index e40f6f06417b..676d3afc9bdd 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
@@ -36,10 +36,23 @@
 
 #include "../nfpcore/nfp_cpp.h"
 #include "../nfp_app.h"
+#include "../nfp_abi.h"
 #include "../nfp_main.h"
 #include "../nfp_net.h"
 #include "main.h"
 
+int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm)
+{
+	return nfp_mbox_cmd(abm->app->pf, NFP_MBOX_PCIE_ABM_ENABLE,
+			    NULL, 0, NULL, 0);
+}
+
+int nfp_abm_ctrl_qm_disable(struct nfp_abm *abm)
+{
+	return nfp_mbox_cmd(abm->app->pf, NFP_MBOX_PCIE_ABM_DISABLE,
+			    NULL, 0, NULL, 0);
+}
+
 void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink)
 {
 	alink->queue_base = nn_readl(alink->vnic, NFP_NET_CFG_START_RXQ);
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index 5a12bb20bced..28a18ac62040 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -182,6 +182,7 @@ static enum devlink_eswitch_mode nfp_abm_eswitch_mode_get(struct nfp_app *app)
 static int nfp_abm_eswitch_set_legacy(struct nfp_abm *abm)
 {
 	nfp_abm_kill_reprs_all(abm);
+	nfp_abm_ctrl_qm_disable(abm);
 
 	abm->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
 	return 0;
@@ -200,6 +201,10 @@ static int nfp_abm_eswitch_set_switchdev(struct nfp_abm *abm)
 	struct nfp_net *nn;
 	int err;
 
+	err = nfp_abm_ctrl_qm_enable(abm);
+	if (err)
+		return err;
+
 	list_for_each_entry(nn, &pf->vnics, vnic_list) {
 		struct nfp_abm_link *alink = nn->app_priv;
 
@@ -217,6 +222,7 @@ static int nfp_abm_eswitch_set_switchdev(struct nfp_abm *abm)
 
 err_kill_all_reprs:
 	nfp_abm_kill_reprs_all(abm);
+	nfp_abm_ctrl_qm_disable(abm);
 	return err;
 }
 
@@ -350,6 +356,11 @@ static int nfp_abm_init(struct nfp_app *app)
 	if (err)
 		goto err_free_abm;
 
+	/* We start in legacy mode, make sure advanced queuing is disabled */
+	err = nfp_abm_ctrl_qm_disable(abm);
+	if (err)
+		goto err_free_abm;
+
 	err = -ENOMEM;
 	reprs = nfp_reprs_alloc(pf->max_data_vnics);
 	if (!reprs)
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
index 5938b69b8a84..7d129b205535 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.h
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.h
@@ -72,4 +72,6 @@ struct nfp_abm_link {
 
 void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);
 int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm);
+int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm);
+int nfp_abm_ctrl_qm_disable(struct nfp_abm *abm);
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_abi.h b/drivers/net/ethernet/netronome/nfp/nfp_abi.h
index 7ffa6e6a9d1c..8b56c27931bf 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_abi.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_abi.h
@@ -59,12 +59,26 @@
  * @NFP_MBOX_POOL_SET:	set shared buffer pool info/config
  * Input  - struct nfp_shared_buf_pool_info_set
  * Output - None
+ *
+ * @NFP_MBOX_PCIE_ABM_ENABLE:	enable PCIe-side advanced buffer management
+ * Enable advanced buffer management of the PCIe block.  If ABM is disabled,
+ * the PCIe block maintains a very short queue of buffers and does tail drop.
+ * ABM allows more advanced buffering and priority control.
+ * Input  - None
+ * Output - None
+ *
+ * @NFP_MBOX_PCIE_ABM_DISABLE:	disable PCIe-side advanced buffer management
+ * Input  - None
+ * Output - None
  */
 enum nfp_mbox_cmd {
 	NFP_MBOX_NO_CMD			= 0x00,
 
 	NFP_MBOX_POOL_GET		= 0x01,
 	NFP_MBOX_POOL_SET		= 0x02,
+
+	NFP_MBOX_PCIE_ABM_ENABLE	= 0x03,
+	NFP_MBOX_PCIE_ABM_DISABLE	= 0x04,
 };
 
 #define NFP_SHARED_BUF_COUNT_SYM_NAME	"_abi_nfd_pf%u_sb_cnt"
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 04/14] nfp: abm: add helpers for configuring queue marking levels
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

Queue levels for simple ECN marking are stored in _abi_nfd_out_q_lvls_X
symbol, where X is the PCIe PF id.  Find out the location of that symbol
and add helpers for modifying it.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/abm/ctrl.c | 80 +++++++++++++++++++
 drivers/net/ethernet/netronome/nfp/abm/main.h |  3 +
 .../ethernet/netronome/nfp/nfpcore/nfp_cpp.h  |  5 ++
 3 files changed, 88 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
index 676d3afc9bdd..978884a0be19 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
@@ -35,12 +35,57 @@
 #include <linux/kernel.h>
 
 #include "../nfpcore/nfp_cpp.h"
+#include "../nfpcore/nfp_nffw.h"
 #include "../nfp_app.h"
 #include "../nfp_abi.h"
 #include "../nfp_main.h"
 #include "../nfp_net.h"
 #include "main.h"
 
+#define NFP_QLVL_SYM_NAME	"_abi_nfd_out_q_lvls_%u"
+#define NFP_QLVL_STRIDE		16
+#define NFP_QLVL_THRS		8
+
+static unsigned long long
+nfp_abm_q_lvl_thrs(struct nfp_abm_link *alink, unsigned int queue)
+{
+	return alink->abm->q_lvls->addr +
+		(alink->queue_base + queue) * NFP_QLVL_STRIDE + NFP_QLVL_THRS;
+}
+
+static int
+nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int i, u32 val)
+{
+	struct nfp_cpp *cpp = alink->abm->app->cpp;
+	u32 muw;
+	int err;
+
+	muw = NFP_CPP_ATOMIC_WR(alink->abm->q_lvls->target,
+				alink->abm->q_lvls->domain);
+
+	err = nfp_cpp_writel(cpp, muw, nfp_abm_q_lvl_thrs(alink, i), val);
+	if (err) {
+		nfp_err(cpp, "RED offload setting level failed on vNIC %d queue %d\n",
+			alink->id, i);
+		return err;
+	}
+
+	return 0;
+}
+
+int nfp_abm_ctrl_set_all_q_lvls(struct nfp_abm_link *alink, u32 val)
+{
+	int i, err;
+
+	for (i = 0; i < alink->vnic->max_rx_rings; i++) {
+		err = nfp_abm_ctrl_set_q_lvl(alink, i, val);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm)
 {
 	return nfp_mbox_cmd(abm->app->pf, NFP_MBOX_PCIE_ABM_ENABLE,
@@ -59,13 +104,48 @@ void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink)
 	alink->queue_base /= alink->vnic->stride_rx;
 }
 
+static const struct nfp_rtsym *
+nfp_abm_ctrl_find_rtsym(struct nfp_pf *pf, const char *name, unsigned int size)
+{
+	const struct nfp_rtsym *sym;
+
+	sym = nfp_rtsym_lookup(pf->rtbl, name);
+	if (!sym) {
+		nfp_err(pf->cpp, "Symbol '%s' not found\n", name);
+		return ERR_PTR(-ENOENT);
+	}
+	if (sym->size != size) {
+		nfp_err(pf->cpp,
+			"Symbol '%s' wrong size: expected %u got %llu\n",
+			name, size, sym->size);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return sym;
+}
+
+static const struct nfp_rtsym *
+nfp_abm_ctrl_find_q_rtsym(struct nfp_pf *pf, const char *name,
+			  unsigned int size)
+{
+	return nfp_abm_ctrl_find_rtsym(pf, name, size * NFP_NET_MAX_RX_RINGS);
+}
+
 int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm)
 {
 	struct nfp_pf *pf = abm->app->pf;
+	const struct nfp_rtsym *sym;
 	unsigned int pf_id;
+	char pf_symbol[64];
 
 	pf_id =	nfp_cppcore_pcie_unit(pf->cpp);
 	abm->pf_id = pf_id;
 
+	snprintf(pf_symbol, sizeof(pf_symbol), NFP_QLVL_SYM_NAME, pf_id);
+	sym = nfp_abm_ctrl_find_q_rtsym(pf, pf_symbol, NFP_QLVL_STRIDE);
+	if (IS_ERR(sym))
+		return PTR_ERR(sym);
+	abm->q_lvls = sym;
+
 	return 0;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
index 7d129b205535..1ac651cdc140 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.h
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.h
@@ -49,11 +49,13 @@ struct nfp_net;
  * @pf_id:	ID of our PF link
  * @eswitch_mode:	devlink eswitch mode, advanced functions only visible
  *			in switchdev mode
+ * @q_lvls:	queue level control area
  */
 struct nfp_abm {
 	struct nfp_app *app;
 	unsigned int pf_id;
 	enum devlink_eswitch_mode eswitch_mode;
+	const struct nfp_rtsym *q_lvls;
 };
 
 /**
@@ -72,6 +74,7 @@ struct nfp_abm_link {
 
 void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);
 int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm);
+int nfp_abm_ctrl_set_all_q_lvls(struct nfp_abm_link *alink, u32 val);
 int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm);
 int nfp_abm_ctrl_qm_disable(struct nfp_abm *abm);
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
index 4e19add1c539..b0da3d436850 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
@@ -87,6 +87,11 @@ struct resource;
 
 #define NFP_CPP_TARGET_ID_MASK          0x1f
 
+#define NFP_CPP_ATOMIC_RD(target, island) \
+	NFP_CPP_ISLAND_ID((target), 3, 0, (island))
+#define NFP_CPP_ATOMIC_WR(target, island) \
+	NFP_CPP_ISLAND_ID((target), 4, 0, (island))
+
 /**
  * NFP_CPP_ID() - pack target, token, and action into a CPP ID.
  * @target:     NFP CPP target id
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 05/14] nfp: abm: add simple RED offload
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

Offload simple RED configurations.  For now, support only DCTCP-like
scenarios where min and max are the same.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/abm/main.c | 82 +++++++++++++++++++
 drivers/net/ethernet/netronome/nfp/abm/main.h | 10 +++
 2 files changed, 92 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index 28a18ac62040..22251d88c958 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -38,6 +38,8 @@
 #include <linux/netdevice.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
+#include <net/pkt_cls.h>
+#include <net/pkt_sched.h>
 
 #include "../nfpcore/nfp.h"
 #include "../nfpcore/nfp_cpp.h"
@@ -55,6 +57,84 @@ static u32 nfp_abm_portid(enum nfp_repr_type rtype, unsigned int id)
 	       FIELD_PREP(NFP_ABM_PORTID_ID, id);
 }
 
+static void
+nfp_abm_red_destroy(struct net_device *netdev, struct nfp_abm_link *alink,
+		    u32 handle)
+{
+	struct nfp_port *port = nfp_port_from_netdev(netdev);
+
+	if (handle != alink->qdiscs[0].handle)
+		return;
+
+	alink->qdiscs[0].handle = TC_H_UNSPEC;
+	port->tc_offload_cnt = 0;
+	nfp_abm_ctrl_set_all_q_lvls(alink, ~0);
+}
+
+static int
+nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
+		    struct tc_red_qopt_offload *opt)
+{
+	struct nfp_port *port = nfp_port_from_netdev(netdev);
+	int err;
+
+	if (opt->set.min != opt->set.max || !opt->set.is_ecn) {
+		nfp_warn(alink->abm->app->cpp,
+			 "RED offload failed - unsupported parameters\n");
+		err = -EINVAL;
+		goto err_destroy;
+	}
+	err = nfp_abm_ctrl_set_all_q_lvls(alink, opt->set.min);
+	if (err)
+		goto err_destroy;
+
+	alink->qdiscs[0].handle = opt->handle;
+	port->tc_offload_cnt = 1;
+
+	return 0;
+err_destroy:
+	if (alink->qdiscs[0].handle != TC_H_UNSPEC)
+		nfp_abm_red_destroy(netdev, alink, alink->qdiscs[0].handle);
+	return err;
+}
+
+static int
+nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
+		     struct tc_red_qopt_offload *opt)
+{
+	if (opt->parent != TC_H_ROOT)
+		return -EOPNOTSUPP;
+
+	switch (opt->command) {
+	case TC_RED_REPLACE:
+		return nfp_abm_red_replace(netdev, alink, opt);
+	case TC_RED_DESTROY:
+		nfp_abm_red_destroy(netdev, alink, opt->handle);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+nfp_abm_setup_tc(struct nfp_app *app, struct net_device *netdev,
+		 enum tc_setup_type type, void *type_data)
+{
+	struct nfp_repr *repr = netdev_priv(netdev);
+	struct nfp_port *port;
+
+	port = nfp_port_from_netdev(netdev);
+	if (!port || port->type != NFP_PORT_PF_PORT)
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_QDISC_RED:
+		return nfp_abm_setup_tc_red(netdev, repr->app_priv, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static struct net_device *nfp_abm_repr_get(struct nfp_app *app, u32 port_id)
 {
 	enum nfp_repr_type rtype;
@@ -403,6 +483,8 @@ const struct nfp_app_type app_abm = {
 	.vnic_alloc	= nfp_abm_vnic_alloc,
 	.vnic_free	= nfp_abm_vnic_free,
 
+	.setup_tc	= nfp_abm_setup_tc,
+
 	.eswitch_mode_get	= nfp_abm_eswitch_mode_get,
 	.eswitch_mode_set	= nfp_abm_eswitch_mode_set,
 
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
index 1ac651cdc140..979f98fb808b 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.h
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.h
@@ -58,18 +58,28 @@ struct nfp_abm {
 	const struct nfp_rtsym *q_lvls;
 };
 
+/**
+ * struct nfp_red_qdisc - representation of single RED Qdisc
+ * @handle:	handle of currently offloaded RED Qdisc
+ */
+struct nfp_red_qdisc {
+	u32 handle;
+};
+
 /**
  * struct nfp_abm_link - port tuple of a ABM NIC
  * @abm:	back pointer to nfp_abm
  * @vnic:	data vNIC
  * @id:		id of the data vNIC
  * @queue_base:	id of base to host queue within PCIe (not QC idx)
+ * @qdiscs:	array of qdiscs
  */
 struct nfp_abm_link {
 	struct nfp_abm *abm;
 	struct nfp_net *vnic;
 	unsigned int id;
 	unsigned int queue_base;
+	struct nfp_red_qdisc qdiscs[1];
 };
 
 void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 06/14] net: sched: add qstats.qlen to qlen
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

AFAICT struct gnet_stats_queue.qlen is not used in Qdiscs.
It may, however, be useful for offloads to report HW queue
length there.  Add that value to the result of qdisc_qlen_sum().

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 include/net/sch_generic.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 98c10a28cd01..0b786c8204b9 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -350,14 +350,14 @@ static inline int qdisc_qlen(const struct Qdisc *q)
 
 static inline int qdisc_qlen_sum(const struct Qdisc *q)
 {
-	__u32 qlen = 0;
+	__u32 qlen = q->qstats.qlen;
 	int i;
 
 	if (q->flags & TCQ_F_NOLOCK) {
 		for_each_possible_cpu(i)
 			qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
 	} else {
-		qlen = q->q.qlen;
+		qlen += q->q.qlen;
 	}
 
 	return qlen;
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 07/14] nfp: abm: report statistics from RED offload
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

Report basic and extended RED statistics back to TC.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/abm/ctrl.c | 114 ++++++++++++++++++
 drivers/net/ethernet/netronome/nfp/abm/main.c |  92 ++++++++++++++
 drivers/net/ethernet/netronome/nfp/abm/main.h |  38 ++++++
 3 files changed, 244 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
index 978884a0be19..d2d9ca7a727c 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
@@ -44,8 +44,15 @@
 
 #define NFP_QLVL_SYM_NAME	"_abi_nfd_out_q_lvls_%u"
 #define NFP_QLVL_STRIDE		16
+#define NFP_QLVL_BLOG_BYTES	0
+#define NFP_QLVL_BLOG_PKTS	4
 #define NFP_QLVL_THRS		8
 
+#define NFP_QMSTAT_SYM_NAME	"_abi_nfdqm%u_stats"
+#define NFP_QMSTAT_STRIDE	32
+#define NFP_QMSTAT_DROP		16
+#define NFP_QMSTAT_ECN		24
+
 static unsigned long long
 nfp_abm_q_lvl_thrs(struct nfp_abm_link *alink, unsigned int queue)
 {
@@ -53,6 +60,55 @@ nfp_abm_q_lvl_thrs(struct nfp_abm_link *alink, unsigned int queue)
 		(alink->queue_base + queue) * NFP_QLVL_STRIDE + NFP_QLVL_THRS;
 }
 
+static int
+nfp_abm_ctrl_stat(struct nfp_abm_link *alink, const struct nfp_rtsym *sym,
+		  unsigned int stride, unsigned int offset, unsigned int i,
+		  bool is_u64, u64 *res)
+{
+	struct nfp_cpp *cpp = alink->abm->app->cpp;
+	u32 val32, mur;
+	u64 val, addr;
+	int err;
+
+	mur = NFP_CPP_ATOMIC_RD(sym->target, sym->domain);
+
+	addr = sym->addr + (alink->queue_base + i) * stride + offset;
+	if (is_u64)
+		err = nfp_cpp_readq(cpp, mur, addr, &val);
+	else
+		err = nfp_cpp_readl(cpp, mur, addr, &val32);
+	if (err) {
+		nfp_err(cpp,
+			"RED offload reading stat failed on vNIC %d queue %d\n",
+			alink->id, i);
+		return err;
+	}
+
+	*res = is_u64 ? val : val32;
+	return 0;
+}
+
+static int
+nfp_abm_ctrl_stat_all(struct nfp_abm_link *alink, const struct nfp_rtsym *sym,
+		      unsigned int stride, unsigned int offset, bool is_u64,
+		      u64 *res)
+{
+	u64 val, sum = 0;
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < alink->vnic->max_rx_rings; i++) {
+		err = nfp_abm_ctrl_stat(alink, sym, stride, offset, i,
+					is_u64, &val);
+		if (err)
+			return err;
+		sum += val;
+	}
+
+	*res = sum;
+	return 0;
+}
+
 static int
 nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int i, u32 val)
 {
@@ -86,6 +142,58 @@ int nfp_abm_ctrl_set_all_q_lvls(struct nfp_abm_link *alink, u32 val)
 	return 0;
 }
 
+int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
+			    struct nfp_alink_stats *stats)
+{
+	u64 pkts = 0, bytes = 0;
+	int i, err;
+
+	for (i = 0; i < alink->vnic->max_rx_rings; i++) {
+		pkts += nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i));
+		bytes += nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i) + 8);
+	}
+	stats->tx_pkts = pkts;
+	stats->tx_bytes = bytes;
+
+	err = nfp_abm_ctrl_stat_all(alink, alink->abm->q_lvls,
+				    NFP_QLVL_STRIDE, NFP_QLVL_BLOG_BYTES,
+				    false, &stats->backlog_bytes);
+	if (err)
+		return err;
+
+	err = nfp_abm_ctrl_stat_all(alink, alink->abm->q_lvls,
+				    NFP_QLVL_STRIDE, NFP_QLVL_BLOG_PKTS,
+				    false, &stats->backlog_pkts);
+	if (err)
+		return err;
+
+	err = nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
+				    NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
+				    true, &stats->drops);
+	if (err)
+		return err;
+
+	return nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
+				     NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
+				     true, &stats->overlimits);
+}
+
+int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
+			     struct nfp_alink_xstats *xstats)
+{
+	int err;
+
+	err = nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
+				    NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
+				    true, &xstats->pdrop);
+	if (err)
+		return err;
+
+	return nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
+				     NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
+				     true, &xstats->ecn_marked);
+}
+
 int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm)
 {
 	return nfp_mbox_cmd(abm->app->pf, NFP_MBOX_PCIE_ABM_ENABLE,
@@ -147,5 +255,11 @@ int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm)
 		return PTR_ERR(sym);
 	abm->q_lvls = sym;
 
+	snprintf(pf_symbol, sizeof(pf_symbol), NFP_QMSTAT_SYM_NAME, pf_id);
+	sym = nfp_abm_ctrl_find_q_rtsym(pf, pf_symbol, NFP_QMSTAT_STRIDE);
+	if (IS_ERR(sym))
+		return PTR_ERR(sym);
+	abm->qm_stats = sym;
+
 	return 0;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index 22251d88c958..d0c21899a8b7 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -40,6 +40,7 @@
 #include <linux/slab.h>
 #include <net/pkt_cls.h>
 #include <net/pkt_sched.h>
+#include <net/red.h>
 
 #include "../nfpcore/nfp.h"
 #include "../nfpcore/nfp_cpp.h"
@@ -57,6 +58,23 @@ static u32 nfp_abm_portid(enum nfp_repr_type rtype, unsigned int id)
 	       FIELD_PREP(NFP_ABM_PORTID_ID, id);
 }
 
+static int nfp_abm_reset_stats(struct nfp_abm_link *alink)
+{
+	int err;
+
+	err = nfp_abm_ctrl_read_stats(alink, &alink->qdiscs[0].stats);
+	if (err)
+		return err;
+	alink->qdiscs[0].stats.backlog_pkts = 0;
+	alink->qdiscs[0].stats.backlog_bytes = 0;
+
+	err = nfp_abm_ctrl_read_xstats(alink, &alink->qdiscs[0].xstats);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static void
 nfp_abm_red_destroy(struct net_device *netdev, struct nfp_abm_link *alink,
 		    u32 handle)
@@ -88,16 +106,86 @@ nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
 	if (err)
 		goto err_destroy;
 
+	/* Reset stats only on new qdisc */
+	if (alink->qdiscs[0].handle != opt->handle) {
+		err = nfp_abm_reset_stats(alink);
+		if (err)
+			goto err_destroy;
+	}
+
 	alink->qdiscs[0].handle = opt->handle;
 	port->tc_offload_cnt = 1;
 
 	return 0;
 err_destroy:
+	/* If the qdisc keeps on living, but we can't offload, undo changes */
+	if (alink->qdiscs[0].handle == opt->handle) {
+		opt->set.qstats->qlen -= alink->qdiscs[0].stats.backlog_pkts;
+		opt->set.qstats->backlog -=
+			alink->qdiscs[0].stats.backlog_bytes;
+	}
 	if (alink->qdiscs[0].handle != TC_H_UNSPEC)
 		nfp_abm_red_destroy(netdev, alink, alink->qdiscs[0].handle);
 	return err;
 }
 
+static void
+nfp_abm_update_stats(struct nfp_alink_stats *new, struct nfp_alink_stats *old,
+		     struct tc_qopt_offload_stats *stats)
+{
+	_bstats_update(stats->bstats, new->tx_bytes - old->tx_bytes,
+		       new->tx_pkts - old->tx_pkts);
+	stats->qstats->qlen += new->backlog_pkts - old->backlog_pkts;
+	stats->qstats->backlog += new->backlog_bytes - old->backlog_bytes;
+	stats->qstats->overlimits += new->overlimits - old->overlimits;
+	stats->qstats->drops += new->drops - old->drops;
+}
+
+static int
+nfp_abm_red_stats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
+{
+	struct nfp_alink_stats *prev_stats;
+	struct nfp_alink_stats stats;
+	int err;
+
+	if (alink->qdiscs[0].handle != opt->handle)
+		return -EOPNOTSUPP;
+	prev_stats = &alink->qdiscs[0].stats;
+
+	err = nfp_abm_ctrl_read_stats(alink, &stats);
+	if (err)
+		return err;
+
+	nfp_abm_update_stats(&stats, prev_stats, &opt->stats);
+
+	*prev_stats = stats;
+
+	return 0;
+}
+
+static int
+nfp_abm_red_xstats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
+{
+	struct nfp_alink_xstats *prev_xstats;
+	struct nfp_alink_xstats xstats;
+	int err;
+
+	if (alink->qdiscs[0].handle != opt->handle)
+		return -EOPNOTSUPP;
+	prev_xstats = &alink->qdiscs[0].xstats;
+
+	err = nfp_abm_ctrl_read_xstats(alink, &xstats);
+	if (err)
+		return err;
+
+	opt->xstats->forced_mark += xstats.ecn_marked - prev_xstats->ecn_marked;
+	opt->xstats->pdrop += xstats.pdrop - prev_xstats->pdrop;
+
+	*prev_xstats = xstats;
+
+	return 0;
+}
+
 static int
 nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
 		     struct tc_red_qopt_offload *opt)
@@ -111,6 +199,10 @@ nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
 	case TC_RED_DESTROY:
 		nfp_abm_red_destroy(netdev, alink, opt->handle);
 		return 0;
+	case TC_RED_STATS:
+		return nfp_abm_red_stats(alink, opt);
+	case TC_RED_XSTATS:
+		return nfp_abm_red_xstats(alink, opt);
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
index 979f98fb808b..93a3b79cf468 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.h
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.h
@@ -50,20 +50,54 @@ struct nfp_net;
  * @eswitch_mode:	devlink eswitch mode, advanced functions only visible
  *			in switchdev mode
  * @q_lvls:	queue level control area
+ * @qm_stats:	queue statistics symbol
  */
 struct nfp_abm {
 	struct nfp_app *app;
 	unsigned int pf_id;
 	enum devlink_eswitch_mode eswitch_mode;
 	const struct nfp_rtsym *q_lvls;
+	const struct nfp_rtsym *qm_stats;
+};
+
+/**
+ * struct nfp_alink_stats - ABM NIC statistics
+ * @tx_pkts:		number of TXed packets
+ * @tx_bytes:		number of TXed bytes
+ * @backlog_pkts:	momentary backlog length (packets)
+ * @backlog_bytes:	momentary backlog length (bytes)
+ * @overlimits:		number of ECN marked TXed packets (accumulative)
+ * @drops:		number of tail-dropped packets (accumulative)
+ */
+struct nfp_alink_stats {
+	u64 tx_pkts;
+	u64 tx_bytes;
+	u64 backlog_pkts;
+	u64 backlog_bytes;
+	u64 overlimits;
+	u64 drops;
+};
+
+/**
+ * struct nfp_alink_xstats - extended ABM NIC statistics
+ * @ecn_marked:		number of ECN marked TXed packets
+ * @pdrop:		number of hard drops due to queue limit
+ */
+struct nfp_alink_xstats {
+	u64 ecn_marked;
+	u64 pdrop;
 };
 
 /**
  * struct nfp_red_qdisc - representation of single RED Qdisc
  * @handle:	handle of currently offloaded RED Qdisc
+ * @stats:	statistics from last refresh
+ * @xstats:	base of extended statistics
  */
 struct nfp_red_qdisc {
 	u32 handle;
+	struct nfp_alink_stats stats;
+	struct nfp_alink_xstats xstats;
 };
 
 /**
@@ -85,6 +119,10 @@ struct nfp_abm_link {
 void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);
 int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm);
 int nfp_abm_ctrl_set_all_q_lvls(struct nfp_abm_link *alink, u32 val);
+int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
+			    struct nfp_alink_stats *stats);
+int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
+			     struct nfp_alink_xstats *xstats);
 int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm);
 int nfp_abm_ctrl_qm_disable(struct nfp_abm *abm);
 #endif
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 08/14] nfp: allow apps to add extra stats to ports
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

Allow nfp apps to add extra ethtool stats.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_app.c  | 22 +++++++++++++++++++
 drivers/net/ethernet/netronome/nfp/nfp_app.h  | 13 +++++++++++
 .../ethernet/netronome/nfp/nfp_net_ethtool.c  | 10 +++++++--
 drivers/net/ethernet/netronome/nfp/nfp_port.h |  2 ++
 4 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c
index c9d8a7ab311e..f28b244f4ee7 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -43,6 +43,7 @@
 #include "nfp_main.h"
 #include "nfp_net.h"
 #include "nfp_net_repr.h"
+#include "nfp_port.h"
 
 static const struct nfp_app_type *apps[] = {
 	[NFP_APP_CORE_NIC]	= &app_nic,
@@ -85,6 +86,27 @@ const char *nfp_app_mip_name(struct nfp_app *app)
 	return nfp_mip_name(app->pf->mip);
 }
 
+u64 *nfp_app_port_get_stats(struct nfp_port *port, u64 *data)
+{
+	if (!port || !port->app || !port->app->type->port_get_stats)
+		return data;
+	return port->app->type->port_get_stats(port->app, port, data);
+}
+
+int nfp_app_port_get_stats_count(struct nfp_port *port)
+{
+	if (!port || !port->app || !port->app->type->port_get_stats_count)
+		return 0;
+	return port->app->type->port_get_stats_count(port->app, port);
+}
+
+u8 *nfp_app_port_get_stats_strings(struct nfp_port *port, u8 *data)
+{
+	if (!port || !port->app || !port->app->type->port_get_stats_strings)
+		return data;
+	return port->app->type->port_get_stats_strings(port->app, port, data);
+}
+
 struct sk_buff *
 nfp_app_ctrl_msg_alloc(struct nfp_app *app, unsigned int size, gfp_t priority)
 {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index 23b99a4e05c2..ee74caacb015 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -90,6 +90,9 @@ extern const struct nfp_app_type app_abm;
  * @repr_stop:	representor netdev stop callback
  * @check_mtu:	MTU change request on a netdev (verify it is valid)
  * @repr_change_mtu:	MTU change request on repr (make and verify change)
+ * @port_get_stats:		get extra ethtool statistics for a port
+ * @port_get_stats_count:	get count of extra statistics for a port
+ * @port_get_stats_strings:	get strings for extra statistics
  * @start:	start application logic
  * @stop:	stop application logic
  * @ctrl_msg_rx:    control message handler
@@ -132,6 +135,12 @@ struct nfp_app_type {
 	int (*repr_change_mtu)(struct nfp_app *app, struct net_device *netdev,
 			       int new_mtu);
 
+	u64 *(*port_get_stats)(struct nfp_app *app,
+			       struct nfp_port *port, u64 *data);
+	int (*port_get_stats_count)(struct nfp_app *app, struct nfp_port *port);
+	u8 *(*port_get_stats_strings)(struct nfp_app *app,
+				      struct nfp_port *port, u8 *data);
+
 	int (*start)(struct nfp_app *app);
 	void (*stop)(struct nfp_app *app);
 
@@ -404,6 +413,10 @@ static inline struct net_device *nfp_app_repr_get(struct nfp_app *app, u32 id)
 
 struct nfp_app *nfp_app_from_netdev(struct net_device *netdev);
 
+u64 *nfp_app_port_get_stats(struct nfp_port *port, u64 *data);
+int nfp_app_port_get_stats_count(struct nfp_port *port);
+u8 *nfp_app_port_get_stats_strings(struct nfp_port *port, u8 *data);
+
 struct nfp_reprs *
 nfp_reprs_get_locked(struct nfp_app *app, enum nfp_repr_type type);
 struct nfp_reprs *
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index c9016419bfa0..26d1cc4e2906 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -437,7 +437,7 @@ static int nfp_net_set_ringparam(struct net_device *netdev,
 	return nfp_net_set_ring_size(nn, rxd_cnt, txd_cnt);
 }
 
-static __printf(2, 3) u8 *nfp_pr_et(u8 *data, const char *fmt, ...)
+__printf(2, 3) u8 *nfp_pr_et(u8 *data, const char *fmt, ...)
 {
 	va_list args;
 
@@ -637,6 +637,7 @@ static void nfp_net_get_strings(struct net_device *netdev,
 						     nn->dp.num_tx_rings,
 						     false);
 		data = nfp_mac_get_stats_strings(netdev, data);
+		data = nfp_app_port_get_stats_strings(nn->port, data);
 		break;
 	}
 }
@@ -651,6 +652,7 @@ nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
 	data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar,
 				     nn->dp.num_rx_rings, nn->dp.num_tx_rings);
 	data = nfp_mac_get_stats(netdev, data);
+	data = nfp_app_port_get_stats(nn->port, data);
 }
 
 static int nfp_net_get_sset_count(struct net_device *netdev, int sset)
@@ -662,7 +664,8 @@ static int nfp_net_get_sset_count(struct net_device *netdev, int sset)
 		return nfp_vnic_get_sw_stats_count(netdev) +
 		       nfp_vnic_get_hw_stats_count(nn->dp.num_rx_rings,
 						   nn->dp.num_tx_rings) +
-		       nfp_mac_get_stats_count(netdev);
+		       nfp_mac_get_stats_count(netdev) +
+		       nfp_app_port_get_stats_count(nn->port);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -679,6 +682,7 @@ static void nfp_port_get_strings(struct net_device *netdev,
 			data = nfp_vnic_get_hw_stats_strings(data, 0, 0, true);
 		else
 			data = nfp_mac_get_stats_strings(netdev, data);
+		data = nfp_app_port_get_stats_strings(port, data);
 		break;
 	}
 }
@@ -693,6 +697,7 @@ nfp_port_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
 		data = nfp_vnic_get_hw_stats(data, port->vnic, 0, 0);
 	else
 		data = nfp_mac_get_stats(netdev, data);
+	data = nfp_app_port_get_stats(port, data);
 }
 
 static int nfp_port_get_sset_count(struct net_device *netdev, int sset)
@@ -706,6 +711,7 @@ static int nfp_port_get_sset_count(struct net_device *netdev, int sset)
 			count = nfp_vnic_get_hw_stats_count(0, 0);
 		else
 			count = nfp_mac_get_stats_count(netdev);
+		count += nfp_app_port_get_stats_count(port);
 		return count;
 	default:
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index 18666750456e..51f10ae2d53e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -122,6 +122,8 @@ struct nfp_port {
 extern const struct ethtool_ops nfp_port_ethtool_ops;
 extern const struct switchdev_ops nfp_port_switchdev_ops;
 
+__printf(2, 3) u8 *nfp_pr_et(u8 *data, const char *fmt, ...);
+
 int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type,
 		      void *type_data);
 
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 09/14] nfp: abm: expose the internal stats in ethtool
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

There is a handful of statistics exposing some internal details
of the implementation.  Expose those via ethtool.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/abm/ctrl.c | 22 ++++++++
 drivers/net/ethernet/netronome/nfp/abm/main.c | 51 +++++++++++++++++++
 drivers/net/ethernet/netronome/nfp/abm/main.h |  2 +
 3 files changed, 75 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
index d2d9ca7a727c..79fc9147c012 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
@@ -50,6 +50,8 @@
 
 #define NFP_QMSTAT_SYM_NAME	"_abi_nfdqm%u_stats"
 #define NFP_QMSTAT_STRIDE	32
+#define NFP_QMSTAT_NON_STO	0
+#define NFP_QMSTAT_STO		8
 #define NFP_QMSTAT_DROP		16
 #define NFP_QMSTAT_ECN		24
 
@@ -142,6 +144,26 @@ int nfp_abm_ctrl_set_all_q_lvls(struct nfp_abm_link *alink, u32 val)
 	return 0;
 }
 
+u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int i)
+{
+	u64 val;
+
+	if (nfp_abm_ctrl_stat(alink, alink->abm->qm_stats, NFP_QMSTAT_STRIDE,
+			      NFP_QMSTAT_NON_STO, i, true, &val))
+		return 0;
+	return val;
+}
+
+u64 nfp_abm_ctrl_stat_sto(struct nfp_abm_link *alink, unsigned int i)
+{
+	u64 val;
+
+	if (nfp_abm_ctrl_stat(alink, alink->abm->qm_stats, NFP_QMSTAT_STRIDE,
+			      NFP_QMSTAT_STO, i, true, &val))
+		return 0;
+	return val;
+}
+
 int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
 			    struct nfp_alink_stats *stats)
 {
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index d0c21899a8b7..4e89159f13d3 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -497,6 +497,53 @@ static void nfp_abm_vnic_free(struct nfp_app *app, struct nfp_net *nn)
 	kfree(alink);
 }
 
+static u64 *
+nfp_abm_port_get_stats(struct nfp_app *app, struct nfp_port *port, u64 *data)
+{
+	struct nfp_repr *repr = netdev_priv(port->netdev);
+	struct nfp_abm_link *alink;
+	unsigned int i;
+
+	if (port->type != NFP_PORT_PF_PORT)
+		return data;
+	alink = repr->app_priv;
+	for (i = 0; i < alink->vnic->dp.num_r_vecs; i++) {
+		*data++ = nfp_abm_ctrl_stat_non_sto(alink, i);
+		*data++ = nfp_abm_ctrl_stat_sto(alink, i);
+	}
+	return data;
+}
+
+static int
+nfp_abm_port_get_stats_count(struct nfp_app *app, struct nfp_port *port)
+{
+	struct nfp_repr *repr = netdev_priv(port->netdev);
+	struct nfp_abm_link *alink;
+
+	if (port->type != NFP_PORT_PF_PORT)
+		return 0;
+	alink = repr->app_priv;
+	return alink->vnic->dp.num_r_vecs * 2;
+}
+
+static u8 *
+nfp_abm_port_get_stats_strings(struct nfp_app *app, struct nfp_port *port,
+			       u8 *data)
+{
+	struct nfp_repr *repr = netdev_priv(port->netdev);
+	struct nfp_abm_link *alink;
+	unsigned int i;
+
+	if (port->type != NFP_PORT_PF_PORT)
+		return data;
+	alink = repr->app_priv;
+	for (i = 0; i < alink->vnic->dp.num_r_vecs; i++) {
+		data = nfp_pr_et(data, "q%u_no_wait", i);
+		data = nfp_pr_et(data, "q%u_delayed", i);
+	}
+	return data;
+}
+
 static int nfp_abm_init(struct nfp_app *app)
 {
 	struct nfp_pf *pf = app->pf;
@@ -575,6 +622,10 @@ const struct nfp_app_type app_abm = {
 	.vnic_alloc	= nfp_abm_vnic_alloc,
 	.vnic_free	= nfp_abm_vnic_free,
 
+	.port_get_stats		= nfp_abm_port_get_stats,
+	.port_get_stats_count	= nfp_abm_port_get_stats_count,
+	.port_get_stats_strings	= nfp_abm_port_get_stats_strings,
+
 	.setup_tc	= nfp_abm_setup_tc,
 
 	.eswitch_mode_get	= nfp_abm_eswitch_mode_get,
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
index 93a3b79cf468..09fd15847961 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.h
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.h
@@ -123,6 +123,8 @@ int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
 			    struct nfp_alink_stats *stats);
 int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
 			     struct nfp_alink_xstats *xstats);
+u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int i);
+u64 nfp_abm_ctrl_stat_sto(struct nfp_abm_link *alink, unsigned int i);
 int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm);
 int nfp_abm_ctrl_qm_disable(struct nfp_abm *abm);
 #endif
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 10/14] nfp: abm: expose all PF queues
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

Allocate the PF representor as multi-queue to allow setting
the configuration per-queue.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/abm/main.c     | 10 +++++++---
 drivers/net/ethernet/netronome/nfp/nfp_net_repr.c |  5 +++--
 drivers/net/ethernet/netronome/nfp/nfp_net_repr.h |  7 ++++++-
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index 4e89159f13d3..ef77d7b0d99d 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -255,14 +255,18 @@ nfp_abm_spawn_repr(struct nfp_app *app, struct nfp_abm_link *alink,
 	struct nfp_reprs *reprs;
 	struct nfp_repr *repr;
 	struct nfp_port *port;
+	unsigned int txqs;
 	int err;
 
-	if (ptype == NFP_PORT_PHYS_PORT)
+	if (ptype == NFP_PORT_PHYS_PORT) {
 		rtype = NFP_REPR_TYPE_PHYS_PORT;
-	else
+		txqs = 1;
+	} else {
 		rtype = NFP_REPR_TYPE_PF;
+		txqs = alink->vnic->max_rx_rings;
+	}
 
-	netdev = nfp_repr_alloc(app);
+	netdev = nfp_repr_alloc_mqs(app, txqs, 1);
 	if (!netdev)
 		return -ENOMEM;
 	repr = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 117eca6819de..d7b712f6362f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -360,12 +360,13 @@ void nfp_repr_free(struct net_device *netdev)
 	__nfp_repr_free(netdev_priv(netdev));
 }
 
-struct net_device *nfp_repr_alloc(struct nfp_app *app)
+struct net_device *
+nfp_repr_alloc_mqs(struct nfp_app *app, unsigned int txqs, unsigned int rxqs)
 {
 	struct net_device *netdev;
 	struct nfp_repr *repr;
 
-	netdev = alloc_etherdev(sizeof(*repr));
+	netdev = alloc_etherdev_mqs(sizeof(*repr), txqs, rxqs);
 	if (!netdev)
 		return NULL;
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
index 8366e4f3c623..1bf2b18109ab 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
@@ -126,7 +126,8 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
 		  u32 cmsg_port_id, struct nfp_port *port,
 		  struct net_device *pf_netdev);
 void nfp_repr_free(struct net_device *netdev);
-struct net_device *nfp_repr_alloc(struct nfp_app *app);
+struct net_device *
+nfp_repr_alloc_mqs(struct nfp_app *app, unsigned int txqs, unsigned int rxqs);
 void nfp_repr_clean_and_free(struct nfp_repr *repr);
 void nfp_reprs_clean_and_free(struct nfp_app *app, struct nfp_reprs *reprs);
 void nfp_reprs_clean_and_free_by_type(struct nfp_app *app,
@@ -134,4 +135,8 @@ void nfp_reprs_clean_and_free_by_type(struct nfp_app *app,
 struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs);
 int nfp_reprs_resync_phys_ports(struct nfp_app *app);
 
+static inline struct net_device *nfp_repr_alloc(struct nfp_app *app)
+{
+	return nfp_repr_alloc_mqs(app, 1, 1);
+}
 #endif /* NFP_NET_REPR_H */
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 11/14] net: sched: mq: add simple offload notification
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

mq offload is trivial, we just need to let the device know
that the root qdisc is mq.  An alternative approach would be
to export qdisc_lookup() and make drivers check the root
type themselves, but notification via ndo_setup_tc is more
in line with other qdiscs.

Note that mq doesn't hold any stats on its own, it just
adds up stats of its children.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 include/linux/netdevice.h |  1 +
 include/net/pkt_cls.h     | 10 ++++++++++
 net/sched/sch_mq.c        | 19 +++++++++++++++++++
 3 files changed, 30 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8452f72087ef..29ef76360cc8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -791,6 +791,7 @@ enum tc_setup_type {
 	TC_SETUP_QDISC_CBS,
 	TC_SETUP_QDISC_RED,
 	TC_SETUP_QDISC_PRIO,
+	TC_SETUP_QDISC_MQ,
 };
 
 /* These structures hold the attributes of bpf state that are being passed
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index f3ec43725724..942f839dbca4 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -778,6 +778,16 @@ struct tc_qopt_offload_stats {
 	struct gnet_stats_queue *qstats;
 };
 
+enum tc_mq_command {
+	TC_MQ_CREATE,
+	TC_MQ_DESTROY,
+};
+
+struct tc_mq_qopt_offload {
+	enum tc_mq_command command;
+	u32 handle;
+};
+
 enum tc_red_command {
 	TC_RED_REPLACE,
 	TC_RED_DESTROY,
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index f062a18e9162..6ccf6daa2503 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -16,6 +16,7 @@
 #include <linux/errno.h>
 #include <linux/skbuff.h>
 #include <net/netlink.h>
+#include <net/pkt_cls.h>
 #include <net/pkt_sched.h>
 #include <net/sch_generic.h>
 
@@ -23,12 +24,28 @@ struct mq_sched {
 	struct Qdisc		**qdiscs;
 };
 
+static int mq_offload(struct Qdisc *sch, enum tc_mq_command cmd)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_mq_qopt_offload opt = {
+		.command = cmd,
+		.handle = sch->handle,
+	};
+
+	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+		return -EOPNOTSUPP;
+
+	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt);
+}
+
 static void mq_destroy(struct Qdisc *sch)
 {
 	struct net_device *dev = qdisc_dev(sch);
 	struct mq_sched *priv = qdisc_priv(sch);
 	unsigned int ntx;
 
+	mq_offload(sch, TC_MQ_DESTROY);
+
 	if (!priv->qdiscs)
 		return;
 	for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
@@ -70,6 +87,8 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	sch->flags |= TCQ_F_MQROOT;
+
+	mq_offload(sch, TC_MQ_CREATE);
 	return 0;
 }
 
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 12/14] nfp: abm: multi-queue RED offload
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

Add support for MQ offload and setting RED parameters
on a queue-by-queue basis.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/abm/ctrl.c |  50 ++++-
 drivers/net/ethernet/netronome/nfp/abm/main.c | 192 ++++++++++++++----
 drivers/net/ethernet/netronome/nfp/abm/main.h |  14 +-
 3 files changed, 208 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
index 79fc9147c012..b157ccd8c80f 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
@@ -111,8 +111,7 @@ nfp_abm_ctrl_stat_all(struct nfp_abm_link *alink, const struct nfp_rtsym *sym,
 	return 0;
 }
 
-static int
-nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int i, u32 val)
+int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int i, u32 val)
 {
 	struct nfp_cpp *cpp = alink->abm->app->cpp;
 	u32 muw;
@@ -164,6 +163,37 @@ u64 nfp_abm_ctrl_stat_sto(struct nfp_abm_link *alink, unsigned int i)
 	return val;
 }
 
+int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int i,
+			      struct nfp_alink_stats *stats)
+{
+	int err;
+
+	stats->tx_pkts = nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i));
+	stats->tx_bytes = nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i) + 8);
+
+	err = nfp_abm_ctrl_stat(alink, alink->abm->q_lvls,
+				NFP_QLVL_STRIDE, NFP_QLVL_BLOG_BYTES,
+				i, false, &stats->backlog_bytes);
+	if (err)
+		return err;
+
+	err = nfp_abm_ctrl_stat(alink, alink->abm->q_lvls,
+				NFP_QLVL_STRIDE, NFP_QLVL_BLOG_PKTS,
+				i, false, &stats->backlog_pkts);
+	if (err)
+		return err;
+
+	err = nfp_abm_ctrl_stat(alink, alink->abm->qm_stats,
+				NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
+				i, true, &stats->drops);
+	if (err)
+		return err;
+
+	return nfp_abm_ctrl_stat(alink, alink->abm->qm_stats,
+				 NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
+				 i, true, &stats->overlimits);
+}
+
 int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
 			    struct nfp_alink_stats *stats)
 {
@@ -200,6 +230,22 @@ int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
 				     true, &stats->overlimits);
 }
 
+int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
+			       struct nfp_alink_xstats *xstats)
+{
+	int err;
+
+	err = nfp_abm_ctrl_stat(alink, alink->abm->qm_stats,
+				NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
+				i, true, &xstats->pdrop);
+	if (err)
+		return err;
+
+	return nfp_abm_ctrl_stat(alink, alink->abm->qm_stats,
+				 NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
+				 i, true, &xstats->ecn_marked);
+}
+
 int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
 			     struct nfp_alink_xstats *xstats)
 {
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index ef77d7b0d99d..21d5af1fb061 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -58,43 +58,77 @@ static u32 nfp_abm_portid(enum nfp_repr_type rtype, unsigned int id)
 	       FIELD_PREP(NFP_ABM_PORTID_ID, id);
 }
 
-static int nfp_abm_reset_stats(struct nfp_abm_link *alink)
+static int
+__nfp_abm_reset_root(struct net_device *netdev, struct nfp_abm_link *alink,
+		     u32 handle, unsigned int qs, u32 init_val)
 {
-	int err;
+	struct nfp_port *port = nfp_port_from_netdev(netdev);
+	int ret;
 
-	err = nfp_abm_ctrl_read_stats(alink, &alink->qdiscs[0].stats);
-	if (err)
-		return err;
-	alink->qdiscs[0].stats.backlog_pkts = 0;
-	alink->qdiscs[0].stats.backlog_bytes = 0;
+	ret = nfp_abm_ctrl_set_all_q_lvls(alink, init_val);
+	memset(alink->qdiscs, 0, sizeof(*alink->qdiscs) * alink->num_qdiscs);
 
-	err = nfp_abm_ctrl_read_xstats(alink, &alink->qdiscs[0].xstats);
-	if (err)
-		return err;
+	alink->parent = handle;
+	alink->num_qdiscs = qs;
+	port->tc_offload_cnt = qs;
 
-	return 0;
+	return ret;
+}
+
+static void
+nfp_abm_reset_root(struct net_device *netdev, struct nfp_abm_link *alink,
+		   u32 handle, unsigned int qs)
+{
+	__nfp_abm_reset_root(netdev, alink, handle, qs, ~0);
+}
+
+static int
+nfp_abm_red_find(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
+{
+	unsigned int i = TC_H_MIN(opt->parent) - 1;
+
+	if (opt->parent == TC_H_ROOT)
+		i = 0;
+	else if (TC_H_MAJ(alink->parent) == TC_H_MAJ(opt->parent))
+		i = TC_H_MIN(opt->parent) - 1;
+	else
+		return -EOPNOTSUPP;
+
+	if (i >= alink->num_qdiscs || opt->handle != alink->qdiscs[i].handle)
+		return -EOPNOTSUPP;
+
+	return i;
 }
 
 static void
 nfp_abm_red_destroy(struct net_device *netdev, struct nfp_abm_link *alink,
 		    u32 handle)
 {
-	struct nfp_port *port = nfp_port_from_netdev(netdev);
+	unsigned int i;
 
-	if (handle != alink->qdiscs[0].handle)
+	for (i = 0; i < alink->num_qdiscs; i++)
+		if (handle == alink->qdiscs[i].handle)
+			break;
+	if (i == alink->num_qdiscs)
 		return;
 
-	alink->qdiscs[0].handle = TC_H_UNSPEC;
-	port->tc_offload_cnt = 0;
-	nfp_abm_ctrl_set_all_q_lvls(alink, ~0);
+	if (alink->parent == TC_H_ROOT) {
+		nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 0);
+	} else {
+		nfp_abm_ctrl_set_q_lvl(alink, i, ~0);
+		memset(&alink->qdiscs[i], 0, sizeof(*alink->qdiscs));
+	}
 }
 
 static int
 nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
 		    struct tc_red_qopt_offload *opt)
 {
-	struct nfp_port *port = nfp_port_from_netdev(netdev);
-	int err;
+	bool existing;
+	int i, err;
+
+	i = nfp_abm_red_find(alink, opt);
+	existing = i >= 0;
 
 	if (opt->set.min != opt->set.max || !opt->set.is_ecn) {
 		nfp_warn(alink->abm->app->cpp,
@@ -102,30 +136,62 @@ nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
 		err = -EINVAL;
 		goto err_destroy;
 	}
-	err = nfp_abm_ctrl_set_all_q_lvls(alink, opt->set.min);
-	if (err)
-		goto err_destroy;
 
-	/* Reset stats only on new qdisc */
-	if (alink->qdiscs[0].handle != opt->handle) {
-		err = nfp_abm_reset_stats(alink);
+	if (existing) {
+		if (alink->parent == TC_H_ROOT)
+			err = nfp_abm_ctrl_set_all_q_lvls(alink, opt->set.min);
+		else
+			err = nfp_abm_ctrl_set_q_lvl(alink, i, opt->set.min);
 		if (err)
 			goto err_destroy;
+		return 0;
 	}
 
-	alink->qdiscs[0].handle = opt->handle;
-	port->tc_offload_cnt = 1;
+	if (opt->parent == TC_H_ROOT) {
+		i = 0;
+		err = __nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 1,
+					   opt->set.min);
+	} else if (TC_H_MAJ(alink->parent) == TC_H_MAJ(opt->parent)) {
+		i = TC_H_MIN(opt->parent) - 1;
+		err = nfp_abm_ctrl_set_q_lvl(alink, i, opt->set.min);
+	} else {
+		return -EINVAL;
+	}
+	/* Set the handle to try full clean up, in case IO failed */
+	alink->qdiscs[i].handle = opt->handle;
+	if (err)
+		goto err_destroy;
+
+	if (opt->parent == TC_H_ROOT)
+		err = nfp_abm_ctrl_read_stats(alink, &alink->qdiscs[i].stats);
+	else
+		err = nfp_abm_ctrl_read_q_stats(alink, i,
+						&alink->qdiscs[i].stats);
+	if (err)
+		goto err_destroy;
+
+	if (opt->parent == TC_H_ROOT)
+		err = nfp_abm_ctrl_read_xstats(alink,
+					       &alink->qdiscs[i].xstats);
+	else
+		err = nfp_abm_ctrl_read_q_xstats(alink, i,
+						 &alink->qdiscs[i].xstats);
+	if (err)
+		goto err_destroy;
+
+	alink->qdiscs[i].stats.backlog_pkts = 0;
+	alink->qdiscs[i].stats.backlog_bytes = 0;
 
 	return 0;
 err_destroy:
 	/* If the qdisc keeps on living, but we can't offload undo changes */
-	if (alink->qdiscs[0].handle == opt->handle) {
-		opt->set.qstats->qlen -= alink->qdiscs[0].stats.backlog_pkts;
+	if (existing) {
+		opt->set.qstats->qlen -= alink->qdiscs[i].stats.backlog_pkts;
 		opt->set.qstats->backlog -=
-			alink->qdiscs[0].stats.backlog_bytes;
+			alink->qdiscs[i].stats.backlog_bytes;
 	}
-	if (alink->qdiscs[0].handle != TC_H_UNSPEC)
-		nfp_abm_red_destroy(netdev, alink, alink->qdiscs[0].handle);
+	nfp_abm_red_destroy(netdev, alink, opt->handle);
+
 	return err;
 }
 
@@ -146,13 +212,17 @@ nfp_abm_red_stats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
 {
 	struct nfp_alink_stats *prev_stats;
 	struct nfp_alink_stats stats;
-	int err;
+	int i, err;
 
-	if (alink->qdiscs[0].handle != opt->handle)
-		return -EOPNOTSUPP;
-	prev_stats = &alink->qdiscs[0].stats;
+	i = nfp_abm_red_find(alink, opt);
+	if (i < 0)
+		return i;
+	prev_stats = &alink->qdiscs[i].stats;
 
-	err = nfp_abm_ctrl_read_stats(alink, &stats);
+	if (alink->parent == TC_H_ROOT)
+		err = nfp_abm_ctrl_read_stats(alink, &stats);
+	else
+		err = nfp_abm_ctrl_read_q_stats(alink, i, &stats);
 	if (err)
 		return err;
 
@@ -168,13 +238,17 @@ nfp_abm_red_xstats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
 {
 	struct nfp_alink_xstats *prev_xstats;
 	struct nfp_alink_xstats xstats;
-	int err;
+	int i, err;
 
-	if (alink->qdiscs[0].handle != opt->handle)
-		return -EOPNOTSUPP;
-	prev_xstats = &alink->qdiscs[0].xstats;
+	i = nfp_abm_red_find(alink, opt);
+	if (i < 0)
+		return i;
+	prev_xstats = &alink->qdiscs[i].xstats;
 
-	err = nfp_abm_ctrl_read_xstats(alink, &xstats);
+	if (alink->parent == TC_H_ROOT)
+		err = nfp_abm_ctrl_read_xstats(alink, &xstats);
+	else
+		err = nfp_abm_ctrl_read_q_xstats(alink, i, &xstats);
 	if (err)
 		return err;
 
@@ -190,9 +264,6 @@ static int
 nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
 		     struct tc_red_qopt_offload *opt)
 {
-	if (opt->parent != TC_H_ROOT)
-		return -EOPNOTSUPP;
-
 	switch (opt->command) {
 	case TC_RED_REPLACE:
 		return nfp_abm_red_replace(netdev, alink, opt);
@@ -208,6 +279,24 @@ nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
 	}
 }
 
+static int
+nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink,
+		    struct tc_mq_qopt_offload *opt)
+{
+	switch (opt->command) {
+	case TC_MQ_CREATE:
+		nfp_abm_reset_root(netdev, alink, opt->handle,
+				   alink->total_queues);
+		return 0;
+	case TC_MQ_DESTROY:
+		if (opt->handle == alink->parent)
+			nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 0);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int
 nfp_abm_setup_tc(struct nfp_app *app, struct net_device *netdev,
 		 enum tc_setup_type type, void *type_data)
@@ -220,6 +309,8 @@ nfp_abm_setup_tc(struct nfp_app *app, struct net_device *netdev,
 		return -EOPNOTSUPP;
 
 	switch (type) {
+	case TC_SETUP_QDISC_MQ:
+		return nfp_abm_setup_tc_mq(netdev, repr->app_priv, type_data);
 	case TC_SETUP_QDISC_RED:
 		return nfp_abm_setup_tc_red(netdev, repr->app_priv, type_data);
 	default:
@@ -473,13 +564,21 @@ nfp_abm_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
 	alink->abm = abm;
 	alink->vnic = nn;
 	alink->id = id;
+	alink->parent = TC_H_ROOT;
+	alink->total_queues = alink->vnic->max_rx_rings;
+	alink->qdiscs = kvzalloc(sizeof(*alink->qdiscs) * alink->total_queues,
+				 GFP_KERNEL);
+	if (!alink->qdiscs) {
+		err = -ENOMEM;
+		goto err_free_alink;
+	}
 
 	/* This is a multi-host app, make sure MAC/PHY is up, but don't
 	 * make the MAC/PHY state follow the state of any of the ports.
 	 */
 	err = nfp_eth_set_configured(app->cpp, eth_port->index, true);
 	if (err < 0)
-		goto err_free_alink;
+		goto err_free_qdiscs;
 
 	netif_keep_dst(nn->dp.netdev);
 
@@ -488,6 +587,8 @@ nfp_abm_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
 
 	return 0;
 
+err_free_qdiscs:
+	kvfree(alink->qdiscs);
 err_free_alink:
 	kfree(alink);
 	return err;
@@ -498,6 +599,7 @@ static void nfp_abm_vnic_free(struct nfp_app *app, struct nfp_net *nn)
 	struct nfp_abm_link *alink = nn->app_priv;
 
 	nfp_abm_kill_reprs(alink->abm, alink);
+	kvfree(alink->qdiscs);
 	kfree(alink);
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
index 09fd15847961..934a70835473 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.h
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.h
@@ -106,6 +106,9 @@ struct nfp_red_qdisc {
  * @vnic:	data vNIC
  * @id:		id of the data vNIC
  * @queue_base:	id of base to host queue within PCIe (not QC idx)
+ * @total_queues:	number of PF queues
+ * @parent:	handle of expected parent, i.e. handle of MQ, or TC_H_ROOT
+ * @num_qdiscs:	number of currently used qdiscs
  * @qdiscs:	array of qdiscs
  */
 struct nfp_abm_link {
@@ -113,16 +116,25 @@ struct nfp_abm_link {
 	struct nfp_net *vnic;
 	unsigned int id;
 	unsigned int queue_base;
-	struct nfp_red_qdisc qdiscs[1];
+	unsigned int total_queues;
+	u32 parent;
+	unsigned int num_qdiscs;
+	struct nfp_red_qdisc *qdiscs;
 };
 
 void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);
 int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm);
 int nfp_abm_ctrl_set_all_q_lvls(struct nfp_abm_link *alink, u32 val);
+int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int i,
+			   u32 val);
 int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
 			    struct nfp_alink_stats *stats);
+int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int i,
+			      struct nfp_alink_stats *stats);
 int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
 			     struct nfp_alink_xstats *xstats);
+int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
+			       struct nfp_alink_xstats *xstats);
 u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int i);
 u64 nfp_abm_ctrl_stat_sto(struct nfp_abm_link *alink, unsigned int i);
 int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm);
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 13/14] net: sched: mq: request stats from offloads
From: Jakub Kicinski @ 2018-05-26  4:53 UTC (permalink / raw)
  To: davem
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or, Jakub Kicinski
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

MQ doesn't hold any statistics on its own, however, statistics
from offloads are requested starting from the root, hence MQ
will read the old values for its sums.  Call into the drivers:
because of the additive nature of the stats, drivers are aware
of how many "pending updates" they have for children of the MQ.
Since MQ resets its stats on every dump we can simply offset
the stats, predicting how stats of offloaded children will
change.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 include/net/pkt_cls.h |  2 ++
 net/sched/sch_mq.c    | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 942f839dbca4..a3c1a2c47cd4 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -781,11 +781,13 @@ struct tc_qopt_offload_stats {
 enum tc_mq_command {
 	TC_MQ_CREATE,
 	TC_MQ_DESTROY,
+	TC_MQ_STATS,
 };
 
 struct tc_mq_qopt_offload {
 	enum tc_mq_command command;
 	u32 handle;
+	struct tc_qopt_offload_stats stats;
 };
 
 enum tc_red_command {
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 6ccf6daa2503..d6b8ae4ed7a3 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -38,6 +38,22 @@ static int mq_offload(struct Qdisc *sch, enum tc_mq_command cmd)
 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt);
 }
 
+static void mq_offload_stats(struct Qdisc *sch)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_mq_qopt_offload opt = {
+		.command = TC_MQ_STATS,
+		.handle = sch->handle,
+		.stats = {
+			.bstats = &sch->bstats,
+			.qstats = &sch->qstats,
+		},
+	};
+
+	if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc)
+		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt);
+}
+
 static void mq_destroy(struct Qdisc *sch)
 {
 	struct net_device *dev = qdisc_dev(sch);
@@ -146,6 +162,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
 			sch->q.qlen		+= qdisc->q.qlen;
 			sch->bstats.bytes	+= qdisc->bstats.bytes;
 			sch->bstats.packets	+= qdisc->bstats.packets;
+			sch->qstats.qlen	+= qdisc->qstats.qlen;
 			sch->qstats.backlog	+= qdisc->qstats.backlog;
 			sch->qstats.drops	+= qdisc->qstats.drops;
 			sch->qstats.requeues	+= qdisc->qstats.requeues;
@@ -154,6 +171,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 		spin_unlock_bh(qdisc_lock(qdisc));
 	}
+	mq_offload_stats(sch);
 
 	return 0;
 }
-- 
2.17.0

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox