From: saeed@kernel.org
To: "David S. Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org, Maxim Mikityanskiy <maximmi@mellanox.com>,
Tariq Toukan <tariqt@mellanox.com>,
Saeed Mahameed <saeedm@nvidia.com>
Subject: [net-next V3 08/12] net/mlx5e: Support multiple SKBs in a TX WQE
Date: Mon, 21 Sep 2020 19:47:00 -0700 [thread overview]
Message-ID: <20200922024704.544482-9-saeed@kernel.org> (raw)
In-Reply-To: <20200922024704.544482-1-saeed@kernel.org>
From: Maxim Mikityanskiy <maximmi@mellanox.com>
TX MPWQE support for SKBs is coming in one of the following patches, and
a single MPWQE can send multiple SKBs. This commit prepares the TX path
code to handle such cases:
1. An additional FIFO for SKBs is added, just like the FIFO for DMA
chunks.
2. struct mlx5e_tx_wqe_info will contain num_fifo_pkts. If a given WQE
contains only one packet, num_fifo_pkts will be zero, and the SKB will
be stored in mlx5e_tx_wqe_info, as usual. If num_fifo_pkts > 0, the SKB
pointer in mlx5e_tx_wqe_info will be NULL, and the SKBs will be stored
in the FIFO.
This change has no performance impact in the TCP single-stream test or
the XDP_TX single-stream test.
When compiled with a recent GCC, this change shows no visible
performance impact on the UDP pktgen (burst 32) single-stream test either:
Packet rate: 16.95 Mpps (±0.15 Mpps) -> 16.96 Mpps (±0.12 Mpps)
Instructions per packet: 429 -> 421
Cycles per packet: 160 -> 156
Instructions per cycle: 2.69 -> 2.70
CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (x86_64)
NIC: Mellanox ConnectX-6 Dx
GCC 10.2.0
Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 ++
.../net/ethernet/mellanox/mlx5/core/en/txrx.h | 18 +++++
.../mellanox/mlx5/core/en_accel/ktls_txrx.h | 10 ++-
.../net/ethernet/mellanox/mlx5/core/en_main.c | 7 +-
.../net/ethernet/mellanox/mlx5/core/en_tx.c | 69 ++++++++++++++-----
5 files changed, 87 insertions(+), 21 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 95aab8b429cf..04c6ff2386bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -318,11 +318,13 @@ struct mlx5e_txqsq {
/* dirtied @completion */
u16 cc;
+ u16 skb_fifo_cc;
u32 dma_fifo_cc;
struct dim dim; /* Adaptive Moderation */
/* dirtied @xmit */
u16 pc ____cacheline_aligned_in_smp;
+ u16 skb_fifo_pc;
u32 dma_fifo_pc;
struct mlx5e_cq cq;
@@ -330,9 +332,11 @@ struct mlx5e_txqsq {
/* read only */
struct mlx5_wq_cyc wq;
u32 dma_fifo_mask;
+ u16 skb_fifo_mask;
struct mlx5e_sq_stats *stats;
struct {
struct mlx5e_sq_dma *dma_fifo;
+ struct sk_buff **skb_fifo;
struct mlx5e_tx_wqe_info *wqe_info;
} db;
void __iomem *uar_map;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 277725c05de4..03fe92323f48 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -105,6 +105,7 @@ struct mlx5e_tx_wqe_info {
u32 num_bytes;
u8 num_wqebbs;
u8 num_dma;
+ u8 num_fifo_pkts;
#ifdef CONFIG_MLX5_EN_TLS
struct page *resync_dump_frag_page;
#endif
@@ -231,6 +232,23 @@ mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
dma->type = map_type;
}
+static inline struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_txqsq *sq, u16 i)
+{
+ return &sq->db.skb_fifo[i & sq->skb_fifo_mask];
+}
+
+static inline void mlx5e_skb_fifo_push(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+{
+ struct sk_buff **skb_item = mlx5e_skb_fifo_get(sq, sq->skb_fifo_pc++);
+
+ *skb_item = skb;
+}
+
+static inline struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_txqsq *sq)
+{
+ return *mlx5e_skb_fifo_get(sq, sq->skb_fifo_cc++);
+}
+
static inline void
mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
index fcfb156cf09d..7521c9be735b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
@@ -29,20 +29,24 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc);
-static inline void
+static inline bool
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc)
{
- if (unlikely(wi->resync_dump_frag_page))
+ if (unlikely(wi->resync_dump_frag_page)) {
mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc);
+ return true;
+ }
+ return false;
}
#else
-static inline void
+static inline bool
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc)
{
+ return false;
}
#endif /* CONFIG_MLX5_EN_TLS */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b057a6c3a6d5..c331aa9714f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1047,6 +1047,7 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
{
kvfree(sq->db.wqe_info);
+ kvfree(sq->db.skb_fifo);
kvfree(sq->db.dma_fifo);
}
@@ -1058,15 +1059,19 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
sizeof(*sq->db.dma_fifo)),
GFP_KERNEL, numa);
+ sq->db.skb_fifo = kvzalloc_node(array_size(df_sz,
+ sizeof(*sq->db.skb_fifo)),
+ GFP_KERNEL, numa);
sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
sizeof(*sq->db.wqe_info)),
GFP_KERNEL, numa);
- if (!sq->db.dma_fifo || !sq->db.wqe_info) {
+ if (!sq->db.dma_fifo || !sq->db.skb_fifo || !sq->db.wqe_info) {
mlx5e_free_txqsq_db(sq);
return -ENOMEM;
}
sq->dma_fifo_mask = df_sz - 1;
+ sq->skb_fifo_mask = df_sz - 1;
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index aea30399f664..857d1c0397d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -342,6 +342,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
.num_bytes = attr->num_bytes,
.num_dma = num_dma,
.num_wqebbs = wqe_attr->num_wqebbs,
+ .num_fifo_pkts = 0,
};
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
@@ -489,6 +490,18 @@ static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
napi_consume_skb(skb, napi_budget);
}
+static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
+ struct mlx5_cqe64 *cqe, int napi_budget)
+{
+ int i;
+
+ for (i = 0; i < wi->num_fifo_pkts; i++) {
+ struct sk_buff *skb = mlx5e_skb_fifo_pop(sq);
+
+ mlx5e_consume_skb(sq, skb, cqe, napi_budget);
+ }
+}
+
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_sq_stats *stats;
@@ -534,26 +547,33 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
do {
- struct sk_buff *skb;
-
last_wqe = (sqcc == wqe_counter);
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
- skb = wi->skb;
sqcc += wi->num_wqebbs;
- if (unlikely(!skb)) {
- mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
+ if (likely(wi->skb)) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
+
+ npkts++;
+ nbytes += wi->num_bytes;
continue;
}
- mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
- mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
+ if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
+ &dma_fifo_cc)))
+ continue;
- npkts++;
- nbytes += wi->num_bytes;
+ if (wi->num_fifo_pkts) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);
+
+ npkts += wi->num_fifo_pkts;
+ nbytes += wi->num_bytes;
+ }
} while (!last_wqe);
if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
@@ -592,12 +612,19 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
+static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
+{
+ int i;
+
+ for (i = 0; i < wi->num_fifo_pkts; i++)
+ dev_kfree_skb_any(mlx5e_skb_fifo_pop(sq));
+}
+
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
{
struct mlx5e_tx_wqe_info *wi;
u32 dma_fifo_cc, nbytes = 0;
u16 ci, sqcc, npkts = 0;
- struct sk_buff *skb;
sqcc = sq->cc;
dma_fifo_cc = sq->dma_fifo_cc;
@@ -605,20 +632,28 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
while (sqcc != sq->pc) {
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
- skb = wi->skb;
sqcc += wi->num_wqebbs;
- if (!skb) {
- mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
+ if (likely(wi->skb)) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ dev_kfree_skb_any(wi->skb);
+
+ npkts++;
+ nbytes += wi->num_bytes;
continue;
}
- mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
- dev_kfree_skb_any(skb);
+ if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
+ continue;
- npkts++;
- nbytes += wi->num_bytes;
+ if (wi->num_fifo_pkts) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);
+
+ npkts += wi->num_fifo_pkts;
+ nbytes += wi->num_bytes;
+ }
}
sq->dma_fifo_cc = dma_fifo_cc;
--
2.26.2
next prev parent reply other threads:[~2020-09-22 2:48 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-22 2:46 [pull request][net-next V3 00/12] mlx5 Multi packet tx descriptors for SKBs saeed
2020-09-22 2:46 ` [net-next V3 01/12] net/mlx5e: Refactor inline header size calculation in the TX path saeed
2020-09-22 2:46 ` [net-next V3 02/12] net/mlx5e: Use struct assignment to initialize mlx5e_tx_wqe_info saeed
2020-09-22 2:46 ` [net-next V3 03/12] net/mlx5e: Move mlx5e_tx_wqe_inline_mode to en_tx.c saeed
2020-09-22 2:46 ` [net-next V3 04/12] net/mlx5e: Refactor xmit functions saeed
2020-09-22 2:46 ` [net-next V3 05/12] net/mlx5e: Small improvements for XDP TX MPWQE logic saeed
2020-09-22 2:46 ` [net-next V3 06/12] net/mlx5e: Unify constants for WQE_EMPTY_DS_COUNT saeed
2020-09-22 2:46 ` [net-next V3 07/12] net/mlx5e: Move the TLS resync check out of the function saeed
2020-09-22 2:47 ` saeed [this message]
2020-09-22 2:47 ` [net-next V3 09/12] net/mlx5e: Generalize TX MPWQE checks for full session saeed
2020-09-22 2:47 ` [net-next V3 10/12] net/mlx5e: Rename xmit-related structs to generalize them saeed
2020-09-22 2:47 ` [net-next V3 11/12] net/mlx5e: Move TX code into functions to be used by MPWQE saeed
2020-09-22 2:47 ` [net-next V3 12/12] net/mlx5e: Enhanced TX MPWQE for SKBs saeed
2020-09-23 0:45 ` [pull request][net-next V3 00/12] mlx5 Multi packet tx descriptors " David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200922024704.544482-9-saeed@kernel.org \
--to=saeed@kernel.org \
--cc=davem@davemloft.net \
--cc=kuba@kernel.org \
--cc=maximmi@mellanox.com \
--cc=netdev@vger.kernel.org \
--cc=saeedm@nvidia.com \
--cc=tariqt@mellanox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.