From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Eric Dumazet <edumazet@google.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
netdev@vger.kernel.org, Tariq Toukan <tariqt@nvidia.com>,
Maxim Mikityanskiy <maximmi@nvidia.com>
Subject: [PATCH net-next 10/16] net/mlx5e: xsk: Use xsk_buff_alloc_batch on striding RQ
Date: Fri, 30 Sep 2022 09:28:57 -0700 [thread overview]
Message-ID: <20220930162903.62262-11-saeed@kernel.org> (raw)
In-Reply-To: <20220930162903.62262-1-saeed@kernel.org>
From: Maxim Mikityanskiy <maximmi@nvidia.com>
XSK provides a function to allocate frames in batches for more efficient
processing. This commit starts using this function on striding RQ and
creates an optimized flow for XSK. A side effect is an opportunity to
optimize the regular RX flow by dropping branching for XSK cases.
Performance improvement is up to 6.4% in the aligned mode and up to 7.5%
in the unaligned mode.
Aligned mode, 2048-byte frames: 12.9 Mpps -> 13.8 Mpps
Aligned mode, 4096-byte frames: 11.8 Mpps -> 12.5 Mpps
Unaligned mode, 2048-byte frames: 11.9 Mpps -> 12.8 Mpps
Unaligned mode, 3072-byte frames: 11.4 Mpps -> 12.1 Mpps
Unaligned mode, 4096-byte frames: 11.0 Mpps -> 11.2 Mpps
CPU: Intel(R) Xeon(R) Gold 6240 CPU @ 2.60GHz
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
.../net/ethernet/mellanox/mlx5/core/en/txrx.h | 7 ++
.../ethernet/mellanox/mlx5/core/en/xsk/rx.c | 88 ++++++++++++++++++-
.../ethernet/mellanox/mlx5/core/en/xsk/rx.h | 1 +
.../net/ethernet/mellanox/mlx5/core/en_rx.c | 58 +++---------
4 files changed, 106 insertions(+), 48 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index f4f306bb8e6d..4456ad5cedf1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -452,4 +452,11 @@ static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size
return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room);
}
+
+static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i)
+{
+ size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe);
+
+ return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
+}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index 812a370f6aea..7bd49f0b1271 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -8,6 +8,90 @@
/* RX data path */
+int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
+ struct mlx5e_icosq *icosq = rq->icosq;
+ struct mlx5_wq_cyc *wq = &icosq->wq;
+ struct mlx5e_umr_wqe *umr_wqe;
+ int batch, i;
+ u32 offset; /* 17-bit value with MTT. */
+ u16 pi;
+
+ if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
+ goto err;
+
+ BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
+ batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
+ rq->mpwqe.pages_per_wqe);
+
+ /* If batch < pages_per_wqe, either:
+ * 1. Some (or all) descriptors were invalid.
+ * 2. dma_need_sync is true, and it fell back to allocating one frame.
+ * In either case, try to continue allocating frames one by one, until
+ * the first error, which will mean there are no more valid descriptors.
+ */
+ for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
+ wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool);
+ if (unlikely(!wi->alloc_units[batch].xsk))
+ goto err_reuse_batch;
+ }
+
+ pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
+ umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
+
+ if (unlikely(rq->mpwqe.unaligned)) {
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ };
+ }
+ } else {
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
+ .ptag = cpu_to_be64(addr | MLX5_EN_WR),
+ };
+ }
+ }
+
+ bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
+ wi->consumed_strides = 0;
+
+ umr_wqe->ctrl.opmod_idx_opcode =
+ cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);
+
+ offset = ix * rq->mpwqe.mtts_per_wqe;
+ if (likely(!rq->mpwqe.unaligned))
+ offset = MLX5_ALIGNED_MTTS_OCTW(offset);
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
+
+ icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
+ .num_wqebbs = rq->mpwqe.umr_wqebbs,
+ .umr.rq = rq,
+ };
+
+ icosq->pc += rq->mpwqe.umr_wqebbs;
+
+ icosq->doorbell_cseg = &umr_wqe->ctrl;
+
+ return 0;
+
+err_reuse_batch:
+ while (--batch >= 0)
+ xsk_buff_free(wi->alloc_units[batch].xsk);
+
+err:
+ rq->stats->buff_alloc_err++;
+ return -ENOMEM;
+}
+
int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
@@ -112,7 +196,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
*/
WARN_ON_ONCE(head_offset);
- xdp->data_end = xdp->data + cqe_bcnt;
+ xsk_buff_set_size(xdp, cqe_bcnt);
xdp_set_data_meta_invalid(xdp);
xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
net_prefetch(xdp->data);
@@ -159,7 +243,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
*/
WARN_ON_ONCE(wi->offset);
- xdp->data_end = xdp->data + cqe_bcnt;
+ xsk_buff_set_size(xdp, cqe_bcnt);
xdp_set_data_meta_invalid(xdp);
xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
net_prefetch(xdp->data);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
index 7898a78237b8..84a496a8d72f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -9,6 +9,7 @@
/* RX data path */
+int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 5f411c29157f..329702e185a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -75,13 +75,6 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = {
.handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo,
};
-static struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i)
-{
- size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe);
-
- return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
-}
-
static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
{
return config->rx_filter == HWTSTAMP_FILTER_ALL;
@@ -668,15 +661,6 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
int err;
int i;
- /* Check in advance that we have enough frames, instead of allocating
- * one-by-one, failing and moving frames to the Reuse Ring.
- */
- if (rq->xsk_pool &&
- unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe))) {
- err = -ENOMEM;
- goto err;
- }
-
if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
err = mlx5e_alloc_rx_hd_mpwqe(rq);
if (unlikely(err))
@@ -687,33 +671,16 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
- if (unlikely(rq->mpwqe.unaligned)) {
- for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) {
- dma_addr_t addr;
-
- err = mlx5e_page_alloc(rq, au);
- if (unlikely(err))
- goto err_unmap;
- /* Unaligned means XSK. */
- addr = xsk_buff_xdp_get_frame_dma(au->xsk);
- umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
- .key = rq->mkey_be,
- .va = cpu_to_be64(addr),
- };
- }
- } else {
- for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) {
- dma_addr_t addr;
+ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) {
+ dma_addr_t addr;
- err = mlx5e_page_alloc(rq, au);
- if (unlikely(err))
- goto err_unmap;
- addr = rq->xsk_pool ? xsk_buff_xdp_get_frame_dma(au->xsk) :
- page_pool_get_dma_addr(au->page);
- umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
- .ptag = cpu_to_be64(addr | MLX5_EN_WR),
- };
- }
+ err = mlx5e_page_alloc_pool(rq, au);
+ if (unlikely(err))
+ goto err_unmap;
+ addr = page_pool_get_dma_addr(au->page);
+ umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
+ .ptag = cpu_to_be64(addr | MLX5_EN_WR),
+ };
}
bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
@@ -723,9 +690,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
MLX5_OPCODE_UMR);
- offset = ix * rq->mpwqe.mtts_per_wqe;
- if (!rq->mpwqe.unaligned)
- offset = MLX5_ALIGNED_MTTS_OCTW(offset);
+ offset = MLX5_ALIGNED_MTTS_OCTW(ix * rq->mpwqe.mtts_per_wqe);
umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
@@ -1016,7 +981,8 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
head = rq->mpwqe.actual_wq_head;
i = missing;
do {
- alloc_err = mlx5e_alloc_rx_mpwqe(rq, head);
+ alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) :
+ mlx5e_alloc_rx_mpwqe(rq, head);
if (unlikely(alloc_err))
break;
--
2.37.3
next prev parent reply other threads:[~2022-09-30 16:30 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-09-30 16:28 [PATCH net-next 00/16] mlx5 xsk updates part3 2022-09-30 Saeed Mahameed
2022-09-30 16:28 ` [PATCH net-next 01/16] net/mlx5e: xsk: Use mlx5e_trigger_napi_icosq for XSK wakeup Saeed Mahameed
2022-09-30 16:28 ` [PATCH net-next 02/16] net/mlx5e: xsk: Drop the check for XSK state in mlx5e_xsk_wakeup Saeed Mahameed
2022-09-30 16:28 ` [PATCH net-next 03/16] net/mlx5e: Introduce wqe_index_mask for legacy RQ Saeed Mahameed
2022-09-30 16:28 ` [PATCH net-next 04/16] net/mlx5e: Make the wqe_index_mask calculation more exact Saeed Mahameed
2022-09-30 16:28 ` [PATCH net-next 05/16] net/mlx5e: Use partial batches in legacy RQ Saeed Mahameed
2022-09-30 16:28 ` [PATCH net-next 06/16] net/mlx5e: xsk: Use partial batches in legacy RQ with XSK Saeed Mahameed
2022-09-30 16:28 ` [PATCH net-next 07/16] net/mlx5e: Remove the outer loop when allocating legacy RQ WQEs Saeed Mahameed
2022-09-30 16:28 ` [PATCH net-next 08/16] net/mlx5e: xsk: Split out WQE allocation for legacy XSK RQ Saeed Mahameed
2022-09-30 16:28 ` [PATCH net-next 09/16] net/mlx5e: xsk: Use xsk_buff_alloc_batch on legacy RQ Saeed Mahameed
2022-09-30 16:28 ` Saeed Mahameed [this message]
2022-09-30 16:28 ` [PATCH net-next 11/16] net/mlx5e: Use non-XSK page allocator in SHAMPO Saeed Mahameed
2022-09-30 16:28 ` [PATCH net-next 12/16] net/mlx5e: Call mlx5e_page_release_dynamic directly where possible Saeed Mahameed
2022-09-30 16:29 ` [PATCH net-next 13/16] net/mlx5e: Optimize RQ page deallocation Saeed Mahameed
2022-09-30 16:29 ` [PATCH net-next 14/16] net/mlx5e: xsk: Support XDP metadata on XSK RQs Saeed Mahameed
2022-09-30 16:29 ` [PATCH net-next 15/16] net/mlx5e: Introduce the mlx5e_flush_rq function Saeed Mahameed
2022-09-30 16:29 ` [PATCH net-next 16/16] net/mlx5e: xsk: Use queue indices starting from 0 for XSK queues Saeed Mahameed
2022-10-01 20:40 ` [PATCH net-next 00/16] mlx5 xsk updates part3 2022-09-30 patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220930162903.62262-11-saeed@kernel.org \
--to=saeed@kernel.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=maximmi@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=saeedm@nvidia.com \
--cc=tariqt@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.