* [PATCH net 1/3] net/mlx5e: SHAMPO, Fix header mapping for 64K pages
2025-10-28 6:47 [PATCH net 0/3] net/mlx5e: SHAMPO fixes for 64KB page size Tariq Toukan
@ 2025-10-28 6:47 ` Tariq Toukan
2025-10-29 15:51 ` Simon Horman
2025-10-28 6:47 ` [PATCH net 2/3] net/mlx5e: SHAMPO, Fix skb size check " Tariq Toukan
2025-10-28 6:47 ` [PATCH net 3/3] net/mlx5e: SHAMPO, Fix header formulas for higher MTUs and " Tariq Toukan
2 siblings, 1 reply; 8+ messages in thread
From: Tariq Toukan @ 2025-10-28 6:47 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch, netdev,
linux-rdma, linux-kernel, Gal Pressman, Dragos Tatulea
From: Dragos Tatulea <dtatulea@nvidia.com>
HW-GRO is broken on mlx5 for 64K page sizes. The patch in the fixes tag
didn't take into account larger page sizes when doing an align down
of max_ksm_entries. For 64K page size, max_ksm_entries is 0 which will skip
mapping header pages via WQE UMR. This breaks header-data split
and will result in the following syndrome:
mlx5_core 0000:00:08.0 eth2: Error cqe on cqn 0x4c9, ci 0x0, qn 0x1133, opcode 0xe, syndrome 0x4, vendor syndrome 0x32
00000000: 00 00 00 00 04 4a 00 00 00 00 00 00 20 00 93 32
00000010: 55 00 00 00 fb cc 00 00 00 00 00 00 07 18 00 00
00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 4a
00000030: 00 00 3b c7 93 01 32 04 00 00 00 00 00 00 bf e0
mlx5_core 0000:00:08.0 eth2: ERR CQE on RQ: 0x1133
Furthermore, the function that fills in WQE UMRs for the headers
(mlx5e_build_shampo_hd_umr()) only supports mapping page sizes that
fit in a single UMR WQE.
This patch goes back to the old non-aligned max_ksm_entries value and it
changes mlx5e_build_shampo_hd_umr() to support mapping a large page over
multiple UMR WQEs.
This means that mlx5e_build_shampo_hd_umr() can now leave a page only
partially mapped. The caller, mlx5e_build_shampo_hd_umr(), ensures that
there are enough UMR WQEs to cover complete pages by working on
ksm_entries that are multiples of MLX5E_SHAMPO_WQ_HEADER_PER_PAGE.
Fixes: 8a0ee54027b1 ("net/mlx5e: SHAMPO, Simplify UMR allocation for headers")
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
.../net/ethernet/mellanox/mlx5/core/en_rx.c | 34 +++++++++----------
1 file changed, 16 insertions(+), 18 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 1c79adc51a04..77f7a1ca091d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -679,25 +679,24 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
build_ksm_umr(sq, umr_wqe, shampo->mkey_be, index, ksm_entries);
- WARN_ON_ONCE(ksm_entries & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1));
- while (i < ksm_entries) {
- struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index);
+ for ( ; i < ksm_entries; i++, index++) {
+ struct mlx5e_frag_page *frag_page;
u64 addr;
- err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, frag_page);
- if (unlikely(err))
- goto err_unmap;
+ frag_page = mlx5e_shampo_hd_to_frag_page(rq, index);
+ header_offset = mlx5e_shampo_hd_offset(index);
+ if (!header_offset) {
+ err = mlx5e_page_alloc_fragmented(rq->hd_page_pool,
+ frag_page);
+ if (err)
+ goto err_unmap;
+ }
addr = page_pool_get_dma_addr_netmem(frag_page->netmem);
-
- for (int j = 0; j < MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; j++) {
- header_offset = mlx5e_shampo_hd_offset(index++);
-
- umr_wqe->inline_ksms[i++] = (struct mlx5_ksm) {
- .key = cpu_to_be32(lkey),
- .va = cpu_to_be64(addr + header_offset + headroom),
- };
- }
+ umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
+ .key = cpu_to_be32(lkey),
+ .va = cpu_to_be64(addr + header_offset + headroom),
+ };
}
sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
@@ -713,7 +712,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
return 0;
err_unmap:
- while (--i) {
+ while (--i >= 0) {
--index;
header_offset = mlx5e_shampo_hd_offset(index);
if (!header_offset) {
@@ -735,8 +734,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
struct mlx5e_icosq *sq = rq->icosq;
int i, err, max_ksm_entries, len;
- max_ksm_entries = ALIGN_DOWN(MLX5E_MAX_KSM_PER_WQE(rq->mdev),
- MLX5E_SHAMPO_WQ_HEADER_PER_PAGE);
+ max_ksm_entries = MLX5E_MAX_KSM_PER_WQE(rq->mdev);
ksm_entries = bitmap_find_window(shampo->bitmap,
shampo->hd_per_wqe,
shampo->hd_per_wq, shampo->pi);
--
2.31.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH net 2/3] net/mlx5e: SHAMPO, Fix skb size check for 64K pages
2025-10-28 6:47 [PATCH net 0/3] net/mlx5e: SHAMPO fixes for 64KB page size Tariq Toukan
2025-10-28 6:47 ` [PATCH net 1/3] net/mlx5e: SHAMPO, Fix header mapping for 64K pages Tariq Toukan
@ 2025-10-28 6:47 ` Tariq Toukan
2025-10-29 15:50 ` Simon Horman
2025-10-28 6:47 ` [PATCH net 3/3] net/mlx5e: SHAMPO, Fix header formulas for higher MTUs and " Tariq Toukan
2 siblings, 1 reply; 8+ messages in thread
From: Tariq Toukan @ 2025-10-28 6:47 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch, netdev,
linux-rdma, linux-kernel, Gal Pressman, Dragos Tatulea
From: Dragos Tatulea <dtatulea@nvidia.com>
mlx5e_hw_gro_skb_has_enough_space() uses a formula to check if there is
enough space in the skb frags to store more data. This formula is
incorrect for 64K page sizes and it triggers early GRO session
termination because the first fragment will blow up beyond
GRO_LEGACY_MAX_SIZE.
This patch adds a special case for page sizes >= GRO_LEGACY_MAX_SIZE
(64K) which will uses the skb->data_len instead. Within this context,
this check will be safe from fragment overflow.
It is expected that the if statement will be optimized out as the
check is done with constants.
Fixes: 92552d3abd32 ("net/mlx5e: HW_GRO cqe handler implementation")
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 77f7a1ca091d..ea4e7f486c8b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -2350,7 +2350,10 @@ mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt)
{
int nr_frags = skb_shinfo(skb)->nr_frags;
- return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE;
+ if (PAGE_SIZE >= GRO_LEGACY_MAX_SIZE)
+ return skb->len + data_bcnt <= GRO_LEGACY_MAX_SIZE;
+ else
+ return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE;
}
static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
--
2.31.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH net 3/3] net/mlx5e: SHAMPO, Fix header formulas for higher MTUs and 64K pages
2025-10-28 6:47 [PATCH net 0/3] net/mlx5e: SHAMPO fixes for 64KB page size Tariq Toukan
2025-10-28 6:47 ` [PATCH net 1/3] net/mlx5e: SHAMPO, Fix header mapping for 64K pages Tariq Toukan
2025-10-28 6:47 ` [PATCH net 2/3] net/mlx5e: SHAMPO, Fix skb size check " Tariq Toukan
@ 2025-10-28 6:47 ` Tariq Toukan
2 siblings, 0 replies; 8+ messages in thread
From: Tariq Toukan @ 2025-10-28 6:47 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch, netdev,
linux-rdma, linux-kernel, Gal Pressman, Dragos Tatulea
From: Dragos Tatulea <dtatulea@nvidia.com>
The MLX5E_SHAMPO_WQ_HEADER_PER_PAGE and
MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE macros are used directly in
several places under the assumption that there will always be more
headers per WQE than headers per page. However, this assumption doesn't
hold for 64K page sizes and higher MTUs (> 4K). This can be first
observed during header page allocation: ksm_entries will become 0 during
alignment to MLX5E_SHAMPO_WQ_HEADER_PER_PAGE.
This patch introduces 2 additional members to the mlx5e_shampo_hd struct
which are meant to be used instead of the macrose mentioned above.
When the number of headers per WQE goes below
MLX5E_SHAMPO_WQ_HEADER_PER_PAGE, clamp the number of headers per
page and expand the header size accordingly so that the headers
for one WQE cover a full page.
All the formulas are adapted to use these two new members.
Fixes: 945ca432bfd0 ("net/mlx5e: SHAMPO, Drop info array")
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 ++
.../net/ethernet/mellanox/mlx5/core/en_main.c | 24 +++++++++++---
.../net/ethernet/mellanox/mlx5/core/en_rx.c | 33 +++++++++++--------
3 files changed, 41 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 14e3207b14e7..a163f81f07c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -634,7 +634,10 @@ struct mlx5e_dma_info {
struct mlx5e_shampo_hd {
struct mlx5e_frag_page *pages;
u32 hd_per_wq;
+ u32 hd_per_page;
u16 hd_per_wqe;
+ u8 log_hd_per_page;
+ u8 log_hd_entry_size;
unsigned long *bitmap;
u16 pi;
u16 ci;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 9c46511e7b43..6023bbbf3f39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -791,8 +791,9 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
int node)
{
void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq);
+ u8 log_hd_per_page, log_hd_entry_size;
+ u16 hd_per_wq, hd_per_wqe;
u32 hd_pool_size;
- u16 hd_per_wq;
int wq_size;
int err;
@@ -815,11 +816,24 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
if (err)
goto err_umr_mkey;
- rq->mpwqe.shampo->hd_per_wqe =
- mlx5e_shampo_hd_per_wqe(mdev, params, rqp);
+ hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rqp);
wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
- hd_pool_size = (rq->mpwqe.shampo->hd_per_wqe * wq_size) /
- MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
+
+ BUILD_BUG_ON(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE > PAGE_SHIFT);
+ if (hd_per_wqe >= MLX5E_SHAMPO_WQ_HEADER_PER_PAGE) {
+ log_hd_per_page = MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE;
+ log_hd_entry_size = MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
+ } else {
+ log_hd_per_page = order_base_2(hd_per_wqe);
+ log_hd_entry_size = order_base_2(PAGE_SIZE / hd_per_wqe);
+ }
+
+ rq->mpwqe.shampo->hd_per_wqe = hd_per_wqe;
+ rq->mpwqe.shampo->hd_per_page = BIT(log_hd_per_page);
+ rq->mpwqe.shampo->log_hd_per_page = log_hd_per_page;
+ rq->mpwqe.shampo->log_hd_entry_size = log_hd_entry_size;
+
+ hd_pool_size = (hd_per_wqe * wq_size) >> log_hd_per_page;
if (netif_rxq_has_unreadable_mp(rq->netdev, rq->ix)) {
/* Separate page pool for shampo headers */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index ea4e7f486c8b..e84899a47119 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -648,17 +648,20 @@ static void build_ksm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
umr_wqe->hdr.uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
}
-static struct mlx5e_frag_page *mlx5e_shampo_hd_to_frag_page(struct mlx5e_rq *rq, int header_index)
+static struct mlx5e_frag_page *mlx5e_shampo_hd_to_frag_page(struct mlx5e_rq *rq,
+ int header_index)
{
- BUILD_BUG_ON(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE > PAGE_SHIFT);
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
- return &rq->mpwqe.shampo->pages[header_index >> MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE];
+ return &shampo->pages[header_index >> shampo->log_hd_per_page];
}
-static u64 mlx5e_shampo_hd_offset(int header_index)
+static u64 mlx5e_shampo_hd_offset(struct mlx5e_rq *rq, int header_index)
{
- return (header_index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
- MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ u32 hd_per_page = shampo->hd_per_page;
+
+ return (header_index & (hd_per_page - 1)) << shampo->log_hd_entry_size;
}
static void mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index);
@@ -684,7 +687,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
u64 addr;
frag_page = mlx5e_shampo_hd_to_frag_page(rq, index);
- header_offset = mlx5e_shampo_hd_offset(index);
+ header_offset = mlx5e_shampo_hd_offset(rq, index);
if (!header_offset) {
err = mlx5e_page_alloc_fragmented(rq->hd_page_pool,
frag_page);
@@ -714,7 +717,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
err_unmap:
while (--i >= 0) {
--index;
- header_offset = mlx5e_shampo_hd_offset(index);
+ header_offset = mlx5e_shampo_hd_offset(rq, index);
if (!header_offset) {
struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index);
@@ -738,7 +741,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
ksm_entries = bitmap_find_window(shampo->bitmap,
shampo->hd_per_wqe,
shampo->hd_per_wq, shampo->pi);
- ksm_entries = ALIGN_DOWN(ksm_entries, MLX5E_SHAMPO_WQ_HEADER_PER_PAGE);
+ ksm_entries = ALIGN_DOWN(ksm_entries, shampo->hd_per_page);
if (!ksm_entries)
return 0;
@@ -856,7 +859,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
{
struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
- if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
+ if (((header_index + 1) & (shampo->hd_per_page - 1)) == 0) {
struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index);
mlx5e_page_release_fragmented(rq->hd_page_pool, frag_page);
@@ -1219,9 +1222,10 @@ static unsigned int mlx5e_lro_update_hdr(struct sk_buff *skb,
static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index)
{
struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index);
- u16 head_offset = mlx5e_shampo_hd_offset(header_index) + rq->buff.headroom;
+ u16 head_offset = mlx5e_shampo_hd_offset(rq, header_index);
+ void *addr = netmem_address(frag_page->netmem);
- return netmem_address(frag_page->netmem) + head_offset;
+ return addr + head_offset + rq->buff.headroom;
}
static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4)
@@ -2261,7 +2265,8 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
struct mlx5_cqe64 *cqe, u16 header_index)
{
struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index);
- u16 head_offset = mlx5e_shampo_hd_offset(header_index);
+ u16 head_offset = mlx5e_shampo_hd_offset(rq, header_index);
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
u16 head_size = cqe->shampo.header_size;
u16 rx_headroom = rq->buff.headroom;
struct sk_buff *skb = NULL;
@@ -2277,7 +2282,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
data = hdr + rx_headroom;
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size);
- if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) {
+ if (likely(frag_size <= BIT(shampo->log_hd_entry_size))) {
/* build SKB around header */
dma_sync_single_range_for_cpu(rq->pdev, dma_addr, 0, frag_size, rq->buff.map_dir);
net_prefetchw(hdr);
--
2.31.1
^ permalink raw reply related [flat|nested] 8+ messages in thread