* [PATCH net-next 01/15] net/mlx5e: Make mlx5e_rq_param naming consistent
From: Tariq Toukan @ 2026-02-23 20:41 UTC
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
John Fastabend, Richard Cochran, netdev, linux-rdma, linux-kernel,
bpf, Gal Pressman, Dragos Tatulea, Cosmin Ratiu, Pavel Begunkov,
David Wei
From: Dragos Tatulea <dtatulea@nvidia.com>
The mlx5e_rq_param structure is used under several different names:
rq_param, rq_params, param, rqp. Refactor the code to use a single
name: rq_param.
This patch has no functional change.
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en.h | 5 +-
.../ethernet/mellanox/mlx5/core/en/params.c | 27 +++++-----
.../net/ethernet/mellanox/mlx5/core/en/ptp.c | 4 +-
.../mellanox/mlx5/core/en/xsk/setup.c | 9 ++--
.../net/ethernet/mellanox/mlx5/core/en_main.c | 50 +++++++++----------
5 files changed, 51 insertions(+), 44 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ea2cd1f5d1d0..550426979627 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1060,13 +1060,14 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv);
struct mlx5e_xsk_param;
struct mlx5e_rq_param;
-int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
+int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *rq_param,
struct mlx5e_xsk_param *xsk, int node, u16 q_counter,
struct mlx5e_rq *rq);
#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
void mlx5e_close_rq(struct mlx5e_rq *rq);
-int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_counter);
+int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *rq_param,
+ u16 q_counter);
void mlx5e_destroy_rq(struct mlx5e_rq *rq);
bool mlx5e_reset_rx_moderation(struct dim_cq_moder *cq_moder, u8 cq_period_mode,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 8e99d07586fa..3fdaf003e1d0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -883,14 +883,16 @@ static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *param
int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
- struct mlx5e_rq_param *param)
+ struct mlx5e_rq_param *rq_param)
{
- void *rqc = param->rqc;
- void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ void *rqc = rq_param->rqc;
u32 lro_timeout;
int ndsegs = 1;
+ void *wq;
int err;
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: {
u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
@@ -938,11 +940,12 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
}
default: /* MLX5_WQ_TYPE_CYCLIC */
MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
- err = mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info,
- &param->xdp_frag_size);
+ err = mlx5e_build_rq_frags_info(mdev, params, xsk,
+ &rq_param->frags_info,
+ &rq_param->xdp_frag_size);
if (err)
return err;
- ndsegs = param->frags_info.num_frags;
+ ndsegs = rq_param->frags_info.num_frags;
}
MLX5_SET(wq, wq, wq_type, params->rq_wq_type);
@@ -953,23 +956,23 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable);
MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en);
- param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
- mlx5e_build_rx_cq_param(mdev, params, xsk, &param->cqp);
+ rq_param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+ mlx5e_build_rx_cq_param(mdev, params, xsk, &rq_param->cqp);
return 0;
}
void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
- struct mlx5e_rq_param *param)
+ struct mlx5e_rq_param *rq_param)
{
- void *rqc = param->rqc;
+ void *rqc = rq_param->rqc;
void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
MLX5_SET(wq, wq, log_wq_stride,
mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
- param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+ rq_param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
}
void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev,
@@ -1097,7 +1100,7 @@ static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev,
static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_rq_param *rqp)
+ struct mlx5e_rq_param *rq_param)
{
u32 wqebbs, total_pages, useful_space;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 74660e7fe674..13add74d1b97 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -660,13 +660,13 @@ static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev,
struct net_device *netdev,
struct mlx5e_ptp_params *ptp_params)
{
- struct mlx5e_rq_param *rq_params = &ptp_params->rq_param;
+ struct mlx5e_rq_param *rq_param = &ptp_params->rq_param;
struct mlx5e_params *params = &ptp_params->params;
params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
mlx5e_init_rq_type_params(mdev, params);
params->sw_mtu = netdev->max_mtu;
- mlx5e_build_rq_param(mdev, params, NULL, rq_params);
+ mlx5e_build_rq_param(mdev, params, NULL, rq_param);
}
static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 5981c71cae2d..50c14ad29ed6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -90,8 +90,10 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, c->napi.napi_id);
}
-static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool,
+static int mlx5e_open_xsk_rq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param,
+ struct xsk_buff_pool *pool,
struct mlx5e_xsk_param *xsk)
{
u16 q_counter = c->priv->q_counter[c->sd_ix];
@@ -102,7 +104,8 @@ static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *param
if (err)
return err;
- err = mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), q_counter, xskrq);
+ err = mlx5e_open_rq(params, rq_param, xsk, cpu_to_node(c->cpu),
+ q_counter, xskrq);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7eb691c2a1bd..f2ce24cf56ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -780,7 +780,7 @@ static int mlx5e_create_rq_hd_mkey(struct mlx5_core_dev *mdev,
static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_rq_param *rqp,
+ struct mlx5e_rq_param *rq_param,
struct mlx5e_rq *rq,
int node)
{
@@ -791,7 +791,7 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
return 0;
- hd_per_wq = mlx5e_shampo_hd_per_wq(mdev, params, rqp);
+ hd_per_wq = mlx5e_shampo_hd_per_wq(mdev, params, rq_param);
hd_buf_size = hd_per_wq * BIT(MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE);
nentries = hd_buf_size / PAGE_SIZE;
if (!nentries) {
@@ -852,18 +852,17 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq)
static int mlx5e_alloc_rq(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
- struct mlx5e_rq_param *rqp,
+ struct mlx5e_rq_param *rq_param,
int node, struct mlx5e_rq *rq)
{
+ void *rqc_wq = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
struct mlx5_core_dev *mdev = rq->mdev;
- void *rqc = rqp->rqc;
- void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
u32 pool_size;
int wq_sz;
int err;
int i;
- rqp->wq.db_numa_node = node;
+ rq_param->wq.db_numa_node = node;
INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
INIT_WORK(&rq->rx_timeout_work, mlx5e_rq_timeout_work);
@@ -879,8 +878,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
- &rq->wq_ctrl);
+ err = mlx5_wq_ll_create(mdev, &rq_param->wq, rqc_wq,
+ &rq->mpwqe.wq, &rq->wq_ctrl);
if (err)
goto err_rq_xdp_prog;
@@ -925,14 +924,14 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
if (err)
goto err_rq_mkey;
- err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, node);
+ err = mlx5_rq_shampo_alloc(mdev, params, rq_param, rq, node);
if (err)
goto err_free_mpwqe_info;
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
- err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
- &rq->wq_ctrl);
+ err = mlx5_wq_cyc_create(mdev, &rq_param->wq, rqc_wq,
+ &rq->wqe.wq, &rq->wq_ctrl);
if (err)
goto err_rq_xdp_prog;
@@ -940,7 +939,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
- rq->wqe.info = rqp->frags_info;
+ rq->wqe.info = rq_param->frags_info;
rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
err = mlx5e_init_wqe_alloc_info(rq, node);
@@ -1085,7 +1084,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
xdp_rxq_info_unreg(&rq->xdp_rxq);
}
-int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_counter)
+int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *rq_param,
+ u16 q_counter)
{
struct mlx5_core_dev *mdev = rq->mdev;
u8 ts_format;
@@ -1107,7 +1107,7 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_cou
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
wq = MLX5_ADDR_OF(rqc, rqc, wq);
- memcpy(rqc, param->rqc, sizeof(param->rqc));
+ memcpy(rqc, rq_param->rqc, sizeof(rq_param->rqc));
MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
@@ -1323,7 +1323,7 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
}
-int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
+int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *rq_param,
struct mlx5e_xsk_param *xsk, int node, u16 q_counter,
struct mlx5e_rq *rq)
{
@@ -1333,11 +1333,11 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
__set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state);
- err = mlx5e_alloc_rq(params, xsk, param, node, rq);
+ err = mlx5e_alloc_rq(params, xsk, rq_param, node, rq);
if (err)
return err;
- err = mlx5e_create_rq(rq, param, q_counter);
+ err = mlx5e_create_rq(rq, rq_param, q_counter);
if (err)
goto err_free_rq;
@@ -2507,16 +2507,17 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
}
static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_rq_param *rq_params)
+ struct mlx5e_rq_param *rq_param)
{
u16 q_counter = c->priv->q_counter[c->sd_ix];
int err;
- err = mlx5e_init_rxq_rq(c, params, rq_params->xdp_frag_size, &c->rq);
+ err = mlx5e_init_rxq_rq(c, params, rq_param->xdp_frag_size, &c->rq);
if (err)
return err;
- return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), q_counter, &c->rq);
+ return mlx5e_open_rq(params, rq_param, NULL, cpu_to_node(c->cpu),
+ q_counter, &c->rq);
}
static struct mlx5e_icosq *
@@ -3577,15 +3578,14 @@ static void mlx5e_free_drop_rq(struct mlx5e_rq *rq)
static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
struct mlx5e_rq *rq,
- struct mlx5e_rq_param *param)
+ struct mlx5e_rq_param *rq_param)
{
- void *rqc = param->rqc;
- void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ void *rqc_wq = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
int err;
- param->wq.db_numa_node = param->wq.buf_numa_node;
+ rq_param->wq.db_numa_node = rq_param->wq.buf_numa_node;
- err = mlx5_wq_cyc_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq,
+ err = mlx5_wq_cyc_create(mdev, &rq_param->wq, rqc_wq, &rq->wqe.wq,
&rq->wq_ctrl);
if (err)
return err;
--
2.44.0
* [PATCH net-next 02/15] net/mlx5e: Extract striding rq param calculation in function
From: Tariq Toukan @ 2026-02-23 20:41 UTC
From: Dragos Tatulea <dtatulea@nvidia.com>
The striding RQ parameter calculation is large enough to deserve its
own function. The names involved are also very long, so it is easy to
hit the 80-character line limit every time a change is made, which is
a further sign that the code should be extracted into its own function.
This patch has no functional change.
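After the extraction, the striding RQ arm of mlx5e_build_rq_param()
reduces to the following (taken from the diff below):

	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
		err = mlx5e_mpwqe_build_rq_param(mdev, params, xsk, rq_param);
		if (err)
			return err;
		break;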
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
.../ethernet/mellanox/mlx5/core/en/params.c | 106 ++++++++++--------
1 file changed, 62 insertions(+), 44 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 3fdaf003e1d0..07d75a85ee7f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -880,13 +880,70 @@ static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *param
MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
}
+static int mlx5e_mpwqe_build_rq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_param *rq_param)
+{
+ u8 log_rq_sz = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ u8 log_wqe_num_of_strides, log_wqe_stride_size;
+ enum mlx5e_mpwrq_umr_mode umr_mode;
+ void *rqc = rq_param->rqc;
+ u32 lro_timeout;
+ void *wq;
+
+ log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params,
+ xsk);
+ log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params,
+ xsk);
+ umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
+ log_wqe_num_of_strides,
+ page_shift, umr_mode)) {
+ mlx5_core_err(mdev,
+ "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, umr_mode %d\n",
+ log_wqe_stride_size, log_wqe_num_of_strides,
+ umr_mode);
+ return -EINVAL;
+ }
+
+ MLX5_SET(wq, wq, log_wqe_num_of_strides,
+ log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
+ MLX5_SET(wq, wq, log_wqe_stride_size,
+ log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
+ MLX5_SET(wq, wq, log_wq_sz, log_rq_sz);
+ if (params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO)
+ return 0;
+
+ MLX5_SET(wq, wq, shampo_enable, true);
+ MLX5_SET(wq, wq, log_reservation_size,
+ MLX5E_SHAMPO_WQ_LOG_RESRV_SIZE -
+ MLX5E_SHAMPO_WQ_RESRV_SIZE_BASE_SHIFT);
+ MLX5_SET(wq, wq, log_max_num_of_packets_per_reservation,
+ mlx5e_shampo_get_log_pkt_per_rsrv(params));
+ MLX5_SET(wq, wq, log_headers_entry_size,
+ MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE -
+ MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE_SHIFT);
+ lro_timeout = mlx5e_choose_lro_timeout(mdev,
+ MLX5E_DEFAULT_SHAMPO_TIMEOUT);
+ MLX5_SET(rqc, rqc, reservation_timeout, lro_timeout);
+ MLX5_SET(rqc, rqc, shampo_match_criteria_type,
+ MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED);
+ MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity,
+ MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE);
+
+ return 0;
+}
+
int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
struct mlx5e_rq_param *rq_param)
{
void *rqc = rq_param->rqc;
- u32 lro_timeout;
int ndsegs = 1;
void *wq;
int err;
@@ -894,50 +951,11 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
wq = MLX5_ADDR_OF(rqc, rqc, wq);
switch (params->rq_wq_type) {
- case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: {
- u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
- u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
- enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
- u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
-
- if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
- log_wqe_num_of_strides,
- page_shift, umr_mode)) {
- mlx5_core_err(mdev,
- "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, umr_mode %d\n",
- log_wqe_stride_size, log_wqe_num_of_strides,
- umr_mode);
- return -EINVAL;
- }
-
- MLX5_SET(wq, wq, log_wqe_num_of_strides,
- log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
- MLX5_SET(wq, wq, log_wqe_stride_size,
- log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
- MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
- if (params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO)
- break;
-
- MLX5_SET(wq, wq, shampo_enable, true);
- MLX5_SET(wq, wq, log_reservation_size,
- MLX5E_SHAMPO_WQ_LOG_RESRV_SIZE -
- MLX5E_SHAMPO_WQ_RESRV_SIZE_BASE_SHIFT);
- MLX5_SET(wq, wq,
- log_max_num_of_packets_per_reservation,
- mlx5e_shampo_get_log_pkt_per_rsrv(params));
- MLX5_SET(wq, wq, log_headers_entry_size,
- MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE -
- MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE_SHIFT);
- lro_timeout =
- mlx5e_choose_lro_timeout(mdev,
- MLX5E_DEFAULT_SHAMPO_TIMEOUT);
- MLX5_SET(rqc, rqc, reservation_timeout, lro_timeout);
- MLX5_SET(rqc, rqc, shampo_match_criteria_type,
- MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED);
- MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity,
- MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE);
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ err = mlx5e_mpwqe_build_rq_param(mdev, params, xsk, rq_param);
+ if (err)
+ return err;
break;
- }
default: /* MLX5_WQ_TYPE_CYCLIC */
MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
err = mlx5e_build_rq_frags_info(mdev, params, xsk,
--
2.44.0
* [PATCH net-next 03/15] net/mlx5e: Extract max_xsk_wqebbs into its own function
From: Tariq Toukan @ 2026-02-23 20:41 UTC
From: Dragos Tatulea <dtatulea@nvidia.com>
The max_xsk_wqebbs calculation is large enough to deserve its own
function, and extracting it will make upcoming changes easier.
This patch has no functional changes.
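With the helper extracted, the ICOSQ sizing in
mlx5e_build_icosq_log_wq_sz() composes as follows; a simplified sketch
condensed from the diff below:

	/* UMR WQEs for the regular RQ. */
	wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, NULL);

	/* Worst-case UMR WQEs for a potential XSK RQ; the helper
	 * returns 0 when no XDP program is attached.
	 */
	wqebbs += mlx5e_max_xsk_wqebbs(mdev, params);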
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
.../ethernet/mellanox/mlx5/core/en/params.c | 94 ++++++++++---------
1 file changed, 52 insertions(+), 42 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 07d75a85ee7f..be1aa37531de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -1116,18 +1116,15 @@ static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev,
return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
}
-static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params,
- struct mlx5e_rq_param *rq_param)
+static u32 mlx5e_max_xsk_wqebbs(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
{
- u32 wqebbs, total_pages, useful_space;
-
- /* MLX5_WQ_TYPE_CYCLIC */
- if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
- return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+ struct mlx5e_xsk_param xsk = {0};
+ u32 max_xsk_wqebbs = 0;
+ u8 frame_shift;
- /* UMR WQEs for the regular RQ. */
- wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, NULL);
+ if (!params->xdp_prog)
+ return 0;
/* If XDP program is attached, XSK may be turned on at any time without
* restarting the channel. ICOSQ must be big enough to fit UMR WQEs of
@@ -1139,41 +1136,54 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
* from capabilities. Hence, we have to try all valid values of XSK
* frame size (and page_shift) to find the maximum.
*/
- if (params->xdp_prog) {
- u32 max_xsk_wqebbs = 0;
- u8 frame_shift;
-
- for (frame_shift = XDP_UMEM_MIN_CHUNK_SHIFT;
- frame_shift <= PAGE_SHIFT; frame_shift++) {
- /* The headroom doesn't affect the calculation. */
- struct mlx5e_xsk_param xsk = {
- .chunk_size = 1 << frame_shift,
- .unaligned = false,
- };
-
- /* XSK aligned mode. */
- max_xsk_wqebbs = max(max_xsk_wqebbs,
- mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
-
- /* XSK unaligned mode, frame size is a power of two. */
- xsk.unaligned = true;
- max_xsk_wqebbs = max(max_xsk_wqebbs,
- mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
-
- /* XSK unaligned mode, frame size is not equal to stride size. */
- xsk.chunk_size -= 1;
- max_xsk_wqebbs = max(max_xsk_wqebbs,
- mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
-
- /* XSK unaligned mode, frame size is a triple power of two. */
- xsk.chunk_size = (1 << frame_shift) / 4 * 3;
- max_xsk_wqebbs = max(max_xsk_wqebbs,
- mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
- }
+ for (frame_shift = XDP_UMEM_MIN_CHUNK_SHIFT;
+ frame_shift <= PAGE_SHIFT; frame_shift++) {
+ u32 total_wqebbs;
- wqebbs += max_xsk_wqebbs;
+ /* The headroom doesn't affect the calculations below. */
+
+ /* XSK aligned mode. */
+ xsk.chunk_size = 1 << frame_shift;
+ xsk.unaligned = false;
+ total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk);
+ max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs);
+
+ /* XSK unaligned mode, frame size is a power of two. */
+ xsk.unaligned = true;
+ total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk);
+ max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs);
+
+ /* XSK unaligned mode, frame size is not equal to stride
+ * size.
+ */
+ xsk.chunk_size -= 1;
+ total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk);
+ max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs);
+
+ /* XSK unaligned mode, frame size is a triple power of two. */
+ xsk.chunk_size = (1 << frame_shift) / 4 * 3;
+ total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk);
+ max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs);
}
+ return max_xsk_wqebbs;
+}
+
+static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ u32 wqebbs, total_pages, useful_space;
+
+ /* MLX5_WQ_TYPE_CYCLIC */
+ if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+
+ /* UMR WQEs for the regular RQ. */
+ wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, NULL);
+
+ wqebbs += mlx5e_max_xsk_wqebbs(mdev, params);
+
/* UMR WQEs don't cross the page boundary, they are padded with NOPs.
* This padding is always smaller than the max WQE size. That gives us
* at least (PAGE_SIZE - (max WQE size - MLX5_SEND_WQE_BB)) useful bytes
--
2.44.0
* [PATCH net-next 04/15] net/mlx5e: Expose and rename xsk channel parameter function
From: Tariq Toukan @ 2026-02-23 20:41 UTC
From: Dragos Tatulea <dtatulea@nvidia.com>
mlx5e_build_xsk_cparam() is the XSK alternative to
mlx5e_build_channel_param(): it calculates only the parameters it
requires, using the previously configured mlx5e_xsk_param.
Move this function to params.c to be alongside
mlx5e_build_channel_param() and give it a similar name.
Expose the function as it will be needed by upcoming changes.
This patch has no functional changes.
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 9 +++++++++
drivers/net/ethernet/mellanox/mlx5/core/en/params.h | 5 +++++
.../net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 11 +----------
3 files changed, 15 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index be1aa37531de..4d51fad7d9eb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -1272,3 +1272,12 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
return 0;
}
+
+void mlx5e_build_xsk_channel_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_channel_param *cparam)
+{
+ mlx5e_build_rq_param(mdev, params, xsk, &cparam->rq);
+ mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 00617c65fe3c..26680985ee39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -138,6 +138,11 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam);
+void mlx5e_build_xsk_channel_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_channel_param *cparam);
+
u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 50c14ad29ed6..e3b7e79863ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -48,15 +48,6 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
}
}
-static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
- struct mlx5e_channel_param *cparam)
-{
- mlx5e_build_rq_param(mdev, params, xsk, &cparam->rq);
- mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
-}
-
static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct xsk_buff_pool *pool,
@@ -130,7 +121,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
if (!cparam)
return -ENOMEM;
- mlx5e_build_xsk_cparam(priv->mdev, params, xsk, cparam);
+ mlx5e_build_xsk_channel_param(priv->mdev, params, xsk, cparam);
err = mlx5e_open_cq(c->mdev, params->rx_cq_moderation, &cparam->rq.cqp, &ccp,
&c->xskrq.cq);
--
2.44.0
* [PATCH net-next 05/15] net/mlx5e: Alloc xsk channel param out of mlx5e_open_xsk()
From: Tariq Toukan @ 2026-02-23 20:41 UTC
From: Dragos Tatulea <dtatulea@nvidia.com>
Currently, the xsk channel parameters are allocated and filled inside
mlx5e_open_xsk().
Move this responsibility out of mlx5e_open_xsk() and have the function
take an already filled mlx5e_channel_param. mlx5e_open_channel()
already allocates the channel parameters; the only precaution needed
is to call mlx5e_build_xsk_channel_param() before mlx5e_open_xsk().
mlx5e_xsk_enable_locked() now allocates and fills the xsk parameters
itself.
For simplicity, link the xsk parameters into struct mlx5e_channel_param
so that the channel parameters can be passed around together.
This patch has no functional changes.
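A simplified sketch of the resulting flow in mlx5e_xsk_enable_locked(),
condensed from the diff below (error paths elided):

	cparam = kvzalloc_obj(*cparam, GFP_KERNEL);
	if (!cparam)
		return -ENOMEM;

	mlx5e_build_xsk_param(pool, &xsk);
	mlx5e_build_xsk_channel_param(priv->mdev, params, &xsk, cparam);

	/* mlx5e_open_xsk() now takes the already filled cparam. */
	err = mlx5e_open_xsk(priv, params, cparam, pool, c);
	...
	kvfree(cparam);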
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
.../ethernet/mellanox/mlx5/core/en/params.c | 1 +
.../ethernet/mellanox/mlx5/core/en/params.h | 1 +
.../ethernet/mellanox/mlx5/core/en/xsk/pool.c | 17 +++++++++++++++--
.../ethernet/mellanox/mlx5/core/en/xsk/setup.c | 18 ++++--------------
.../ethernet/mellanox/mlx5/core/en/xsk/setup.h | 4 +++-
.../net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++-
6 files changed, 26 insertions(+), 18 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 4d51fad7d9eb..ef88097c1d4d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -1278,6 +1278,7 @@ void mlx5e_build_xsk_channel_param(struct mlx5_core_dev *mdev,
struct mlx5e_xsk_param *xsk,
struct mlx5e_channel_param *cparam)
{
+ cparam->xsk = xsk;
mlx5e_build_rq_param(mdev, params, xsk, &cparam->rq);
mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 26680985ee39..c132649dd9f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -42,6 +42,7 @@ struct mlx5e_channel_param {
struct mlx5e_sq_param xdp_sq;
struct mlx5e_sq_param icosq;
struct mlx5e_sq_param async_icosq;
+ struct mlx5e_xsk_param *xsk;
};
struct mlx5e_create_sq_param {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
index 5c5360a25c64..92bcf16a2019 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
@@ -79,6 +79,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
struct xsk_buff_pool *pool, u16 ix)
{
struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_channel_param *cparam;
struct mlx5e_xsk_param xsk;
struct mlx5e_channel *c;
int err;
@@ -89,15 +90,20 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
if (unlikely(!mlx5e_xsk_is_pool_sane(pool)))
return -EINVAL;
+ cparam = kvzalloc_obj(*cparam, GFP_KERNEL);
+ if (!cparam)
+ return -ENOMEM;
+
err = mlx5e_xsk_map_pool(mlx5_sd_ch_ix_get_dev(priv->mdev, ix), pool);
if (unlikely(err))
- return err;
+ goto err_free_cparam;
err = mlx5e_xsk_add_pool(&priv->xsk, pool, ix);
if (unlikely(err))
goto err_unmap_pool;
mlx5e_build_xsk_param(pool, &xsk);
+ mlx5e_build_xsk_channel_param(priv->mdev, params, &xsk, cparam);
if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) {
@@ -122,7 +128,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
c = priv->channels.c[ix];
- err = mlx5e_open_xsk(priv, params, &xsk, pool, c);
+ err = mlx5e_open_xsk(priv, params, cparam, pool, c);
if (unlikely(err))
goto err_remove_pool;
@@ -138,6 +144,8 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
mlx5e_deactivate_rq(&c->rq);
mlx5e_flush_rq(&c->rq, MLX5_RQC_STATE_RDY);
+ kvfree(cparam);
+
return 0;
err_remove_pool:
@@ -146,6 +154,9 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
err_unmap_pool:
mlx5e_xsk_unmap_pool(priv, pool);
+err_free_cparam:
+ kvfree(cparam);
+
return err;
validate_closed:
@@ -157,6 +168,8 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
goto err_remove_pool;
}
+ kvfree(cparam);
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index e3b7e79863ae..03f1be361701 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -105,10 +105,11 @@ static int mlx5e_open_xsk_rq(struct mlx5e_channel *c,
}
int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool,
+ struct mlx5e_channel_param *cparam,
+ struct xsk_buff_pool *pool,
struct mlx5e_channel *c)
{
- struct mlx5e_channel_param *cparam;
+ struct mlx5e_xsk_param *xsk = cparam->xsk;
struct mlx5e_create_cq_param ccp;
int err;
@@ -117,16 +118,10 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev))
return -EINVAL;
- cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL);
- if (!cparam)
- return -ENOMEM;
-
- mlx5e_build_xsk_channel_param(priv->mdev, params, xsk, cparam);
-
err = mlx5e_open_cq(c->mdev, params->rx_cq_moderation, &cparam->rq.cqp, &ccp,
&c->xskrq.cq);
if (unlikely(err))
- goto err_free_cparam;
+ return err;
err = mlx5e_open_xsk_rq(c, params, &cparam->rq, pool, xsk);
if (unlikely(err))
@@ -147,8 +142,6 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
if (unlikely(err))
goto err_close_tx_cq;
- kvfree(cparam);
-
set_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
return 0;
@@ -162,9 +155,6 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
err_close_rx_cq:
mlx5e_close_cq(&c->xskrq.cq);
-err_free_cparam:
- kvfree(cparam);
-
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
index 50e111b85efd..fc86d19ea2b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
@@ -11,8 +11,10 @@ struct mlx5e_xsk_param;
bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
struct mlx5_core_dev *mdev);
+struct mlx5e_channel_param;
int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool,
+ struct mlx5e_channel_param *cparam,
+ struct xsk_buff_pool *pool,
struct mlx5e_channel *c);
void mlx5e_close_xsk(struct mlx5e_channel *c);
void mlx5e_activate_xsk(struct mlx5e_channel *c);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index f2ce24cf56ce..35b767105492 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2829,7 +2829,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
if (xsk_pool) {
mlx5e_build_xsk_param(xsk_pool, &xsk);
- err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c);
+ mlx5e_build_xsk_channel_param(priv->mdev, params, &xsk, cparam);
+ err = mlx5e_open_xsk(priv, params, cparam, xsk_pool, c);
if (unlikely(err))
goto err_close_queues;
}
--
2.44.0
* [PATCH net-next 06/15] net/mlx5e: Move xsk param into new option container struct
From: Tariq Toukan @ 2026-02-23 20:41 UTC
From: Dragos Tatulea <dtatulea@nvidia.com>
The xsk parameter configuration (struct mlx5e_xsk_param) is passed
through many places during parameter calculation. It carries
channel-specific information (as opposed to the global information in
struct mlx5e_params).
Upcoming changes will need to pass similar channel-specific RQ
configuration. Instead of adding one more parameter to all of these
functions, create a new container structure that holds optional
RQ-specific parameters. The xsk parameter is the first of this kind.
The new container struct is itself optional: it must be checked for
validity before its members are dereferenced.
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +-
.../ethernet/mellanox/mlx5/core/en/params.c | 192 ++++++++++--------
.../ethernet/mellanox/mlx5/core/en/params.h | 38 ++--
.../net/ethernet/mellanox/mlx5/core/en/xdp.c | 5 +-
.../net/ethernet/mellanox/mlx5/core/en/xdp.h | 3 +-
.../ethernet/mellanox/mlx5/core/en/xsk/pool.c | 6 +-
.../mellanox/mlx5/core/en/xsk/setup.c | 31 +--
.../mellanox/mlx5/core/en/xsk/setup.h | 2 +-
.../net/ethernet/mellanox/mlx5/core/en_main.c | 33 +--
9 files changed, 185 insertions(+), 128 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 550426979627..5181d6ab39ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1060,8 +1060,9 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv);
struct mlx5e_xsk_param;
struct mlx5e_rq_param;
+struct mlx5e_rq_opt_param;
int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *rq_param,
- struct mlx5e_xsk_param *xsk, int node, u16 q_counter,
+ struct mlx5e_rq_opt_param *rqo, int node, u16 q_counter,
struct mlx5e_rq *rq);
#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index ef88097c1d4d..97f5d1c2adea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -21,10 +21,14 @@ static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev)
return min_page_shift ? : 12;
}
-u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
+u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev,
+ struct mlx5e_rq_opt_param *rqo)
{
- u8 req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT;
+ struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo);
u8 min_page_shift = mlx5e_mpwrq_min_page_shift(mdev);
+ u8 req_page_shift;
+
+ req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT;
/* Regular RQ uses order-0 pages, the NIC must be able to map them. */
if (WARN_ON_ONCE(!xsk && req_page_shift < min_page_shift))
@@ -34,7 +38,8 @@ u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xs
}
enum mlx5e_mpwrq_umr_mode
-mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
+mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev,
+ struct mlx5e_rq_opt_param *rqo)
{
/* Different memory management schemes use different mechanisms to map
* user-mode memory. The stricter guarantees we have, the faster
@@ -45,7 +50,8 @@ mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
* 3. KLM - indirect mapping to another MKey to arbitrary addresses, and
* mappings can have different sizes.
*/
- u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo);
bool unaligned = xsk ? xsk->unaligned : false;
bool oversized = false;
@@ -225,12 +231,12 @@ u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift,
}
u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_rq_opt_param *rqo)
{
u16 headroom;
- if (xsk)
- return xsk->headroom;
+ if (mlx5e_rqo_xsk_param(rqo))
+ return rqo->xsk->headroom;
headroom = NET_IP_ALIGN;
if (params->xdp_prog)
@@ -263,19 +269,23 @@ static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool no_head_
static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_opt_param *rqo,
bool mpwqe)
{
+ struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo);
bool no_head_tail_room;
u32 sz;
/* XSK frames are mapped as individual pages, because frames may come in
* an arbitrary order from random locations in the UMEM.
*/
- if (xsk)
- return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;
+ if (xsk) {
+ return mpwqe ?
+ BIT(mlx5e_mpwrq_page_shift(mdev, rqo)) : PAGE_SIZE;
+ }
- no_head_tail_room = params->xdp_prog && mpwqe && !mlx5e_rx_is_linear_skb(mdev, params, xsk);
+ no_head_tail_room = params->xdp_prog && mpwqe &&
+ !mlx5e_rx_is_linear_skb(mdev, params, rqo);
/* When no_head_tail_room is set, headroom and tailroom are excluded from skb calculations.
* no_head_tail_room should be set in the case of XDP with Striding RQ
@@ -291,11 +301,12 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_rq_opt_param *rqo)
{
- u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true);
- enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
- u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo);
+ u32 linear_stride_sz =
+ mlx5e_rx_get_linear_stride_sz(mdev, params, rqo, true);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo);
return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
order_base_2(linear_stride_sz);
@@ -303,8 +314,10 @@ static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev,
bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_rq_opt_param *rqo)
{
+ struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo);
+
if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE)
return false;
@@ -315,7 +328,7 @@ bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data
* must fit into a CPU page.
*/
- if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE)
+ if (mlx5e_rx_get_linear_sz_skb(params, !!xsk) > PAGE_SIZE)
return false;
/* XSK frames must be big enough to hold the packet data. */
@@ -349,12 +362,14 @@ static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_rq_opt_param *rqo)
{
- u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
- u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
- enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
- u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo);
+ u8 log_wqe_num_of_strides =
+ mlx5e_mpwqe_get_log_num_strides(mdev, params, rqo);
+ u8 log_wqe_stride_size =
+ mlx5e_mpwqe_get_log_stride_size(mdev, params, rqo);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo);
return mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
log_wqe_num_of_strides,
@@ -363,18 +378,20 @@ bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_rq_opt_param *rqo)
{
- enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
- u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo);
+ u32 linear_stride_sz =
+ mlx5e_rx_get_linear_stride_sz(mdev, params, rqo, true);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo);
u8 log_num_strides;
u8 log_stride_sz;
u8 log_wqe_sz;
- if (!mlx5e_rx_is_linear_skb(mdev, params, xsk))
+ if (!mlx5e_rx_is_linear_skb(mdev, params, rqo))
return false;
- log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
+ log_stride_sz = order_base_2(linear_stride_sz);
log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode);
if (log_wqe_sz < log_stride_sz)
@@ -389,13 +406,13 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_rq_opt_param *rqo)
{
- enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo);
u8 log_pkts_per_wqe, page_shift, max_log_rq_size;
- log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, xsk);
- page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, rqo);
+ page_shift = mlx5e_mpwrq_page_shift(mdev, rqo);
max_log_rq_size = mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode);
/* Numbers are unsigned, don't subtract to avoid underflow. */
@@ -423,10 +440,11 @@ static u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5e_params *params)
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_rq_opt_param *rqo)
{
- if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
- return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo))
+ return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params,
+ rqo, true));
/* XDP in mlx5e doesn't support multiple packets per page. */
if (params->xdp_prog)
@@ -437,17 +455,18 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_rq_opt_param *rqo)
{
- enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
- u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo);
u8 log_wqe_size, log_stride_size;
log_wqe_size = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode);
- log_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ log_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, rqo);
WARN(log_wqe_size < log_stride_size,
"Log WQE size %u < log stride size %u (page shift %u, umr mode %d, xsk on? %d)\n",
- log_wqe_size, log_stride_size, page_shift, umr_mode, !!xsk);
+ log_wqe_size, log_stride_size, page_shift, umr_mode,
+ rqo && rqo->xsk);
return log_wqe_size - log_stride_size;
}
@@ -459,14 +478,14 @@ u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz)
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_rq_opt_param *rqo)
{
- u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+ u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, rqo);
if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
return linear_headroom;
- if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo))
return linear_headroom;
if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
@@ -535,10 +554,11 @@ int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params
}
int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_rq_opt_param *rqo)
{
- enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
- u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo);
+ struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo);
u16 max_mtu_pkts;
if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) {
@@ -547,7 +567,7 @@ int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *pa
return -EOPNOTSUPP;
}
- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) {
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo)) {
mlx5_core_err(mdev, "Striding RQ linear mode for XSK can't be activated with current params\n");
return -EINVAL;
}
@@ -559,7 +579,8 @@ int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *pa
mlx5e_mpwrq_max_log_rq_pkts(mdev, page_shift, xsk->unaligned));
if (params->log_rq_mtu_frames > max_mtu_pkts) {
mlx5_core_err(mdev, "Current RQ length %d is too big for XSK with given frame size %u\n",
- 1 << params->log_rq_mtu_frames, xsk->chunk_size);
+ 1 << params->log_rq_mtu_frames,
+ xsk->chunk_size);
return -EINVAL;
}
@@ -672,7 +693,7 @@ static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params,
static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_opt_param *rqo,
struct mlx5e_rq_frags_info *info,
u32 *xdp_frag_size)
{
@@ -684,10 +705,11 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
int max_mtu;
int i;
- if (mlx5e_rx_is_linear_skb(mdev, params, xsk)) {
+ if (mlx5e_rx_is_linear_skb(mdev, params, rqo)) {
int frag_stride;
- frag_stride = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, false);
+ frag_stride = mlx5e_rx_get_linear_stride_sz(mdev, params, rqo,
+ false);
info->arr[0].frag_size = byte_count;
info->arr[0].frag_stride = frag_stride;
@@ -703,7 +725,7 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
goto out;
}
- headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+ headroom = mlx5e_get_linear_rq_headroom(params, rqo);
first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max,
@@ -819,12 +841,13 @@ static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev,
static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_rq_opt_param *rqo)
{
- u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
- u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, rqo);
+ u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params,
+ rqo));
int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(params));
- int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
+ int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo));
int wqe_size = BIT(log_stride_sz) * num_strides;
int rsrv_size = MLX5E_SHAMPO_WQ_RESRV_SIZE;
@@ -836,7 +859,7 @@ static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev,
static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_opt_param *rqo,
struct mlx5e_cq_param *param)
{
bool hw_stridx = false;
@@ -847,10 +870,13 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index);
if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
- log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk);
+ log_cq_size =
+ mlx5e_shampo_get_log_cq_size(mdev, params, rqo);
else
- log_cq_size = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk) +
- mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+ log_cq_size =
+ mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo) +
+ mlx5e_mpwqe_get_log_num_strides(mdev, params,
+ rqo);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
log_cq_size = params->log_rq_mtu_frames;
@@ -882,22 +908,22 @@ static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *param
static int mlx5e_mpwqe_build_rq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_opt_param *rqo,
struct mlx5e_rq_param *rq_param)
{
- u8 log_rq_sz = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk);
- u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ u8 log_rq_sz = mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo);
u8 log_wqe_num_of_strides, log_wqe_stride_size;
enum mlx5e_mpwrq_umr_mode umr_mode;
void *rqc = rq_param->rqc;
u32 lro_timeout;
void *wq;
- log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params,
- xsk);
- log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params,
- xsk);
- umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ log_wqe_num_of_strides =
+ mlx5e_mpwqe_get_log_num_strides(mdev, params, rqo);
+ log_wqe_stride_size =
+ mlx5e_mpwqe_get_log_stride_size(mdev, params, rqo);
+ umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo);
wq = MLX5_ADDR_OF(rqc, rqc, wq);
if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
@@ -940,7 +966,7 @@ static int mlx5e_mpwqe_build_rq_param(struct mlx5_core_dev *mdev,
int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_opt_param *rqo,
struct mlx5e_rq_param *rq_param)
{
void *rqc = rq_param->rqc;
@@ -952,13 +978,13 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- err = mlx5e_mpwqe_build_rq_param(mdev, params, xsk, rq_param);
+ err = mlx5e_mpwqe_build_rq_param(mdev, params, rqo, rq_param);
if (err)
return err;
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
- err = mlx5e_build_rq_frags_info(mdev, params, xsk,
+ err = mlx5e_build_rq_frags_info(mdev, params, rqo,
&rq_param->frags_info,
&rq_param->xdp_frag_size);
if (err)
@@ -975,7 +1001,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en);
rq_param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
- mlx5e_build_rx_cq_param(mdev, params, xsk, &rq_param->cqp);
+ mlx5e_build_rx_cq_param(mdev, params, rqo, &rq_param->cqp);
return 0;
}
@@ -1105,20 +1131,22 @@ u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_rq_opt_param *rqo)
{
- enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
- u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo);
u8 umr_wqebbs;
umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode);
- return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
+ return umr_wqebbs *
+ (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo));
}
static u32 mlx5e_max_xsk_wqebbs(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
+ struct mlx5e_rq_opt_param rqo = {0};
struct mlx5e_xsk_param xsk = {0};
u32 max_xsk_wqebbs = 0;
u8 frame_shift;
@@ -1126,6 +1154,8 @@ static u32 mlx5e_max_xsk_wqebbs(struct mlx5_core_dev *mdev,
if (!params->xdp_prog)
return 0;
+ rqo.xsk = &xsk;
+
/* If XDP program is attached, XSK may be turned on at any time without
* restarting the channel. ICOSQ must be big enough to fit UMR WQEs of
* both regular RQ and XSK RQ.
@@ -1145,24 +1175,24 @@ static u32 mlx5e_max_xsk_wqebbs(struct mlx5_core_dev *mdev,
/* XSK aligned mode. */
xsk.chunk_size = 1 << frame_shift;
xsk.unaligned = false;
- total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk);
+ total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &rqo);
max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs);
/* XSK unaligned mode, frame size is a power of two. */
xsk.unaligned = true;
- total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk);
+ total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &rqo);
max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs);
/* XSK unaligned mode, frame size is not equal to stride
* size.
*/
xsk.chunk_size -= 1;
- total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk);
+ total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &rqo);
max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs);
/* XSK unaligned mode, frame size is a triple power of two. */
xsk.chunk_size = (1 << frame_shift) / 4 * 3;
- total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk);
+ total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &rqo);
max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs);
}
@@ -1278,7 +1308,7 @@ void mlx5e_build_xsk_channel_param(struct mlx5_core_dev *mdev,
struct mlx5e_xsk_param *xsk,
struct mlx5e_channel_param *cparam)
{
- cparam->xsk = xsk;
- mlx5e_build_rq_param(mdev, params, xsk, &cparam->rq);
+ cparam->rq_opt.xsk = xsk;
+ mlx5e_build_rq_param(mdev, params, &cparam->rq_opt, &cparam->rq);
mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index c132649dd9f2..4bce769d48ed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -12,6 +12,10 @@ struct mlx5e_xsk_param {
bool unaligned;
};
+struct mlx5e_rq_opt_param {
+ struct mlx5e_xsk_param *xsk;
+};
+
struct mlx5e_cq_param {
u32 cqc[MLX5_ST_SZ_DW(cqc)];
struct mlx5_wq_param wq;
@@ -38,11 +42,11 @@ struct mlx5e_sq_param {
struct mlx5e_channel_param {
struct mlx5e_rq_param rq;
+ struct mlx5e_rq_opt_param rq_opt;
struct mlx5e_sq_param txq_sq;
struct mlx5e_sq_param xdp_sq;
struct mlx5e_sq_param icosq;
struct mlx5e_sq_param async_icosq;
- struct mlx5e_xsk_param *xsk;
};
struct mlx5e_create_sq_param {
@@ -57,9 +61,11 @@ struct mlx5e_create_sq_param {
/* Striding RQ dynamic parameters */
-u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev,
+ struct mlx5e_rq_opt_param *rqo);
enum mlx5e_mpwrq_umr_mode
-mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
+mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev,
+ struct mlx5e_rq_opt_param *rqo);
u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode);
u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
enum mlx5e_mpwrq_umr_mode umr_mode);
@@ -81,22 +87,22 @@ u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift,
bool slow_pci_heuristic(struct mlx5_core_dev *mdev);
int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
+ struct mlx5e_rq_opt_param *rqo);
void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
+ struct mlx5e_rq_opt_param *rqo);
bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
+ struct mlx5e_rq_opt_param *rqo);
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
+ struct mlx5e_rq_opt_param *rqo);
u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
+ struct mlx5e_rq_opt_param *rqo);
u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_rq_param *rq_param);
@@ -106,21 +112,21 @@ u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev,
u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
+ struct mlx5e_rq_opt_param *rqo);
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
+ struct mlx5e_rq_opt_param *rqo);
u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz);
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
+ struct mlx5e_rq_opt_param *rqo);
/* Build queue parameters */
void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c);
int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_opt_param *rqo,
struct mlx5e_rq_param *param);
void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
struct mlx5e_rq_param *param);
@@ -148,7 +154,7 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par
int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
+ struct mlx5e_rq_opt_param *rqo);
static inline void mlx5e_params_print_info(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
@@ -164,4 +170,10 @@ static inline void mlx5e_params_print_info(struct mlx5_core_dev *mdev,
"enhanced" : "basic");
};
+static inline struct mlx5e_xsk_param *
+mlx5e_rqo_xsk_param(struct mlx5e_rq_opt_param *rqo)
+{
+ return rqo ? rqo->xsk : NULL;
+}
+
#endif /* __MLX5_EN_PARAMS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 80f9fc10877a..04e1b5fa4825 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -37,9 +37,10 @@
#include <linux/bitfield.h>
#include <net/page_pool/helpers.h>
-int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params,
+ struct mlx5e_rq_opt_param *rqo)
{
- int hr = mlx5e_get_linear_rq_headroom(params, xsk);
+ int hr = mlx5e_get_linear_rq_headroom(params, rqo);
/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
* The condition checked in mlx5e_rx_is_linear_skb is:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 46ab0a9e8cdd..3c54f8962664 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -96,7 +96,8 @@ union mlx5e_xdp_info {
};
struct mlx5e_xsk_param;
-int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params,
+ struct mlx5e_rq_opt_param *rqo);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
struct bpf_prog *prog, struct mlx5e_xdp_buff *mlctx);
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
index 92bcf16a2019..565e5c4ddcce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
@@ -80,6 +80,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
{
struct mlx5e_params *params = &priv->channels.params;
struct mlx5e_channel_param *cparam;
+ enum mlx5e_mpwrq_umr_mode umr_mode;
struct mlx5e_xsk_param xsk;
struct mlx5e_channel *c;
int err;
@@ -105,8 +106,9 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
mlx5e_build_xsk_param(pool, &xsk);
mlx5e_build_xsk_channel_param(priv->mdev, params, &xsk, cparam);
+ umr_mode = mlx5e_mpwrq_umr_mode(priv->mdev, &cparam->rq_opt);
if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
- mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) {
+ umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) {
const char *recommendation = is_power_of_2(xsk.chunk_size) ?
"Upgrade firmware" : "Disable striding RQ";
@@ -163,7 +165,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
/* Check the configuration in advance, rather than fail at a later stage
* (in mlx5e_xdp_set or on open) and end up with no channels.
*/
- if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) {
+ if (!mlx5e_validate_xsk_param(params, &cparam->rq_opt, priv->mdev)) {
err = -EINVAL;
goto err_remove_pool;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 03f1be361701..11500fd213a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -9,9 +9,9 @@
static int mlx5e_legacy_rq_validate_xsk(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_rq_opt_param *rqo)
{
- if (!mlx5e_rx_is_linear_skb(mdev, params, xsk)) {
+ if (!mlx5e_rx_is_linear_skb(mdev, params, rqo)) {
mlx5_core_err(mdev, "Legacy RQ linear mode for XSK can't be activated with current params\n");
return -EINVAL;
}
@@ -25,9 +25,14 @@ static int mlx5e_legacy_rq_validate_xsk(struct mlx5_core_dev *mdev,
#define MLX5E_MIN_XSK_CHUNK_SIZE max(2048, XDP_UMEM_MIN_CHUNK_SIZE)
bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_opt_param *rqo,
struct mlx5_core_dev *mdev)
{
+ struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo);
+
+ if (WARN_ON(!xsk))
+ return false;
+
/* AF_XDP doesn't support frames larger than PAGE_SIZE,
* and xsk->chunk_size is limited to 65535 bytes.
*/
@@ -42,9 +47,9 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
*/
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- return !mlx5e_mpwrq_validate_xsk(mdev, params, xsk);
+ return !mlx5e_mpwrq_validate_xsk(mdev, params, rqo);
default: /* MLX5_WQ_TYPE_CYCLIC */
- return !mlx5e_legacy_rq_validate_xsk(mdev, params, xsk);
+ return !mlx5e_legacy_rq_validate_xsk(mdev, params, rqo);
}
}
@@ -83,19 +88,20 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
static int mlx5e_open_xsk_rq(struct mlx5e_channel *c,
struct mlx5e_params *params,
- struct mlx5e_rq_param *rq_param,
- struct xsk_buff_pool *pool,
- struct mlx5e_xsk_param *xsk)
+ struct mlx5e_channel_param *cparam,
+ struct xsk_buff_pool *pool)
{
+ struct mlx5e_rq_param *rq_param = &cparam->rq;
+ struct mlx5e_rq_opt_param *rqo = &cparam->rq_opt;
u16 q_counter = c->priv->q_counter[c->sd_ix];
struct mlx5e_rq *xskrq = &c->xskrq;
int err;
- err = mlx5e_init_xsk_rq(c, params, pool, xsk, xskrq);
+ err = mlx5e_init_xsk_rq(c, params, pool, rqo->xsk, xskrq);
if (err)
return err;
- err = mlx5e_open_rq(params, rq_param, xsk, cpu_to_node(c->cpu),
+ err = mlx5e_open_rq(params, rq_param, rqo, cpu_to_node(c->cpu),
q_counter, xskrq);
if (err)
return err;
@@ -109,13 +115,12 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
struct xsk_buff_pool *pool,
struct mlx5e_channel *c)
{
- struct mlx5e_xsk_param *xsk = cparam->xsk;
struct mlx5e_create_cq_param ccp;
int err;
mlx5e_build_create_cq_param(&ccp, c);
- if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev))
+ if (!mlx5e_validate_xsk_param(params, &cparam->rq_opt, priv->mdev))
return -EINVAL;
err = mlx5e_open_cq(c->mdev, params->rx_cq_moderation, &cparam->rq.cqp, &ccp,
@@ -123,7 +128,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
if (unlikely(err))
return err;
- err = mlx5e_open_xsk_rq(c, params, &cparam->rq, pool, xsk);
+ err = mlx5e_open_xsk_rq(c, params, cparam, pool);
if (unlikely(err))
goto err_close_rx_cq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
index fc86d19ea2b3..664ec78192c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
@@ -9,7 +9,7 @@
struct mlx5e_xsk_param;
bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_opt_param *rqo,
struct mlx5_core_dev *mdev);
struct mlx5e_channel_param;
int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 35b767105492..9e406275e243 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -851,8 +851,8 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq)
}
static int mlx5e_alloc_rq(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
struct mlx5e_rq_param *rq_param,
+ struct mlx5e_rq_opt_param *rqo,
int node, struct mlx5e_rq *rq)
{
void *rqc_wq = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
@@ -871,7 +871,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog);
rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
- rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
+ rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, rqo);
pool_size = 1 << params->log_rq_mtu_frames;
rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey);
@@ -891,8 +891,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
- rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
- rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, rqo);
+ rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo);
rq->mpwqe.pages_per_wqe =
mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift,
rq->mpwqe.umr_mode);
@@ -904,14 +904,17 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
rq->mpwqe.umr_mode);
pool_size = rq->mpwqe.pages_per_wqe <<
- mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk);
+ mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo);
- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk) && params->xdp_prog)
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo) &&
+ params->xdp_prog)
pool_size *= 2; /* additional page per packet for the linear part */
- rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ rq->mpwqe.log_stride_sz =
+ mlx5e_mpwqe_get_log_stride_size(mdev, params,
+ rqo);
rq->mpwqe.num_strides =
- BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
+ BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, rqo));
rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz);
rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz);
@@ -947,7 +950,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
goto err_rq_wq_destroy;
}
- if (xsk) {
+ if (mlx5e_rqo_xsk_param(rqo)) {
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
MEM_TYPE_XSK_BUFF_POOL, NULL);
if (err)
@@ -1324,7 +1327,7 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
}
int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *rq_param,
- struct mlx5e_xsk_param *xsk, int node, u16 q_counter,
+ struct mlx5e_rq_opt_param *rqo, int node, u16 q_counter,
struct mlx5e_rq *rq)
{
struct mlx5_core_dev *mdev = rq->mdev;
@@ -1333,7 +1336,7 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *rq_param,
if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
__set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state);
- err = mlx5e_alloc_rq(params, xsk, rq_param, node, rq);
+ err = mlx5e_alloc_rq(params, rq_param, rqo, node, rq);
if (err)
return err;
@@ -4587,6 +4590,7 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
for (ix = 0; ix < chs->params.num_channels; ix++) {
struct xsk_buff_pool *xsk_pool =
mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix);
+ struct mlx5e_rq_opt_param rqo = {0};
struct mlx5e_xsk_param xsk;
int max_xdp_mtu;
@@ -4594,12 +4598,13 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
continue;
mlx5e_build_xsk_param(xsk_pool, &xsk);
- max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &xsk);
+ rqo.xsk = &xsk;
+ max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &rqo);
/* Validate XSK params and XDP MTU in advance */
- if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev) ||
+ if (!mlx5e_validate_xsk_param(new_params, &rqo, mdev) ||
new_params->sw_mtu > max_xdp_mtu) {
- u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
+ u32 hr = mlx5e_get_linear_rq_headroom(new_params, &rqo);
int max_mtu_frame, max_mtu_page, max_mtu;
/* Two criteria must be met:
--
2.44.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH net-next 07/15] net/mlx5e: Drop unused channel parameters
2026-02-23 20:41 [PATCH net-next 00/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode Tariq Toukan
` (5 preceding siblings ...)
2026-02-23 20:41 ` [PATCH net-next 06/15] net/mlx5e: Move xsk param into new option container struct Tariq Toukan
@ 2026-02-23 20:41 ` Tariq Toukan
2026-02-23 20:41 ` [PATCH net-next 08/15] net/mlx5e: SHAMPO, Always calculate page size Tariq Toukan
` (8 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Tariq Toukan @ 2026-02-23 20:41 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
John Fastabend, Richard Cochran, netdev, linux-rdma, linux-kernel,
bpf, Gal Pressman, Dragos Tatulea, Cosmin Ratiu, Pavel Begunkov,
David Wei
From: Dragos Tatulea <dtatulea@nvidia.com>
The channel parameters from struct mlx5_qmgmt_data are
built in mlx5e_queue_mem_alloc() but are not used.
mlx5e_open_channel() builds the channel parameters internally and those
parameters will be the ones that are used when opening the queue.
This patch drops the unused parameters.
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 -------
1 file changed, 7 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 9e406275e243..aca88fed2ac7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5555,7 +5555,6 @@ static const struct netdev_stat_ops mlx5e_stat_ops = {
struct mlx5_qmgmt_data {
struct mlx5e_channel *c;
- struct mlx5e_channel_param cparam;
};
static int mlx5e_queue_mem_alloc(struct net_device *dev,
@@ -5566,7 +5565,6 @@ static int mlx5e_queue_mem_alloc(struct net_device *dev,
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_channels *chs = &priv->channels;
struct mlx5e_params params = chs->params;
- struct mlx5_core_dev *mdev;
int err;
mutex_lock(&priv->state_lock);
@@ -5590,11 +5588,6 @@ static int mlx5e_queue_mem_alloc(struct net_device *dev,
goto unlock;
}
- mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, queue_index);
- err = mlx5e_build_channel_param(mdev, &params, &new->cparam);
- if (err)
- goto unlock;
-
err = mlx5e_open_channel(priv, queue_index, &params, NULL, &new->c);
unlock:
mutex_unlock(&priv->state_lock);
--
2.44.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH net-next 08/15] net/mlx5e: SHAMPO, Always calculate page size
2026-02-23 20:41 [PATCH net-next 00/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode Tariq Toukan
` (6 preceding siblings ...)
2026-02-23 20:41 ` [PATCH net-next 07/15] net/mlx5e: Drop unused channel parameters Tariq Toukan
@ 2026-02-23 20:41 ` Tariq Toukan
2026-02-23 20:41 ` [PATCH net-next 09/15] net/mlx5e: Set page_pool order based on calculated page_shift Tariq Toukan
` (7 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Tariq Toukan @ 2026-02-23 20:41 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
John Fastabend, Richard Cochran, netdev, linux-rdma, linux-kernel,
bpf, Gal Pressman, Dragos Tatulea, Cosmin Ratiu, Pavel Begunkov,
David Wei
From: Dragos Tatulea <dtatulea@nvidia.com>
Adapt the rx path in SHAMPO mode to calculate the page size based on
the configured page_shift when dealing with payload data.
This is necessary as an upcoming patch will add support for using
different page sizes.
This patch has no functional change.
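For illustration, a minimal userspace sketch of the page-size arithmetic
the rx path switches to (standalone approximation with hypothetical
values, not driver code):

/*
 * Sketch: per-fragment consumption derived from a configured
 * page_shift instead of the compile-time PAGE_SIZE. Mirrors the
 * min_t() pattern used in the rx path.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t pg_consumed_bytes(uint32_t page_shift, uint32_t data_offset,
                                  uint32_t data_bcnt)
{
        uint32_t page_size = UINT32_C(1) << page_shift; /* BIT(rq->mpwqe.page_shift) */
        uint32_t room = page_size - data_offset;

        return data_bcnt < room ? data_bcnt : room; /* min_t(u32, ...) */
}

int main(void)
{
        printf("%u\n", pg_consumed_bytes(12, 1024, 8192)); /* 4K page: 3072 */
        printf("%u\n", pg_consumed_bytes(17, 1024, 8192)); /* 128K page: 8192 */
        return 0;
}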
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
.../net/ethernet/mellanox/mlx5/core/en_rx.c | 34 ++++++++++++-------
1 file changed, 22 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index efcfcddab376..fc95ea00666b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1847,11 +1847,14 @@ mlx5e_shampo_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq,
struct mlx5e_frag_page *frag_page,
u32 data_bcnt, u32 data_offset)
{
+ u32 page_size = BIT(rq->mpwqe.page_shift);
+
net_prefetchw(skb->data);
do {
/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
- u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt);
+ u32 pg_consumed_bytes = min_t(u32, page_size - data_offset,
+ data_bcnt);
unsigned int truesize = pg_consumed_bytes;
mlx5e_add_skb_frag(rq, skb, frag_page, data_offset,
@@ -1872,6 +1875,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
struct mlx5e_frag_page *head_page = frag_page;
struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf;
+ u32 page_size = BIT(rq->mpwqe.page_shift);
u32 frag_offset = head_offset;
u32 byte_cnt = cqe_bcnt;
struct skb_shared_info *sinfo;
@@ -1926,9 +1930,9 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
linear_hr = skb_headroom(skb);
linear_data_len = headlen;
linear_frame_sz = MLX5_SKB_FRAG_SZ(skb_end_offset(skb));
- if (unlikely(frag_offset >= PAGE_SIZE)) {
+ if (unlikely(frag_offset >= page_size)) {
frag_page++;
- frag_offset -= PAGE_SIZE;
+ frag_offset -= page_size;
}
}
@@ -1940,7 +1944,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
while (byte_cnt) {
/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
pg_consumed_bytes =
- min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
+ min_t(u32, page_size - frag_offset, byte_cnt);
if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
truesize += pg_consumed_bytes;
@@ -1978,7 +1982,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
nr_frags_free = old_nr_frags - sinfo->nr_frags;
if (unlikely(nr_frags_free)) {
frag_page -= nr_frags_free;
- truesize -= (nr_frags_free - 1) * PAGE_SIZE +
+ truesize -= (nr_frags_free - 1) * page_size +
ALIGN(pg_consumed_bytes,
BIT(rq->mpwqe.log_stride_sz));
}
@@ -2166,15 +2170,16 @@ mlx5e_shampo_flush_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
rq->hw_gro_data->skb = NULL;
}
-static bool
-mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt)
+static bool mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb,
+ u16 data_bcnt,
+ u32 page_size)
{
int nr_frags = skb_shinfo(skb)->nr_frags;
- if (PAGE_SIZE >= GRO_LEGACY_MAX_SIZE)
+ if (page_size >= GRO_LEGACY_MAX_SIZE)
return skb->len + data_bcnt <= GRO_LEGACY_MAX_SIZE;
else
- return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE;
+ return page_size * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE;
}
static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
@@ -2183,18 +2188,19 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset);
u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
- u32 data_offset = wqe_offset & (PAGE_SIZE - 1);
u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
u16 wqe_id = be16_to_cpu(cqe->wqe_id);
- u32 page_idx = wqe_offset >> PAGE_SHIFT;
u16 head_size = cqe->shampo.header_size;
struct sk_buff **skb = &rq->hw_gro_data->skb;
bool flush = cqe->shampo.flush;
bool match = cqe->shampo.match;
+ u32 page_size = BIT(rq->mpwqe.page_shift);
struct mlx5e_rq_stats *stats = rq->stats;
struct mlx5e_rx_wqe_ll *wqe;
struct mlx5e_mpw_info *wi;
struct mlx5_wq_ll *wq;
+ u32 data_offset;
+ u32 page_idx;
wi = mlx5e_get_mpw_info(rq, wqe_id);
wi->consumed_strides += cstrides;
@@ -2210,7 +2216,11 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
goto mpwrq_cqe_out;
}
- if (*skb && (!match || !(mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt)))) {
+ data_offset = wqe_offset & (page_size - 1);
+ page_idx = wqe_offset >> rq->mpwqe.page_shift;
+ if (*skb &&
+ !(match && mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt,
+ page_size))) {
match = false;
mlx5e_shampo_flush_skb(rq, cqe, match);
}
--
2.44.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH net-next 09/15] net/mlx5e: Set page_pool order based on calculated page_shift
2026-02-23 20:41 [PATCH net-next 00/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode Tariq Toukan
` (7 preceding siblings ...)
2026-02-23 20:41 ` [PATCH net-next 08/15] net/mlx5e: SHAMPO, Always calculate page size Tariq Toukan
@ 2026-02-23 20:41 ` Tariq Toukan
2026-02-23 20:41 ` [PATCH net-next 10/15] net/mlx5e: Alloc rq drop page " Tariq Toukan
` (6 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Tariq Toukan @ 2026-02-23 20:41 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
John Fastabend, Richard Cochran, netdev, linux-rdma, linux-kernel,
bpf, Gal Pressman, Dragos Tatulea, Cosmin Ratiu, Pavel Begunkov,
David Wei
From: Dragos Tatulea <dtatulea@nvidia.com>
Instead of unconditionally setting the page_pool order to 0, calculate
it from page_shift for the MPWQE case.
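As a sketch of the mapping (standalone approximation; a PAGE_SHIFT of 12
is an assumption for the example):

/* page_pool order and max_len derived from the calculated page_shift. */
#include <stdio.h>

#define PAGE_SHIFT 12 /* assumption for the example */

int main(void)
{
        unsigned int page_shift = 17; /* e.g. 128K MPWQE pages */
        unsigned int order = page_shift - PAGE_SHIFT;        /* pp_params.order = 5 */
        unsigned long max_len = 1UL << (PAGE_SHIFT + order); /* 131072 */

        printf("order=%u max_len=%lu\n", order, max_len);
        return 0;
}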
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index aca88fed2ac7..6344dbb6335e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -857,6 +857,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
{
void *rqc_wq = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
struct mlx5_core_dev *mdev = rq->mdev;
+ u32 pool_order = 0;
u32 pool_size;
int wq_sz;
int err;
@@ -905,6 +906,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
pool_size = rq->mpwqe.pages_per_wqe <<
mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo);
+ pool_order = rq->mpwqe.page_shift - PAGE_SHIFT;
if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo) &&
params->xdp_prog)
@@ -960,7 +962,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
/* Create a page_pool and register it with rxq */
struct page_pool_params pp_params = { 0 };
- pp_params.order = 0;
+ pp_params.order = pool_order;
pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
pp_params.pool_size = pool_size;
pp_params.nid = node;
@@ -968,7 +970,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
pp_params.napi = rq->cq.napi;
pp_params.netdev = rq->netdev;
pp_params.dma_dir = rq->buff.map_dir;
- pp_params.max_len = PAGE_SIZE;
+ pp_params.max_len = BIT(PAGE_SHIFT + pool_order);
pp_params.queue_idx = rq->ix;
/* Shampo header data split allow for unreadable netmem */
--
2.44.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH net-next 10/15] net/mlx5e: Alloc rq drop page based on calculated page_shift
2026-02-23 20:41 [PATCH net-next 00/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode Tariq Toukan
` (8 preceding siblings ...)
2026-02-23 20:41 ` [PATCH net-next 09/15] net/mlx5e: Set page_pool order based on calculated page_shift Tariq Toukan
@ 2026-02-23 20:41 ` Tariq Toukan
2026-02-23 20:41 ` [PATCH net-next 11/15] net/mlx5e: RX, Make page frag bias more robust Tariq Toukan
` (5 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Tariq Toukan @ 2026-02-23 20:41 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
John Fastabend, Richard Cochran, netdev, linux-rdma, linux-kernel,
bpf, Gal Pressman, Dragos Tatulea, Cosmin Ratiu, Pavel Begunkov,
David Wei
From: Dragos Tatulea <dtatulea@nvidia.com>
An upcoming patch will allow setting the page order for RX
pages to be greater than 0. Make sure that the drop page will
also be allocated with the right size when that happens.
Take extra care when calculating the drop page size to
account for page_shift < PAGE_SHIFT, which can happen for xsk.
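A standalone sketch of the clamping (names hypothetical; mirrors the
max_t(s16, ...) expression in the patch):

/* Drop-page order must clamp at 0: xsk chunks can be smaller than a
 * system page, giving page_shift < PAGE_SHIFT.
 */
#include <stdio.h>

#define PAGE_SHIFT 12 /* assumption for the example */

static unsigned int drop_page_order(int page_shift)
{
        int order = page_shift - PAGE_SHIFT;

        return order > 0 ? order : 0;
}

int main(void)
{
        printf("%u\n", drop_page_order(11)); /* xsk 2K chunks -> order 0 */
        printf("%u\n", drop_page_order(17)); /* 128K pages -> order 5 */
        return 0;
}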
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
.../net/ethernet/mellanox/mlx5/core/en_main.c | 27 ++++++++++++-------
1 file changed, 17 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 6344dbb6335e..2d3d89707246 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -636,14 +636,18 @@ static void mlx5e_rq_timeout_work(struct work_struct *timeout_work)
static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
{
- rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
+ /* xsk can have page_shift < PAGE_SHIFT */
+ u16 page_order = max_t(s16, rq->mpwqe.page_shift - PAGE_SHIFT, 0);
+ u32 page_size = BIT(PAGE_SHIFT + page_order);
+
+ rq->wqe_overflow.page = alloc_pages(GFP_KERNEL, page_order);
if (!rq->wqe_overflow.page)
return -ENOMEM;
rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0,
- PAGE_SIZE, rq->buff.map_dir);
+ page_size, rq->buff.map_dir);
if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) {
- __free_page(rq->wqe_overflow.page);
+ __free_pages(rq->wqe_overflow.page, page_order);
return -ENOMEM;
}
return 0;
@@ -651,9 +655,12 @@ static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
{
- dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE,
- rq->buff.map_dir);
- __free_page(rq->wqe_overflow.page);
+ u16 page_order = max_t(s16, rq->mpwqe.page_shift - PAGE_SHIFT, 0);
+ u32 page_size = BIT(PAGE_SHIFT + page_order);
+
+ dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, page_size,
+ rq->buff.map_dir);
+ __free_pages(rq->wqe_overflow.page, page_order);
}
static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
@@ -884,15 +891,15 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
if (err)
goto err_rq_xdp_prog;
- err = mlx5e_alloc_mpwqe_rq_drop_page(rq);
- if (err)
- goto err_rq_wq_destroy;
-
rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, rqo);
+ err = mlx5e_alloc_mpwqe_rq_drop_page(rq);
+ if (err)
+ goto err_rq_wq_destroy;
+
rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo);
rq->mpwqe.pages_per_wqe =
mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift,
--
2.44.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH net-next 11/15] net/mlx5e: RX, Make page frag bias more robust
2026-02-23 20:41 [PATCH net-next 00/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode Tariq Toukan
` (9 preceding siblings ...)
2026-02-23 20:41 ` [PATCH net-next 10/15] net/mlx5e: Alloc rq drop page " Tariq Toukan
@ 2026-02-23 20:41 ` Tariq Toukan
2026-02-23 20:41 ` [PATCH net-next 12/15] net/mlx5e: Add queue config ops for page size Tariq Toukan
` (4 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Tariq Toukan @ 2026-02-23 20:41 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
John Fastabend, Richard Cochran, netdev, linux-rdma, linux-kernel,
bpf, Gal Pressman, Dragos Tatulea, Cosmin Ratiu, Pavel Begunkov,
David Wei
From: Dragos Tatulea <dtatulea@nvidia.com>
The MLX5E_PAGECNT_BIAS_MAX formula uses the system page size but does
not account for high order pages.
One way to fix this would be to adapt the formula to take the pool
order into account. That would require either recalculating it on
every allocation or adding an additional rq struct member to hold the
bias max.
However, neither is really needed, as the driver doesn't check the
bias value itself; it has other means to calculate the expected number
of fragments based on context.
This patch simply sets the value to the max possible value (U16_MAX).
A sanity check is added during the queue init phase to prevent very
large pages from requiring more fragments than the type can hold.
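To illustrate the check (standalone sketch; the divisor 64 mirrors the
driver's WARN_ON, and a PAGE_SHIFT of 12 is an assumption):

/* A pool page of BIT(PAGE_SHIFT + order) bytes yields at most
 * page_size / 64 fragments; that count has to fit in the bias max
 * (U16_MAX), otherwise queue init rejects the configuration.
 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12 /* assumption for the example */

int main(void)
{
        for (unsigned int order = 0; order <= 12; order++) {
                unsigned long frags = (1UL << (PAGE_SHIFT + order)) / 64;

                printf("order %2u: %7lu frags -> %s\n", order, frags,
                       frags > UINT16_MAX ? "reject" : "ok");
        }
        return 0;
}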
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 +
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++++++
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 --
3 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 5181d6ab39ae..c7ac6ebe8290 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -80,6 +80,7 @@ struct page_pool;
#define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+#define MLX5E_PAGECNT_BIAS_MAX U16_MAX
#define MLX5E_RX_MAX_HEAD (256)
#define MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE (8)
#define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2d3d89707246..cf977273f753 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -969,6 +969,12 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
/* Create a page_pool and register it with rxq */
struct page_pool_params pp_params = { 0 };
+ if (WARN_ON(BIT(PAGE_SHIFT + pool_order) / 64 >
+ MLX5E_PAGECNT_BIAS_MAX)) {
+ err = -E2BIG;
+ goto err_free_by_rq_type;
+ }
+
pp_params.order = pool_order;
pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
pp_params.pool_size = pool_size;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index fc95ea00666b..8fb57a4f36dd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -272,8 +272,6 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem);
}
-#define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64)
-
static int mlx5e_page_alloc_fragmented(struct page_pool *pp,
struct mlx5e_frag_page *frag_page)
{
--
2.44.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH net-next 12/15] net/mlx5e: Add queue config ops for page size
2026-02-23 20:41 [PATCH net-next 00/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode Tariq Toukan
` (10 preceding siblings ...)
2026-02-23 20:41 ` [PATCH net-next 11/15] net/mlx5e: RX, Make page frag bias more robust Tariq Toukan
@ 2026-02-23 20:41 ` Tariq Toukan
2026-02-23 20:41 ` [PATCH net-next 13/15] net/mlx5e: Pass netdev queue config to param calculations Tariq Toukan
` (3 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Tariq Toukan @ 2026-02-23 20:41 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
John Fastabend, Richard Cochran, netdev, linux-rdma, linux-kernel,
bpf, Gal Pressman, Dragos Tatulea, Cosmin Ratiu, Pavel Begunkov,
David Wei
From: Dragos Tatulea <dtatulea@nvidia.com>
For now, allow only PAGE_SIZE. A subsequent patch will add support for
high order pages in zero-copy mode.
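A minimal sketch of what the new validate op enforces at this stage
(standalone approximation, not the driver function):

#include <errno.h>
#include <stdio.h>

#define PAGE_SIZE 4096 /* assumption for the example */

static int validate_rx_page_size(unsigned int rx_page_size)
{
        if (rx_page_size != PAGE_SIZE)
                return -EINVAL;
        return 0;
}

int main(void)
{
        printf("%d\n", validate_rx_page_size(4096));   /* 0: accepted */
        printf("%d\n", validate_rx_page_size(131072)); /* -EINVAL for now */
        return 0;
}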
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
.../net/ethernet/mellanox/mlx5/core/en_main.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index cf977273f753..336e384c143a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5572,6 +5572,22 @@ struct mlx5_qmgmt_data {
struct mlx5e_channel *c;
};
+static void mlx5e_queue_default_qcfg(struct net_device *dev,
+ struct netdev_queue_config *qcfg)
+{
+ qcfg->rx_page_size = PAGE_SIZE;
+}
+
+static int mlx5e_queue_validate_qcfg(struct net_device *dev,
+ struct netdev_queue_config *qcfg,
+ struct netlink_ext_ack *extack)
+{
+ if (qcfg->rx_page_size != PAGE_SIZE)
+ return -EINVAL;
+
+ return 0;
+}
+
static int mlx5e_queue_mem_alloc(struct net_device *dev,
struct netdev_queue_config *qcfg,
void *newq, int queue_index)
@@ -5682,6 +5698,9 @@ static const struct netdev_queue_mgmt_ops mlx5e_queue_mgmt_ops = {
.ndo_queue_start = mlx5e_queue_start,
.ndo_queue_stop = mlx5e_queue_stop,
.ndo_queue_get_dma_dev = mlx5e_queue_get_dma_dev,
+ .ndo_default_qcfg = mlx5e_queue_default_qcfg,
+ .ndo_validate_qcfg = mlx5e_queue_validate_qcfg,
+ .supported_params = QCFG_RX_PAGE_SIZE,
};
static void mlx5e_build_nic_netdev(struct net_device *netdev)
--
2.44.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH net-next 13/15] net/mlx5e: Pass netdev queue config to param calculations
2026-02-23 20:41 [PATCH net-next 00/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode Tariq Toukan
` (11 preceding siblings ...)
2026-02-23 20:41 ` [PATCH net-next 12/15] net/mlx5e: Add queue config ops for page size Tariq Toukan
@ 2026-02-23 20:41 ` Tariq Toukan
2026-02-23 20:41 ` [PATCH net-next 14/15] net/mlx5e: Add param helper to calculate max page size Tariq Toukan
` (2 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Tariq Toukan @ 2026-02-23 20:41 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
John Fastabend, Richard Cochran, netdev, linux-rdma, linux-kernel,
bpf, Gal Pressman, Dragos Tatulea, Cosmin Ratiu, Pavel Begunkov,
David Wei
From: Dragos Tatulea <dtatulea@nvidia.com>
If set, take rx_page_size into consideration when calculating
the page shift in Multi Packet WQE mode.
The queue config is saved in the mlx5e_rq_opt_param struct which is
added to the mlx5e_channel_param struct. Now the configuration can be
read from the struct instead of adding it as an argument to all call
sites. For consistency, the queue config is assigned in
mlx5e_build_channel_param().
The queue configuration is read only from queue management ops
as that's the only place where it is currently useful. Furthermore,
netdev_queue_config() expects netdev->queue_mgmt_ops to be
set, which is not always the case (representor netdevs).
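A sketch of the resulting page-shift priority (standalone approximation;
the real code keys off the xsk pointer rather than a raw chunk size):

/* Requested page shift: xsk chunk size wins, then the queue config's
 * rx_page_size, then the system default.
 */
#include <stdio.h>

#define PAGE_SHIFT 12 /* assumption for the example */

static unsigned int ilog2u(unsigned int v) /* order_base_2() for pow2 input */
{
        unsigned int r = 0;

        while (v >>= 1)
                r++;
        return r;
}

static unsigned int req_page_shift(unsigned int xsk_chunk_size,
                                   unsigned int qcfg_rx_page_size)
{
        if (xsk_chunk_size)
                return ilog2u(xsk_chunk_size);
        if (qcfg_rx_page_size)
                return ilog2u(qcfg_rx_page_size);
        return PAGE_SHIFT;
}

int main(void)
{
        printf("%u\n", req_page_shift(2048, 0));   /* 11: xsk wins */
        printf("%u\n", req_page_shift(0, 131072)); /* 17: qcfg */
        printf("%u\n", req_page_shift(0, 0));      /* 12: default */
        return 0;
}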
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
.../ethernet/mellanox/mlx5/core/en/params.c | 14 ++++++++++++--
.../ethernet/mellanox/mlx5/core/en/params.h | 2 ++
.../net/ethernet/mellanox/mlx5/core/en_main.c | 19 ++++++++++++-------
3 files changed, 26 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 97f5d1c2adea..304b46ecc8df 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -10,6 +10,7 @@
#include <linux/dim.h>
#include <net/page_pool/types.h>
#include <net/xdp_sock_drv.h>
+#include <net/netdev_queues.h>
#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18
#define MLX5_REP_MPWRQ_MAX_LOG_WQE_SZ 17
@@ -24,11 +25,17 @@ static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev)
u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev,
struct mlx5e_rq_opt_param *rqo)
{
+ struct netdev_queue_config *qcfg = rqo ? rqo->qcfg : NULL;
struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo);
u8 min_page_shift = mlx5e_mpwrq_min_page_shift(mdev);
u8 req_page_shift;
- req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT;
+ if (xsk)
+ req_page_shift = order_base_2(xsk->chunk_size);
+ else if (qcfg && qcfg->rx_page_size)
+ req_page_shift = order_base_2(qcfg->rx_page_size);
+ else
+ req_page_shift = PAGE_SHIFT;
/* Regular RQ uses order-0 pages, the NIC must be able to map them. */
if (WARN_ON_ONCE(!xsk && req_page_shift < min_page_shift))
@@ -1283,12 +1290,15 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
+ struct netdev_queue_config *qcfg,
struct mlx5e_channel_param *cparam)
{
u8 icosq_log_wq_sz, async_icosq_log_wq_sz;
int err;
- err = mlx5e_build_rq_param(mdev, params, NULL, &cparam->rq);
+ cparam->rq_opt.qcfg = qcfg;
+
+ err = mlx5e_build_rq_param(mdev, params, &cparam->rq_opt, &cparam->rq);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 4bce769d48ed..5b6d528bce9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -14,6 +14,7 @@ struct mlx5e_xsk_param {
struct mlx5e_rq_opt_param {
struct mlx5e_xsk_param *xsk;
+ struct netdev_queue_config *qcfg;
};
struct mlx5e_cq_param {
@@ -143,6 +144,7 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
struct mlx5e_sq_param *param);
int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
+ struct netdev_queue_config *qcfg,
struct mlx5e_channel_param *cparam);
void mlx5e_build_xsk_channel_param(struct mlx5_core_dev *mdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 336e384c143a..59e38e7e067e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2524,8 +2524,10 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
return err;
}
-static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_rq_param *rq_param)
+static int mlx5e_open_rxq_rq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param,
+ struct mlx5e_rq_opt_param *rqo)
{
u16 q_counter = c->priv->q_counter[c->sd_ix];
int err;
@@ -2534,7 +2536,7 @@ static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
if (err)
return err;
- return mlx5e_open_rq(params, rq_param, NULL, cpu_to_node(c->cpu),
+ return mlx5e_open_rq(params, rq_param, rqo, cpu_to_node(c->cpu),
q_counter, &c->rq);
}
@@ -2638,7 +2640,7 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
if (err)
goto err_close_icosq;
- err = mlx5e_open_rxq_rq(c, params, &cparam->rq);
+ err = mlx5e_open_rxq_rq(c, params, &cparam->rq, &cparam->rq_opt);
if (err)
goto err_close_sqs;
@@ -2783,6 +2785,7 @@ static void mlx5e_channel_pick_doorbell(struct mlx5e_channel *c)
static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_params *params,
+ struct netdev_queue_config *qcfg,
struct xsk_buff_pool *xsk_pool,
struct mlx5e_channel **cp)
{
@@ -2816,7 +2819,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
goto err_free;
}
- err = mlx5e_build_channel_param(mdev, params, cparam);
+ err = mlx5e_build_channel_param(mdev, params, qcfg, cparam);
if (err)
goto err_free;
@@ -2941,7 +2944,8 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
if (chs->params.xdp_prog)
xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i);
- err = mlx5e_open_channel(priv, i, &chs->params, xsk_pool, &chs->c[i]);
+ err = mlx5e_open_channel(priv, i, &chs->params, NULL,
+ xsk_pool, &chs->c[i]);
if (err)
goto err_close_channels;
}
@@ -5619,7 +5623,8 @@ static int mlx5e_queue_mem_alloc(struct net_device *dev,
goto unlock;
}
- err = mlx5e_open_channel(priv, queue_index, &params, NULL, &new->c);
+ err = mlx5e_open_channel(priv, queue_index, &params, qcfg, NULL,
+ &new->c);
unlock:
mutex_unlock(&priv->state_lock);
return err;
--
2.44.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH net-next 14/15] net/mlx5e: Add param helper to calculate max page size
2026-02-23 20:41 [PATCH net-next 00/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode Tariq Toukan
` (12 preceding siblings ...)
2026-02-23 20:41 ` [PATCH net-next 13/15] net/mlx5e: Pass netdev queue config to param calculations Tariq Toukan
@ 2026-02-23 20:41 ` Tariq Toukan
2026-02-23 20:41 ` [PATCH net-next 15/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode Tariq Toukan
2026-02-26 10:10 ` [PATCH net-next 00/15] " patchwork-bot+netdevbpf
15 siblings, 0 replies; 17+ messages in thread
From: Tariq Toukan @ 2026-02-23 20:41 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
John Fastabend, Richard Cochran, netdev, linux-rdma, linux-kernel,
bpf, Gal Pressman, Dragos Tatulea, Cosmin Ratiu, Pavel Begunkov,
David Wei
From: Dragos Tatulea <dtatulea@nvidia.com>
This function will be necessary to determine the upper limit of
rx-page-size.
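The limit this helper returns works out as follows
(MLX5_MPWRQ_MAX_LOG_WQE_SZ is 18 in en/params.c; the sketch is
standalone):

/* Max WQE size is 2^18 bytes. Two MTTs are needed per octword and the
 * driver uses one page per MTT, so a WQE must span at least two pages:
 * max page size = 2^(18 - 1) = 128K.
 */
#include <stdio.h>

#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18

int main(void)
{
        printf("%u\n", 1U << (MLX5_MPWRQ_MAX_LOG_WQE_SZ - 1)); /* 131072 */
        return 0;
}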
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 12 ++++++++++++
drivers/net/ethernet/mellanox/mlx5/core/en/params.h | 1 +
2 files changed, 13 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 304b46ecc8df..26bb31c56e45 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -501,6 +501,18 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
return 0;
}
+u32 mlx5e_mpwrq_max_page_size(struct mlx5_core_dev *mdev)
+{
+ if (mlx5_core_is_ecpf(mdev))
+ return PAGE_SIZE;
+
+ /* Two MTTs are needed to form an octword. Driver is using a
+ * single page per MTT for simplicity. Hence the limit of having
+ * at least 2 pages per WQE.
+ */
+ return BIT(MLX5_MPWRQ_MAX_LOG_WQE_SZ - 1);
+}
+
u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
{
bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 5b6d528bce9b..9b1a2aed17c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -121,6 +121,7 @@ u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz);
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_rq_opt_param *rqo);
+u32 mlx5e_mpwrq_max_page_size(struct mlx5_core_dev *mdev);
/* Build queue parameters */
--
2.44.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH net-next 15/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode
2026-02-23 20:41 [PATCH net-next 00/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode Tariq Toukan
` (13 preceding siblings ...)
2026-02-23 20:41 ` [PATCH net-next 14/15] net/mlx5e: Add param helper to calculate max page size Tariq Toukan
@ 2026-02-23 20:41 ` Tariq Toukan
2026-02-26 10:10 ` [PATCH net-next 00/15] " patchwork-bot+netdevbpf
15 siblings, 0 replies; 17+ messages in thread
From: Tariq Toukan @ 2026-02-23 20:41 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
John Fastabend, Richard Cochran, netdev, linux-rdma, linux-kernel,
bpf, Gal Pressman, Dragos Tatulea, Cosmin Ratiu, Pavel Begunkov,
David Wei
From: Dragos Tatulea <dtatulea@nvidia.com>
Allow high order pages only when SHAMPO mode is enabled (hw-gro) and the
queue is used for zerocopy (has memory provider ops set); a sketch of
this gating check follows the list below. The limit is 128K, chosen for
the following reasons:
- A 256K size would require a special case during MTT calculation to
split the page in two, because two MTTs are needed to form an octword.
- Higher sizes would require increasing the WQE size and/or reducing
the number of WQEs.
- Populating the RQ with too few large pages can lead to refill issues.
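A standalone sketch of the gating check referenced above (names
hypothetical; mirrors mlx5e_queue_validate_page_size()):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096 /* assumption for the example */

static bool page_size_allowed(unsigned int rx_page_size,
                              bool has_unreadable_mp)
{
        if (rx_page_size == PAGE_SIZE)
                return true;
        /* High order pages only for zerocopy (io_uring/devmem) queues. */
        return has_unreadable_mp;
}

int main(void)
{
        printf("%d\n", page_size_allowed(4096, false));   /* 1 */
        printf("%d\n", page_size_allowed(131072, false)); /* 0 */
        printf("%d\n", page_size_allowed(131072, true));  /* 1 */
        return 0;
}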
Results show an increase in BW (~13% on-cpu, ~19% off-cpu) and a
decrease in CPU usage.
The benchmark was done with the zcrx samples from liburing [0].
rx_buf_len=4K, oncpu [1]:
packets=3358832 (MB=820027), rps=55794 (MB/s=13621)
Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle
Average: 9 1.56 0.00 18.09 13.42 0.00 66.80 0.00 0.00 0.00 0.12
rx_buf_len=128K, oncpu [2]:
packets=3781376 (MB=923187), rps=62813 (MB/s=15335)
Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle
Average: 9 0.33 0.00 7.61 18.86 0.00 73.08 0.00 0.00 0.00 0.12
rx_buf_len=4K, offcpu [3]:
packets=3460368 (MB=844816), rps=57481 (MB/s=14033)
Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle
Average: 9 0.00 0.00 0.26 0.00 0.00 92.63 0.00 0.00 0.00 7.11
Average: 11 3.04 0.00 68.09 28.87 0.00 0.00 0.00 0.00 0.00 0.00
rx_buf_len=128K, offcpu [4]:
packets=4119840 (MB=1005820), rps=68435 (MB/s=16707)
Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle
Average: 9 0.00 0.00 0.87 0.00 0.00 63.77 0.00 0.00 0.00 35.36
Average: 11 1.96 0.00 43.68 54.37 0.00 0.00 0.00 0.00 0.00 0.00
[0] https://github.com/isilence/liburing/tree/zcrx/rx-buf-len
[1] commands:
$> taskset -c 9 ./zcrx 6 -i eth2 -q 9 -A 1 -B 4096 -S 33554432
$> ./send-zerocopy tcp -6 -D 2001:db8::1 -t 60 -C 0 -l 1 -b 1 -n 1 -z 1 -d -s 256000
[2] commands:
$> taskset -c 9 ./zcrx 6 -i eth2 -q 9 -A 1 -B 131072 -S 33554432
$> ./send-zerocopy tcp -6 -D 2001:db8::1 -t 60 -C 0 -l 1 -b 1 -n 1 -z 1 -d -s 256000
[3] commands:
$> taskset -c 11 ./zcrx 6 -i eth2 -q 9 -A 1 -B 4096 -S 33554432
$> ./send-zerocopy tcp -6 -D 2001:db8::1 -t 60 -C 0 -l 1 -b 1 -n 1 -z 1 -d -s 256000
[4] commands:
$> taskset -c 11 ./zcrx 6 -i eth2 -q 9 -A 1 -B 131072 -S 33554432
$> ./send-zerocopy tcp -6 -D 2001:db8::1 -t 60 -C 0 -l 1 -b 1 -n 1 -z 1 -d -s 256000
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
.../net/ethernet/mellanox/mlx5/core/en_main.c | 36 ++++++++++++++++++-
1 file changed, 35 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 59e38e7e067e..67dc38981101 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5586,12 +5586,40 @@ static int mlx5e_queue_validate_qcfg(struct net_device *dev,
struct netdev_queue_config *qcfg,
struct netlink_ext_ack *extack)
{
- if (qcfg->rx_page_size != PAGE_SIZE)
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 max;
+
+ if (!is_power_of_2(qcfg->rx_page_size)) {
+ netdev_err(priv->netdev, "rx_page_size not power of 2: %u",
+ qcfg->rx_page_size);
return -EINVAL;
+ }
+
+ max = mlx5e_mpwrq_max_page_size(mdev);
+ if (qcfg->rx_page_size < PAGE_SIZE || qcfg->rx_page_size > max) {
+ netdev_err(priv->netdev,
+ "Selected rx_page_size %u not in supported range [%lu, %u]\n",
+ qcfg->rx_page_size, PAGE_SIZE, max);
+ return -ERANGE;
+ }
return 0;
}
+static bool mlx5e_queue_validate_page_size(struct net_device *dev,
+ struct netdev_queue_config *qcfg,
+ int queue_index)
+{
+ if (qcfg->rx_page_size == PAGE_SIZE)
+ return true;
+
+ if (!netif_rxq_has_unreadable_mp(dev, queue_index))
+ return false;
+
+ return true;
+}
+
static int mlx5e_queue_mem_alloc(struct net_device *dev,
struct netdev_queue_config *qcfg,
void *newq, int queue_index)
@@ -5623,6 +5651,12 @@ static int mlx5e_queue_mem_alloc(struct net_device *dev,
goto unlock;
}
+ if (!mlx5e_queue_validate_page_size(dev, qcfg, queue_index)) {
+ netdev_err(priv->netdev, "High order pages are supported only in Zero-Copy mode\n");
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ err = mlx5e_open_channel(priv, queue_index, &params, qcfg, NULL,
&new->c);
unlock:
--
2.44.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH net-next 00/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode
2026-02-23 20:41 [PATCH net-next 00/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode Tariq Toukan
` (14 preceding siblings ...)
2026-02-23 20:41 ` [PATCH net-next 15/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode Tariq Toukan
@ 2026-02-26 10:10 ` patchwork-bot+netdevbpf
15 siblings, 0 replies; 17+ messages in thread
From: patchwork-bot+netdevbpf @ 2026-02-26 10:10 UTC (permalink / raw)
To: Tariq Toukan
Cc: edumazet, kuba, pabeni, andrew+netdev, davem, saeedm, leon,
mbloch, ast, daniel, hawk, john.fastabend, richardcochran, netdev,
linux-rdma, linux-kernel, bpf, gal, dtatulea, cratiu,
asml.silence, dw
Hello:
This series was applied to netdev/net-next.git (main)
by Paolo Abeni <pabeni@redhat.com>:
On Mon, 23 Feb 2026 22:41:40 +0200 you wrote:
> Hi,
>
> This series adds support for high order pages when io_uring/devmem
> zero copy is used.
>
> See detailed description by Dragos below.
>
> [...]
Here is the summary with links:
- [net-next,01/15] net/mlx5e: Make mlx5e_rq_param naming consistent
https://git.kernel.org/netdev/net-next/c/376cf4227401
- [net-next,02/15] net/mlx5e: Extract striding rq param calculation in function
https://git.kernel.org/netdev/net-next/c/d3a99b71a29c
- [net-next,03/15] net/mlx5e: Extract max_xsk_wqebbs into its own function
https://git.kernel.org/netdev/net-next/c/a2ff2f5f808f
- [net-next,04/15] net/mlx5e: Expose and rename xsk channel parameter function
https://git.kernel.org/netdev/net-next/c/ba4f39c256f5
- [net-next,05/15] net/mlx5e: Alloc xsk channel param out of mlx5e_open_xsk()
https://git.kernel.org/netdev/net-next/c/8a96b9144f18
- [net-next,06/15] net/mlx5e: Move xsk param into new option container struct
https://git.kernel.org/netdev/net-next/c/099efb294e0a
- [net-next,07/15] net/mlx5e: Drop unused channel parameters
https://git.kernel.org/netdev/net-next/c/3707a73854c1
- [net-next,08/15] net/mlx5e: SHAMPO, Always calculate page size
https://git.kernel.org/netdev/net-next/c/dff1c3164a69
- [net-next,09/15] net/mlx5e: Set page_pool order based on calculated page_shift
https://git.kernel.org/netdev/net-next/c/3a145cf492a3
- [net-next,10/15] net/mlx5e: Alloc rq drop page based on calculated page_shift
https://git.kernel.org/netdev/net-next/c/0285cc3dac1b
- [net-next,11/15] net/mlx5e: RX, Make page frag bias more robust
https://git.kernel.org/netdev/net-next/c/8611660778bf
- [net-next,12/15] net/mlx5e: Add queue config ops for page size
https://git.kernel.org/netdev/net-next/c/0fa8c9335760
- [net-next,13/15] net/mlx5e: Pass netdev queue config to param calculations
https://git.kernel.org/netdev/net-next/c/585cfa99d357
- [net-next,14/15] net/mlx5e: Add param helper to calculate max page size
https://git.kernel.org/netdev/net-next/c/5b6e0ddb3686
- [net-next,15/15] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode
https://git.kernel.org/netdev/net-next/c/df5135fced85
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply [flat|nested] 17+ messages in thread