From: Leon Romanovsky <leon@kernel.org>
To: Jason Gunthorpe <jgg@nvidia.com>
Cc: Michael Guralnik <michaelgur@nvidia.com>, linux-rdma@vger.kernel.org
Subject: [PATCH rdma-next 3/4] RDMA/mlx5: Align mkc page size capability check to PRM
Date: Wed, 9 Jul 2025 09:42:10 +0300 [thread overview]
Message-ID: <eab4eeb4785105a4bb5eb362dc0b3662cd840412.1751979184.git.leon@kernel.org> (raw)
In-Reply-To: <cover.1751979184.git.leon@kernel.org>
From: Michael Guralnik <michaelgur@nvidia.com>
Align the capabilities checked when using the log_page_size 6th bit in the
mkey context to the PRM definition. The upper and lower bounds are set by
max/min caps, and modification of the 6th bit by UMR is allowed only when
a specific UMR cap is set.
Current implementation falsely assumes all page sizes up-to 2^63 are
supported when the UMR cap is set. In case the upper bound cap is lower
than 63, this might result a FW syndrome on mkey creation, e.g:
mlx5_core 0000:c1:00.0: mlx5_cmd_out_err:832:(pid 0): CREATE_MKEY(0×200) op_mod(0×0) failed, status bad parameter(0×3), syndrome (0×38a711), err(-22)
Previous cap enforcement is still correct for all current HW, FW and
driver combinations. However, this patch aligns the code to be PRM
compliant in the general case.
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/infiniband/hw/mlx5/mlx5_ib.h | 51 +++++++++++++++++++++++++---
drivers/infiniband/hw/mlx5/mr.c | 10 +++---
2 files changed, 52 insertions(+), 9 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index bca549f70200..37e557d259fc 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1757,18 +1757,59 @@ static inline u32 smi_to_native_portnum(struct mlx5_ib_dev *dev, u32 port)
return (port - 1) / dev->num_ports + 1;
}
+static inline unsigned int get_max_log_entity_size_cap(struct mlx5_ib_dev *dev,
+ int access_mode)
+{
+ int max_log_size = 0;
+
+ if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+ max_log_size =
+ MLX5_CAP_GEN_2(dev->mdev, max_mkey_log_entity_size_mtt);
+ else if (access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+ max_log_size = MLX5_CAP_GEN_2(
+ dev->mdev, max_mkey_log_entity_size_fixed_buffer);
+
+ if (!max_log_size ||
+ (max_log_size > 31 &&
+ !MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5)))
+ max_log_size = 31;
+
+ return max_log_size;
+}
+
+static inline unsigned int get_min_log_entity_size_cap(struct mlx5_ib_dev *dev,
+ int access_mode)
+{
+ int min_log_size = 0;
+
+ if (access_mode == MLX5_MKC_ACCESS_MODE_KSM &&
+ MLX5_CAP_GEN_2(dev->mdev,
+ min_mkey_log_entity_size_fixed_buffer_valid))
+ min_log_size = MLX5_CAP_GEN_2(
+ dev->mdev, min_mkey_log_entity_size_fixed_buffer);
+ else
+ min_log_size =
+ MLX5_CAP_GEN_2(dev->mdev, log_min_mkey_entity_size);
+
+ min_log_size = max(min_log_size, MLX5_ADAPTER_PAGE_SHIFT);
+ return min_log_size;
+}
+
/*
* For mkc users, instead of a page_offset the command has a start_iova which
* specifies both the page_offset and the on-the-wire IOVA
*/
static __always_inline unsigned long
mlx5_umem_mkc_find_best_pgsz(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- u64 iova)
+ u64 iova, int access_mode)
{
- int page_size_bits =
- MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5) ? 6 : 5;
- unsigned long bitmap =
- __mlx5_log_page_size_to_bitmap(page_size_bits, 0);
+ unsigned int max_log_entity_size_cap, min_log_entity_size_cap;
+ unsigned long bitmap;
+
+ max_log_entity_size_cap = get_max_log_entity_size_cap(dev, access_mode);
+ min_log_entity_size_cap = get_min_log_entity_size_cap(dev, access_mode);
+
+ bitmap = GENMASK_ULL(max_log_entity_size_cap, min_log_entity_size_cap);
return ib_umem_find_best_pgsz(umem, bitmap, iova);
}
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 5f36cad69dd7..753ca99c367f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1188,7 +1188,8 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
if (umem->is_dmabuf)
page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
else
- page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);
+ page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova,
+ access_mode);
if (WARN_ON(!page_size))
return ERR_PTR(-EINVAL);
@@ -1526,8 +1527,8 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
MLX5_MKC_ACCESS_MODE_MTT, st_index, ph);
} else {
- unsigned long page_size =
- mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);
+ unsigned long page_size = mlx5_umem_mkc_find_best_pgsz(
+ dev, umem, iova, MLX5_MKC_ACCESS_MODE_MTT);
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(pd, umem, iova, access_flags, page_size,
@@ -1873,7 +1874,8 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
return false;
- *page_size = mlx5_umem_mkc_find_best_pgsz(dev, new_umem, iova);
+ *page_size = mlx5_umem_mkc_find_best_pgsz(
+ dev, new_umem, iova, mr->mmkey.cache_ent->rb_key.access_mode);
if (WARN_ON(!*page_size))
return false;
return (mr->mmkey.cache_ent->rb_key.ndescs) >=
--
2.50.0
next prev parent reply other threads:[~2025-07-09 6:42 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-09 6:42 [PATCH rdma-next 0/4] Optimize DMABUF mkey page size in mlx5 Leon Romanovsky
2025-07-09 6:42 ` [PATCH mlx5-next 1/4] net/mlx5: Expose HCA capability bits for mkey max page size Leon Romanovsky
2025-07-09 6:42 ` [PATCH mlx5-next 2/4] RDMA/mlx5: Fix UMR modifying of mkey " Leon Romanovsky
2025-07-09 6:42 ` Leon Romanovsky [this message]
2025-07-09 6:42 ` [PATCH rdma-next 4/4] RDMA/mlx5: Optimize DMABUF " Leon Romanovsky
2025-07-13 7:14 ` (subset) [PATCH rdma-next 0/4] Optimize DMABUF mkey page size in mlx5 Leon Romanovsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=eab4eeb4785105a4bb5eb362dc0b3662cd840412.1751979184.git.leon@kernel.org \
--to=leon@kernel.org \
--cc=jgg@nvidia.com \
--cc=linux-rdma@vger.kernel.org \
--cc=michaelgur@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).