linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Leon Romanovsky <leon@kernel.org>
To: Jason Gunthorpe <jgg@nvidia.com>
Cc: Michael Guralnik <michaelgur@nvidia.com>, linux-rdma@vger.kernel.org
Subject: [PATCH rdma-next 3/4] RDMA/mlx5: Align mkc page size capability check to PRM
Date: Wed,  9 Jul 2025 09:42:10 +0300	[thread overview]
Message-ID: <eab4eeb4785105a4bb5eb362dc0b3662cd840412.1751979184.git.leon@kernel.org> (raw)
In-Reply-To: <cover.1751979184.git.leon@kernel.org>

From: Michael Guralnik <michaelgur@nvidia.com>

Align the capabilities checked when using the log_page_size 6th bit in the
mkey context to the PRM definition. The upper and lower bounds are set by
max/min caps, and modification of the 6th bit by UMR is allowed only when
a specific UMR cap is set.
Current implementation falsely assumes all page sizes up-to 2^63 are
supported when the UMR cap is set. In case the upper bound cap is lower
than 63, this might result a FW syndrome on mkey creation, e.g:
mlx5_core 0000:c1:00.0: mlx5_cmd_out_err:832:(pid 0): CREATE_MKEY(0×200) op_mod(0×0) failed, status bad parameter(0×3), syndrome (0×38a711), err(-22)

Previous cap enforcement is still correct for all current HW, FW and
driver combinations. However, this patch aligns the code to be PRM
compliant in the general case.

Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 51 +++++++++++++++++++++++++---
 drivers/infiniband/hw/mlx5/mr.c      | 10 +++---
 2 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index bca549f70200..37e557d259fc 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1757,18 +1757,59 @@ static inline u32 smi_to_native_portnum(struct mlx5_ib_dev *dev, u32 port)
 	return (port - 1) / dev->num_ports + 1;
 }
 
+static inline unsigned int get_max_log_entity_size_cap(struct mlx5_ib_dev *dev,
+						       int access_mode)
+{
+	int max_log_size = 0;
+
+	if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+		max_log_size =
+			MLX5_CAP_GEN_2(dev->mdev, max_mkey_log_entity_size_mtt);
+	else if (access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+		max_log_size = MLX5_CAP_GEN_2(
+			dev->mdev, max_mkey_log_entity_size_fixed_buffer);
+
+	if (!max_log_size ||
+	    (max_log_size > 31 &&
+	     !MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5)))
+		max_log_size = 31;
+
+	return max_log_size;
+}
+
+static inline unsigned int get_min_log_entity_size_cap(struct mlx5_ib_dev *dev,
+						       int access_mode)
+{
+	int min_log_size = 0;
+
+	if (access_mode == MLX5_MKC_ACCESS_MODE_KSM &&
+	    MLX5_CAP_GEN_2(dev->mdev,
+			   min_mkey_log_entity_size_fixed_buffer_valid))
+		min_log_size = MLX5_CAP_GEN_2(
+			dev->mdev, min_mkey_log_entity_size_fixed_buffer);
+	else
+		min_log_size =
+			MLX5_CAP_GEN_2(dev->mdev, log_min_mkey_entity_size);
+
+	min_log_size = max(min_log_size, MLX5_ADAPTER_PAGE_SHIFT);
+	return min_log_size;
+}
+
 /*
  * For mkc users, instead of a page_offset the command has a start_iova which
  * specifies both the page_offset and the on-the-wire IOVA
  */
 static __always_inline unsigned long
 mlx5_umem_mkc_find_best_pgsz(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-			     u64 iova)
+			     u64 iova, int access_mode)
 {
-	int page_size_bits =
-		MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5) ? 6 : 5;
-	unsigned long bitmap =
-		__mlx5_log_page_size_to_bitmap(page_size_bits, 0);
+	unsigned int max_log_entity_size_cap, min_log_entity_size_cap;
+	unsigned long bitmap;
+
+	max_log_entity_size_cap = get_max_log_entity_size_cap(dev, access_mode);
+	min_log_entity_size_cap = get_min_log_entity_size_cap(dev, access_mode);
+
+	bitmap = GENMASK_ULL(max_log_entity_size_cap, min_log_entity_size_cap);
 
 	return ib_umem_find_best_pgsz(umem, bitmap, iova);
 }
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 5f36cad69dd7..753ca99c367f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1188,7 +1188,8 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 	if (umem->is_dmabuf)
 		page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
 	else
-		page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);
+		page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova,
+							 access_mode);
 	if (WARN_ON(!page_size))
 		return ERR_PTR(-EINVAL);
 
@@ -1526,8 +1527,8 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
 		mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
 					MLX5_MKC_ACCESS_MODE_MTT, st_index, ph);
 	} else {
-		unsigned long page_size =
-			mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);
+		unsigned long page_size = mlx5_umem_mkc_find_best_pgsz(
+			dev, umem, iova, MLX5_MKC_ACCESS_MODE_MTT);
 
 		mutex_lock(&dev->slow_path_mutex);
 		mr = reg_create(pd, umem, iova, access_flags, page_size,
@@ -1873,7 +1874,8 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
 	if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
 		return false;
 
-	*page_size = mlx5_umem_mkc_find_best_pgsz(dev, new_umem, iova);
+	*page_size = mlx5_umem_mkc_find_best_pgsz(
+		dev, new_umem, iova, mr->mmkey.cache_ent->rb_key.access_mode);
 	if (WARN_ON(!*page_size))
 		return false;
 	return (mr->mmkey.cache_ent->rb_key.ndescs) >=
-- 
2.50.0


  parent reply	other threads:[~2025-07-09  6:42 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-09  6:42 [PATCH rdma-next 0/4] Optimize DMABUF mkey page size in mlx5 Leon Romanovsky
2025-07-09  6:42 ` [PATCH mlx5-next 1/4] net/mlx5: Expose HCA capability bits for mkey max page size Leon Romanovsky
2025-07-09  6:42 ` [PATCH mlx5-next 2/4] RDMA/mlx5: Fix UMR modifying of mkey " Leon Romanovsky
2025-07-09  6:42 ` Leon Romanovsky [this message]
2025-07-09  6:42 ` [PATCH rdma-next 4/4] RDMA/mlx5: Optimize DMABUF " Leon Romanovsky
2025-07-13  7:14 ` (subset) [PATCH rdma-next 0/4] Optimize DMABUF mkey page size in mlx5 Leon Romanovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=eab4eeb4785105a4bb5eb362dc0b3662cd840412.1751979184.git.leon@kernel.org \
    --to=leon@kernel.org \
    --cc=jgg@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=michaelgur@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).