public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@ziepe.ca>
To: linux-rdma@vger.kernel.org
Cc: Jason Gunthorpe <jgg@mellanox.com>,
	Artemy Kovalyov <artemyko@mellanox.com>
Subject: [PATCH 08/15] RDMA/mlx5: Split implicit handling from pagefault_mr
Date: Wed,  9 Oct 2019 13:09:28 -0300	[thread overview]
Message-ID: <20191009160934.3143-9-jgg@ziepe.ca> (raw)
In-Reply-To: <20191009160934.3143-1-jgg@ziepe.ca>

From: Jason Gunthorpe <jgg@mellanox.com>

The single routine has a very confusing scheme to advance to the next
child MR when working on an implicit parent. This scheme can only be used
when working with an implicit parent and must not be triggered when
working on a normal MR.

Re-arrange things by directly putting all the single-MR stuff into one
function and calling it in a loop for the implicit case. Simplify some of
the error handling in the new pagefault_real_mr() to remove unneeded gotos.

Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/odp.c | 125 +++++++++++++++++++------------
 1 file changed, 76 insertions(+), 49 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 74f7caa9c99fb9..aba4f17c235467 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -629,33 +629,18 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 }
 
 #define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
-static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
-			u32 *bytes_mapped, u32 flags)
+static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
+			     u64 user_va, size_t bcnt, u32 *bytes_mapped,
+			     u32 flags)
 {
-	int npages = 0, current_seq, page_shift, ret, np;
-	struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
+	int current_seq, page_shift, ret, np;
 	bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
 	u64 access_mask;
 	u64 start_idx, page_mask;
-	struct ib_umem_odp *odp;
-	size_t size;
-
-	if (odp_mr->is_implicit_odp) {
-		odp = implicit_mr_get_data(mr, io_virt, bcnt);
-
-		if (IS_ERR(odp))
-			return PTR_ERR(odp);
-		mr = odp->private;
-	} else {
-		odp = odp_mr;
-	}
-
-next_mr:
-	size = min_t(size_t, bcnt, ib_umem_end(odp) - io_virt);
 
 	page_shift = odp->page_shift;
 	page_mask = ~(BIT(page_shift) - 1);
-	start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+	start_idx = (user_va - (mr->mmkey.iova & page_mask)) >> page_shift;
 	access_mask = ODP_READ_ALLOWED_BIT;
 
 	if (odp->umem.writable && !downgrade)
@@ -668,13 +653,10 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
 	 */
 	smp_rmb();
 
-	ret = ib_umem_odp_map_dma_pages(odp, io_virt, size, access_mask,
-					current_seq);
-
-	if (ret < 0)
-		goto out;
-
-	np = ret;
+	np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask,
+				       current_seq);
+	if (np < 0)
+		return np;
 
 	mutex_lock(&odp->umem_mutex);
 	if (!ib_umem_mmu_notifier_retry(odp, current_seq)) {
@@ -699,31 +681,12 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
 
 	if (bytes_mapped) {
 		u32 new_mappings = (np << page_shift) -
-			(io_virt - round_down(io_virt, 1 << page_shift));
-		*bytes_mapped += min_t(u32, new_mappings, size);
-	}
-
-	npages += np << (page_shift - PAGE_SHIFT);
-	bcnt -= size;
+			(user_va - round_down(user_va, 1 << page_shift));
 
-	if (unlikely(bcnt)) {
-		struct ib_umem_odp *next;
-
-		io_virt += size;
-		next = odp_next(odp);
-		if (unlikely(!next || ib_umem_start(next) != io_virt)) {
-			mlx5_ib_dbg(
-				mr->dev,
-				"next implicit leaf removed at 0x%llx. got %p\n",
-				io_virt, next);
-			return -EAGAIN;
-		}
-		odp = next;
-		mr = odp->private;
-		goto next_mr;
+		*bytes_mapped += min_t(u32, new_mappings, bcnt);
 	}
 
-	return npages;
+	return np << (page_shift - PAGE_SHIFT);
 
 out:
 	if (ret == -EAGAIN) {
@@ -742,6 +705,70 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
 	return ret;
 }
 
+/*
+ * Returns:
+ *  -EFAULT: [io_virt, io_virt + bcnt) is not within the MR, covers pages
+ *           that are not accessible, or the MR is no longer valid.
+ *  -EAGAIN/-ENOMEM: The operation should be retried
+ *
+ *  -EINVAL/others: General internal malfunction
+ *  >0: Number of pages mapped
+ */
+static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
+			u32 *bytes_mapped, u32 flags)
+{
+	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+	struct ib_umem_odp *child;
+	int npages = 0;
+
+	if (!odp->is_implicit_odp) {
+		if (unlikely(io_virt < ib_umem_start(odp) ||
+			     ib_umem_end(odp) - io_virt < bcnt))
+			return -EFAULT;
+		return pagefault_real_mr(mr, odp, io_virt, bcnt, bytes_mapped,
+					 flags);
+	}
+
+	if (unlikely(io_virt >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE ||
+		     mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - io_virt < bcnt))
+		return -EFAULT;
+
+	child = implicit_mr_get_data(mr, io_virt, bcnt);
+	if (IS_ERR(child))
+		return PTR_ERR(child);
+
+	/* Fault each child mr that intersects with our interval. */
+	while (bcnt) {
+		u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(child));
+		u64 len = end - io_virt;
+		int ret;
+
+		ret = pagefault_real_mr(child->private, child, io_virt, len,
+					bytes_mapped, flags);
+		if (ret < 0)
+			return ret;
+		io_virt += len;
+		bcnt -= len;
+		npages += ret;
+
+		if (unlikely(bcnt)) {
+			child = odp_next(child);
+			/*
+			 * implicit_mr_get_data sets up all the leaves, so a
+			 * missing leaf was invalidated before we got to it.
+			 */
+			if (!child || ib_umem_start(child) != io_virt) {
+				mlx5_ib_dbg(
+					mr->dev,
+					"next implicit leaf removed at 0x%llx.\n",
+					io_virt);
+				return -EAGAIN;
+			}
+		}
+	}
+	return npages;
+}
+
 struct pf_frame {
 	struct pf_frame *next;
 	u32 key;
-- 
2.23.0


  parent reply	other threads:[~2019-10-09 16:10 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-09 16:09 [PATCH 00/15] Rework the locking and datastructures for mlx5 implicit ODP Jason Gunthorpe
2019-10-09 16:09 ` [PATCH 01/15] RDMA/mlx5: Use SRCU properly in ODP prefetch Jason Gunthorpe
2019-10-25 19:21   ` Jason Gunthorpe
2019-10-09 16:09 ` [PATCH 02/15] RDMA/mlx5: Split sig_err MR data into its own xarray Jason Gunthorpe
2019-10-09 16:09 ` [PATCH 03/15] RDMA/mlx5: Use a dedicated mkey xarray for ODP Jason Gunthorpe
2019-10-09 16:09 ` [PATCH 04/15] RDMA/mlx5: Delete struct mlx5_priv->mkey_table Jason Gunthorpe
2019-10-09 16:09 ` [PATCH 05/15] RDMA/mlx5: Rework implicit_mr_get_data Jason Gunthorpe
2019-10-09 16:09 ` [PATCH 06/15] RDMA/mlx5: Lift implicit_mr_alloc() into the two routines that call it Jason Gunthorpe
2019-10-09 16:09 ` [PATCH 07/15] RDMA/mlx5: Set the HW IOVA of the child MRs to their place in the tree Jason Gunthorpe
2019-10-09 16:09 ` Jason Gunthorpe [this message]
2019-10-09 16:09 ` [PATCH 09/15] RDMA/mlx5: Use an xarray for the children of an implicit ODP Jason Gunthorpe
2019-10-09 16:09 ` [PATCH 10/15] RDMA/mlx5: Reduce locking in implicit_mr_get_data() Jason Gunthorpe
2019-10-09 16:09 ` [PATCH 11/15] RDMA/mlx5: Avoid double lookups on the pagefault path Jason Gunthorpe
2019-10-09 16:09 ` [PATCH 12/15] RDMA/mlx5: Rework implicit ODP destroy Jason Gunthorpe
2019-10-09 16:09 ` [PATCH 13/15] RDMA/mlx5: Do not store implicit children in the odp_mkeys xarray Jason Gunthorpe
2019-10-09 16:09 ` [PATCH 14/15] RDMA/mlx5: Do not race with mlx5_ib_invalidate_range during create and destroy Jason Gunthorpe
2019-10-28 14:18   ` Jason Gunthorpe
2019-10-09 16:09 ` [PATCH 15/15] RDMA/odp: Remove broken debugging call to invalidate_range Jason Gunthorpe
2019-10-28 19:47 ` [PATCH 00/15] Rework the locking and datastructures for mlx5 implicit ODP Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191009160934.3143-9-jgg@ziepe.ca \
    --to=jgg@ziepe.ca \
    --cc=artemyko@mellanox.com \
    --cc=jgg@mellanox.com \
    --cc=linux-rdma@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox