From: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
To: Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Artemy Kovalyov
<artemyko-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Subject: [PATCH rdma-next V1 09/10] IB/mlx5: Extract page fault code
Date: Wed, 5 Apr 2017 09:23:58 +0300 [thread overview]
Message-ID: <20170405062359.26623-10-leon@kernel.org> (raw)
In-Reply-To: <20170405062359.26623-1-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
From: Artemy Kovalyov <artemyko-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
To make the page fault handling code more flexible,
split the pagefault_single_data_segment() function.
Keep MR resolution in pagefault_single_data_segment() and
move the actual updates into pagefault_mr().
Signed-off-by: Artemy Kovalyov <artemyko-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
drivers/infiniband/hw/mlx5/odp.c | 203 ++++++++++++++++++++-------------------
1 file changed, 104 insertions(+), 99 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index eddabd6e6596..842e1dbb50b8 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -511,81 +511,38 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
}
-/*
- * Handle a single data segment in a page-fault WQE or RDMA region.
- *
- * Returns number of OS pages retrieved on success. The caller may continue to
- * the next data segment.
- * Can return the following error codes:
- * -EAGAIN to designate a temporary error. The caller will abort handling the
- * page fault and resolve it.
- * -EFAULT when there's an error mapping the requested pages. The caller will
- * abort the page fault handling.
- */
-static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
- u32 key, u64 io_virt, size_t bcnt,
- u32 *bytes_committed,
- u32 *bytes_mapped)
+static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+ u64 io_virt, size_t bcnt, u32 *bytes_mapped)
{
- int srcu_key;
- unsigned int current_seq = 0;
- u64 start_idx, page_mask;
- int npages = 0, ret = 0;
- struct mlx5_ib_mr *mr;
u64 access_mask = ODP_READ_ALLOWED_BIT;
+ int npages = 0, page_shift, np;
+ u64 start_idx, page_mask;
struct ib_umem_odp *odp;
- int implicit = 0;
+ int current_seq;
size_t size;
- int page_shift;
-
- srcu_key = srcu_read_lock(&dev->mr_srcu);
- mr = mlx5_ib_odp_find_mr_lkey(dev, key);
- /*
- * If we didn't find the MR, it means the MR was closed while we were
- * handling the ODP event. In this case we return -EFAULT so that the
- * QP will be closed.
- */
- if (!mr || !mr->ibmr.pd) {
- mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
- key);
- ret = -EFAULT;
- goto srcu_unlock;
- }
- if (!mr->umem->odp_data) {
- mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
- key);
- if (bytes_mapped)
- *bytes_mapped +=
- (bcnt - *bytes_committed);
- goto srcu_unlock;
- }
-
- /*
- * Avoid branches - this code will perform correctly
- * in all iterations (in iteration 2 and above,
- * bytes_committed == 0).
- */
- io_virt += *bytes_committed;
- bcnt -= *bytes_committed;
+ int ret;
if (!mr->umem->odp_data->page_list) {
odp = implicit_mr_get_data(mr, io_virt, bcnt);
- if (IS_ERR(odp)) {
- ret = PTR_ERR(odp);
- goto srcu_unlock;
- }
+ if (IS_ERR(odp))
+ return PTR_ERR(odp);
mr = odp->private;
- implicit = 1;
} else {
odp = mr->umem->odp_data;
}
+next_mr:
+ size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
+
page_shift = mr->umem->page_shift;
page_mask = ~(BIT(page_shift) - 1);
+ start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+
+ if (mr->umem->writable)
+ access_mask |= ODP_WRITE_ALLOWED_BIT;
-next_mr:
current_seq = READ_ONCE(odp->notifiers_seq);
/*
* Ensure the sequence number is valid for some time before we call
@@ -593,51 +550,43 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
*/
smp_rmb();
- size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
- start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
-
- if (mr->umem->writable)
- access_mask |= ODP_WRITE_ALLOWED_BIT;
-
ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
access_mask, current_seq);
if (ret < 0)
- goto srcu_unlock;
+ goto out;
- if (ret > 0) {
- int np = ret;
-
- mutex_lock(&odp->umem_mutex);
- if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
- /*
- * No need to check whether the MTTs really belong to
- * this MR, since ib_umem_odp_map_dma_pages already
- * checks this.
- */
- ret = mlx5_ib_update_xlt(mr, start_idx, np,
- page_shift,
- MLX5_IB_UPD_XLT_ATOMIC);
- } else {
- ret = -EAGAIN;
- }
- mutex_unlock(&odp->umem_mutex);
- if (ret < 0) {
- if (ret != -EAGAIN)
- mlx5_ib_err(dev, "Failed to update mkey page tables\n");
- goto srcu_unlock;
- }
- if (bytes_mapped) {
- u32 new_mappings = (np << page_shift) -
- (io_virt - round_down(io_virt,
- 1 << page_shift));
- *bytes_mapped += min_t(u32, new_mappings, size);
- }
+ np = ret;
- npages += np << (page_shift - PAGE_SHIFT);
+ mutex_lock(&odp->umem_mutex);
+ if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+ /*
+ * No need to check whether the MTTs really belong to
+ * this MR, since ib_umem_odp_map_dma_pages already
+ * checks this.
+ */
+ ret = mlx5_ib_update_xlt(mr, start_idx, np,
+ page_shift, MLX5_IB_UPD_XLT_ATOMIC);
+ } else {
+ ret = -EAGAIN;
}
+ mutex_unlock(&odp->umem_mutex);
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ mlx5_ib_err(dev, "Failed to update mkey page tables\n");
+ goto out;
+ }
+
+ if (bytes_mapped) {
+ u32 new_mappings = (np << page_shift) -
+ (io_virt - round_down(io_virt, 1 << page_shift));
+ *bytes_mapped += min_t(u32, new_mappings, size);
+ }
+
+ npages += np << (page_shift - PAGE_SHIFT);
bcnt -= size;
+
if (unlikely(bcnt)) {
struct ib_umem_odp *next;
@@ -646,17 +595,18 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
if (unlikely(!next || next->umem->address != io_virt)) {
mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
io_virt, next);
- ret = -EAGAIN;
- goto srcu_unlock_no_wait;
+ return -EAGAIN;
}
odp = next;
mr = odp->private;
goto next_mr;
}
-srcu_unlock:
+ return npages;
+
+out:
if (ret == -EAGAIN) {
- if (implicit || !odp->dying) {
+ if (mr->parent || !odp->dying) {
unsigned long timeout =
msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
@@ -672,7 +622,62 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
}
}
-srcu_unlock_no_wait:
+ return ret;
+}
+
+/*
+ * Handle a single data segment in a page-fault WQE or RDMA region.
+ *
+ * Returns number of OS pages retrieved on success. The caller may continue to
+ * the next data segment.
+ * Can return the following error codes:
+ * -EAGAIN to designate a temporary error. The caller will abort handling the
+ * page fault and resolve it.
+ * -EFAULT when there's an error mapping the requested pages. The caller will
+ * abort the page fault handling.
+ */
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
+ u32 key, u64 io_virt, size_t bcnt,
+ u32 *bytes_committed,
+ u32 *bytes_mapped)
+{
+ int npages = 0, srcu_key, ret;
+ struct mlx5_ib_mr *mr;
+ size_t size;
+
+ srcu_key = srcu_read_lock(&dev->mr_srcu);
+ mr = mlx5_ib_odp_find_mr_lkey(dev, key);
+ /*
+ * If we didn't find the MR, it means the MR was closed while we were
+ * handling the ODP event. In this case we return -EFAULT so that the
+ * QP will be closed.
+ */
+ if (!mr || !mr->ibmr.pd) {
+ mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+ key);
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+ if (!mr->umem->odp_data) {
+ mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+ key);
+ if (bytes_mapped)
+ *bytes_mapped +=
+ (bcnt - *bytes_committed);
+ goto srcu_unlock;
+ }
+
+ /*
+ * Avoid branches - this code will perform correctly
+ * in all iterations (in iteration 2 and above,
+ * bytes_committed == 0).
+ */
+ io_virt += *bytes_committed;
+ bcnt -= *bytes_committed;
+
+ npages = pagefault_mr(dev, mr, io_virt, size, bytes_mapped);
+
+srcu_unlock:
srcu_read_unlock(&dev->mr_srcu, srcu_key);
*bytes_committed = 0;
return ret ? ret : npages;
--
2.12.0
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2017-04-05 6:23 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-04-05 6:23 [PATCH rdma-next V1 00/10] ODP Fixes and Improvements Leon Romanovsky
[not found] ` <20170405062359.26623-1-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2017-04-05 6:23 ` [PATCH rdma-next V1 01/10] IB: Replace ib_umem page_size by page_shift Leon Romanovsky
[not found] ` <20170405062359.26623-2-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2017-04-05 9:48 ` Amrani, Ram
2017-04-05 16:38 ` Saleem, Shiraz
2017-04-05 17:18 ` Selvin Xavier
2017-04-05 17:30 ` Adit Ranadive
2017-04-05 6:23 ` [PATCH rdma-next V1 02/10] IB/mlx5: Fix function updating xlt emergency path Leon Romanovsky
2017-04-05 6:23 ` [PATCH rdma-next V1 03/10] IB/mlx5: Fix UMR size calculation Leon Romanovsky
2017-04-05 6:23 ` [PATCH rdma-next V1 04/10] IB/mlx5: Fix implicit MR GC Leon Romanovsky
2017-04-05 6:23 ` [PATCH rdma-next V1 05/10] IB/mlx5: Decrease verbosity level of ODP errors Leon Romanovsky
2017-04-05 6:23 ` [PATCH rdma-next V1 06/10] IB/umem: Add contiguous ODP support Leon Romanovsky
2017-04-05 6:23 ` [PATCH rdma-next V1 07/10] IB/mlx5: " Leon Romanovsky
2017-04-05 6:23 ` [PATCH rdma-next V1 08/10] IB/umem: Add support to huge ODP Leon Romanovsky
[not found] ` <20170405062359.26623-9-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2017-04-05 16:45 ` Shiraz Saleem
[not found] ` <20170405164539.GA9232-GOXS9JX10wfOxmVO0tvppfooFf0ArEBIu+b9c/7xato@public.gmane.org>
2017-04-05 17:33 ` Leon Romanovsky
2017-04-05 6:23 ` Leon Romanovsky [this message]
2017-04-05 6:23 ` [PATCH rdma-next V1 10/10] IB/mlx5: Add ODP support to MW Leon Romanovsky
2017-04-25 19:41 ` [PATCH rdma-next V1 00/10] ODP Fixes and Improvements Doug Ledford
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170405062359.26623-10-leon@kernel.org \
--to=leon-dgejt+ai2ygdnm+yrofe0a@public.gmane.org \
--cc=artemyko-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
--cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.