From: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
To: Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Artemy Kovalyov
<artemyko-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Subject: [PATCH rdma-next V1 10/10] IB/mlx5: Add ODP support to MW
Date: Wed, 5 Apr 2017 09:23:59 +0300 [thread overview]
Message-ID: <20170405062359.26623-11-leon@kernel.org> (raw)
In-Reply-To: <20170405062359.26623-1-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
From: Artemy Kovalyov <artemyko-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Internally, an MW is implemented as a KLM MKey and is filled by userspace UMR
postsends. Handle page faults triggered by operations on these MKeys.
Signed-off-by: Artemy Kovalyov <artemyko-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 +
drivers/infiniband/hw/mlx5/mr.c | 1 +
drivers/infiniband/hw/mlx5/odp.c | 161 +++++++++++++++++++++++++----------
3 files changed, 120 insertions(+), 43 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 3cd064b5f0bf..9f519404ad7a 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -513,6 +513,7 @@ struct mlx5_ib_mr {
struct mlx5_ib_mw {
struct ib_mw ibmw;
struct mlx5_core_mkey mmkey;
+ int ndescs;
};
struct mlx5_ib_umr_context {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 9a74260e9899..93c0e82aa491 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1688,6 +1688,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
mw->mmkey.type = MLX5_MKEY_MW;
mw->ibmw.rkey = mw->mmkey.key;
+ mw->ndescs = ndescs;
resp.response_length = min(offsetof(typeof(resp), response_length) +
sizeof(resp.response_length), udata->outlen);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 842e1dbb50b8..ae0746754008 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -288,24 +288,6 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
return;
}
-static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
- u32 key)
-{
- u32 base_key = mlx5_base_mkey(key);
- struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key);
- struct mlx5_ib_mr *mr;
-
- if (!mmkey || mmkey->key != key || mmkey->type != MLX5_MKEY_MR)
- return NULL;
-
- mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
-
- if (!mr->live)
- return NULL;
-
- return container_of(mmkey, struct mlx5_ib_mr, mmkey);
-}
-
static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
struct mlx5_pagefault *pfault,
int error)
@@ -625,6 +607,14 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
return ret;
}
+struct pf_frame {
+ struct pf_frame *next;
+ u32 key;
+ u64 io_virt;
+ size_t bcnt;
+ int depth;
+};
+
/*
* Handle a single data segment in a page-fault WQE or RDMA region.
*
@@ -641,43 +631,128 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
u32 *bytes_committed,
u32 *bytes_mapped)
{
- int npages = 0, srcu_key, ret;
+ int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
+ struct pf_frame *head = NULL, *frame;
+ struct mlx5_core_mkey *mmkey;
+ struct mlx5_ib_mw *mw;
struct mlx5_ib_mr *mr;
- size_t size;
+ struct mlx5_klm *pklm;
+ u32 *out = NULL;
+ size_t offset;
srcu_key = srcu_read_lock(&dev->mr_srcu);
- mr = mlx5_ib_odp_find_mr_lkey(dev, key);
- /*
- * If we didn't find the MR, it means the MR was closed while we were
- * handling the ODP event. In this case we return -EFAULT so that the
- * QP will be closed.
- */
- if (!mr || !mr->ibmr.pd) {
- mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
- key);
+
+ io_virt += *bytes_committed;
+ bcnt -= *bytes_committed;
+
+next_mr:
+ mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key));
+ if (!mmkey || mmkey->key != key) {
+ mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
ret = -EFAULT;
goto srcu_unlock;
}
- if (!mr->umem->odp_data) {
- mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
- key);
- if (bytes_mapped)
- *bytes_mapped +=
- (bcnt - *bytes_committed);
+
+ switch (mmkey->type) {
+ case MLX5_MKEY_MR:
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ if (!mr->live || !mr->ibmr.pd) {
+ mlx5_ib_dbg(dev, "got dead MR\n");
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped);
+ if (ret < 0)
+ goto srcu_unlock;
+
+ npages += ret;
+ ret = 0;
+ break;
+
+ case MLX5_MKEY_MW:
+ mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
+
+ if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
+ mlx5_ib_dbg(dev, "indirection level exceeded\n");
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
+ sizeof(*pklm) * (mw->ndescs - 2);
+
+ if (outlen > cur_outlen) {
+ kfree(out);
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out) {
+ ret = -ENOMEM;
+ goto srcu_unlock;
+ }
+ cur_outlen = outlen;
+ }
+
+ pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
+ bsf0_klm0_pas_mtt0_1);
+
+ ret = mlx5_core_query_mkey(dev->mdev, &mw->mmkey, out, outlen);
+ if (ret)
+ goto srcu_unlock;
+
+ offset = io_virt - MLX5_GET64(query_mkey_out, out,
+ memory_key_mkey_entry.start_addr);
+
+ for (i = 0; bcnt && i < mw->ndescs; i++, pklm++) {
+ if (offset >= be32_to_cpu(pklm->bcount)) {
+ offset -= be32_to_cpu(pklm->bcount);
+ continue;
+ }
+
+ frame = kzalloc(sizeof(*frame), GFP_KERNEL);
+ if (!frame) {
+ ret = -ENOMEM;
+ goto srcu_unlock;
+ }
+
+ frame->key = be32_to_cpu(pklm->key);
+ frame->io_virt = be64_to_cpu(pklm->va) + offset;
+ frame->bcnt = min_t(size_t, bcnt,
+ be32_to_cpu(pklm->bcount) - offset);
+ frame->depth = depth + 1;
+ frame->next = head;
+ head = frame;
+
+ bcnt -= frame->bcnt;
+ }
+ break;
+
+ default:
+ mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type);
+ ret = -EFAULT;
goto srcu_unlock;
}
- /*
- * Avoid branches - this code will perform correctly
- * in all iterations (in iteration 2 and above,
- * bytes_committed == 0).
- */
- io_virt += *bytes_committed;
- bcnt -= *bytes_committed;
+ if (head) {
+ frame = head;
+ head = frame->next;
- npages = pagefault_mr(dev, mr, io_virt, size, bytes_mapped);
+ key = frame->key;
+ io_virt = frame->io_virt;
+ bcnt = frame->bcnt;
+ depth = frame->depth;
+ kfree(frame);
+
+ goto next_mr;
+ }
srcu_unlock:
+ while (head) {
+ frame = head;
+ head = frame->next;
+ kfree(frame);
+ }
+ kfree(out);
+
srcu_read_unlock(&dev->mr_srcu, srcu_key);
*bytes_committed = 0;
return ret ? ret : npages;
--
2.12.0
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2017-04-05 6:23 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-04-05 6:23 [PATCH rdma-next V1 00/10] ODP Fixes and Improvements Leon Romanovsky
[not found] ` <20170405062359.26623-1-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2017-04-05 6:23 ` [PATCH rdma-next V1 01/10] IB: Replace ib_umem page_size by page_shift Leon Romanovsky
[not found] ` <20170405062359.26623-2-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2017-04-05 9:48 ` Amrani, Ram
2017-04-05 16:38 ` Saleem, Shiraz
2017-04-05 17:18 ` Selvin Xavier
2017-04-05 17:30 ` Adit Ranadive
2017-04-05 6:23 ` [PATCH rdma-next V1 02/10] IB/mlx5: Fix function updating xlt emergency path Leon Romanovsky
2017-04-05 6:23 ` [PATCH rdma-next V1 03/10] IB/mlx5: Fix UMR size calculation Leon Romanovsky
2017-04-05 6:23 ` [PATCH rdma-next V1 04/10] IB/mlx5: Fix implicit MR GC Leon Romanovsky
2017-04-05 6:23 ` [PATCH rdma-next V1 05/10] IB/mlx5: Decrease verbosity level of ODP errors Leon Romanovsky
2017-04-05 6:23 ` [PATCH rdma-next V1 06/10] IB/umem: Add contiguous ODP support Leon Romanovsky
2017-04-05 6:23 ` [PATCH rdma-next V1 07/10] IB/mlx5: " Leon Romanovsky
2017-04-05 6:23 ` [PATCH rdma-next V1 08/10] IB/umem: Add support to huge ODP Leon Romanovsky
[not found] ` <20170405062359.26623-9-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2017-04-05 16:45 ` Shiraz Saleem
[not found] ` <20170405164539.GA9232-GOXS9JX10wfOxmVO0tvppfooFf0ArEBIu+b9c/7xato@public.gmane.org>
2017-04-05 17:33 ` Leon Romanovsky
2017-04-05 6:23 ` [PATCH rdma-next V1 09/10] IB/mlx5: Extract page fault code Leon Romanovsky
2017-04-05 6:23 ` Leon Romanovsky [this message]
2017-04-25 19:41 ` [PATCH rdma-next V1 00/10] ODP Fixes and Improvements Doug Ledford
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170405062359.26623-11-leon@kernel.org \
--to=leon-dgejt+ai2ygdnm+yrofe0a@public.gmane.org \
--cc=artemyko-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
--cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.