* [PATCH rdma-next 1/3] IB/mlx5: DEVX handling for indirection MKEY
2019-01-13 14:01 [PATCH rdma-next 0/3] Indirect MKEY over DEVX Leon Romanovsky
@ 2019-01-13 14:01 ` Leon Romanovsky
2019-01-13 14:01 ` [PATCH mlx5-next 2/3] IB/mlx5: Manage indirection mkey upon DEVX flow for ODP Leon Romanovsky
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Leon Romanovsky @ 2019-01-13 14:01 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Artemy Kovalyov, Yishai Hadas,
Saeed Mahameed, linux-netdev
From: Yishai Hadas <yishaih@mellanox.com>
Once an indirection MKEY is created umem valid bit shouldn't be set as
this MKEY doesn't really hold a umem.
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/devx.c | 38 +++++++++++++++++++++++++++++--
1 file changed, 36 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 8bb711da7ee1..b7ff2138ac2a 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1011,6 +1011,32 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
}
}
+static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
+ struct devx_obj *obj,
+ void *in, int in_len)
+{
+ int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) +
+ MLX5_FLD_SZ_BYTES(create_mkey_in,
+ memory_key_mkey_entry);
+ void *mkc;
+ u8 access_mode;
+
+ if (in_len < min_len)
+ return -EINVAL;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ access_mode = MLX5_GET(mkc, mkc, access_mode_1_0);
+ access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2;
+
+ if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS ||
+ access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+ return 0;
+
+ MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
+ return 0;
+}
+
static int devx_obj_cleanup(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
@@ -1032,6 +1058,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
int cmd_out_len = uverbs_attr_get_len(attrs,
MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
+ int cmd_in_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
void *cmd_out;
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
@@ -1060,10 +1088,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
return -ENOMEM;
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
- devx_set_umem_valid(cmd_in);
+ if (opcode == MLX5_CMD_OP_CREATE_MKEY) {
+ err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len);
+ if (err)
+ goto obj_free;
+ } else {
+ devx_set_umem_valid(cmd_in);
+ }
err = mlx5_cmd_exec(dev->mdev, cmd_in,
- uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN),
+ cmd_in_len,
cmd_out, cmd_out_len);
if (err)
goto obj_free;
--
2.19.1
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH mlx5-next 2/3] IB/mlx5: Manage indirection mkey upon DEVX flow for ODP
2019-01-13 14:01 [PATCH rdma-next 0/3] Indirect MKEY over DEVX Leon Romanovsky
2019-01-13 14:01 ` [PATCH rdma-next 1/3] IB/mlx5: DEVX handling for indirection MKEY Leon Romanovsky
@ 2019-01-13 14:01 ` Leon Romanovsky
2019-01-13 14:01 ` [PATCH rdma-next 3/3] IB/mlx5: Add support for ODP for DEVX indirection mkey Leon Romanovsky
2019-01-22 3:08 ` [PATCH rdma-next 0/3] Indirect MKEY over DEVX Jason Gunthorpe
3 siblings, 0 replies; 5+ messages in thread
From: Leon Romanovsky @ 2019-01-13 14:01 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Artemy Kovalyov, Yishai Hadas,
Saeed Mahameed, linux-netdev
From: Yishai Hadas <yishaih@mellanox.com>
Manage indirection mkey upon DEVX flow to support ODP.
To support a page fault event on the indirection mkey it needs to be
part of the device mkey radix tree.
Both the creation and the deletion flows for a DEVX object which is
indirection mkey were adapted to handle that.
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/devx.c | 89 +++++++++++++++++++++++++++-
drivers/infiniband/hw/mlx5/main.c | 1 +
drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 ++
include/linux/mlx5/driver.h | 1 +
4 files changed, 96 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index b7ff2138ac2a..bbf9a26d8fa6 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -17,12 +17,18 @@
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
+enum devx_obj_flags {
+ DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
+};
+
#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
struct devx_obj {
struct mlx5_core_dev *mdev;
u64 obj_id;
u32 dinlen; /* destroy inbox length */
u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
+ u32 flags;
+ struct mlx5_ib_devx_mr devx_mr;
};
struct devx_umem {
@@ -1011,6 +1017,36 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
}
}
+static int devx_handle_mkey_indirect(struct devx_obj *obj,
+ struct mlx5_ib_dev *dev,
+ void *in, void *out)
+{
+ struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
+ struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
+ unsigned long flags;
+ struct mlx5_core_mkey *mkey;
+ void *mkc;
+ u8 key;
+ int err;
+
+ mkey = &devx_mr->mmkey;
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ key = MLX5_GET(mkc, mkc, mkey_7_0);
+ mkey->key = mlx5_idx_to_mkey(
+ MLX5_GET(create_mkey_out, out, mkey_index)) | key;
+ mkey->type = MLX5_MKEY_INDIRECT_DEVX;
+ mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
+ mkey->size = MLX5_GET64(mkc, mkc, len);
+ mkey->pd = MLX5_GET(mkc, mkc, pd);
+ devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
+
+ write_lock_irqsave(&table->lock, flags);
+ err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key),
+ mkey);
+ write_unlock_irqrestore(&table->lock, flags);
+ return err;
+}
+
static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
struct devx_obj *obj,
void *in, int in_len)
@@ -1030,13 +1066,45 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2;
if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS ||
- access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+ access_mode == MLX5_MKC_ACCESS_MODE_KSM) {
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY;
return 0;
+ }
MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
return 0;
}
+static void devx_free_indirect_mkey(struct rcu_head *rcu)
+{
+ kfree(container_of(rcu, struct devx_obj, devx_mr.rcu));
+}
+
+/* This function to delete from the radix tree needs to be called before
+ * destroying the underlying mkey. Otherwise a race might occur in case that
+ * other thread will get the same mkey before this one will be deleted,
+ * in that case it will fail via inserting to the tree its own data.
+ *
+ * Note:
+ * An error in the destroy is not expected unless there is some other indirect
+ * mkey which points to this one. In a kernel cleanup flow it will be just
+ * destroyed in the iterative destruction call. In a user flow, in case
+ * the application didn't close in the expected order it's its own problem,
+ * the mkey won't be part of the tree, in both cases the kernel is safe.
+ */
+static void devx_cleanup_mkey(struct devx_obj *obj)
+{
+ struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table;
+ struct mlx5_core_mkey *del_mkey;
+ unsigned long flags;
+
+ write_lock_irqsave(&table->lock, flags);
+ del_mkey = radix_tree_delete(&table->tree,
+ mlx5_base_mkey(obj->devx_mr.mmkey.key));
+ write_unlock_irqrestore(&table->lock, flags);
+}
+
static int devx_obj_cleanup(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
@@ -1044,10 +1112,21 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
struct devx_obj *obj = uobject->object;
int ret;
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
+ devx_cleanup_mkey(obj);
+
ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ struct mlx5_ib_dev *dev = to_mdev(uobject->context->device);
+
+ call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
+ devx_free_indirect_mkey);
+ return ret;
+ }
+
kfree(obj);
return ret;
}
@@ -1108,6 +1187,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
&obj_id);
WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
+ if (err)
+ goto obj_destroy;
+ }
+
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
if (err)
goto obj_destroy;
@@ -1116,6 +1201,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
return 0;
obj_destroy:
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
+ devx_cleanup_mkey(obj);
mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
obj_free:
kfree(obj);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4b1b56d54301..6e7c21dcaa73 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5724,6 +5724,7 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ srcu_barrier(&dev->mr_srcu);
cleanup_srcu_struct(&dev->mr_srcu);
drain_workqueue(dev->advise_mr_wq);
destroy_workqueue(dev->advise_mr_wq);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b0a37ca2a714..819207190343 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -602,6 +602,12 @@ struct mlx5_ib_mw {
int ndescs;
};
+struct mlx5_ib_devx_mr {
+ struct mlx5_core_mkey mmkey;
+ int ndescs;
+ struct rcu_head rcu;
+};
+
struct mlx5_ib_umr_context {
struct ib_cqe cqe;
enum ib_wc_status status;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b6f5839f129a..619d6fee96a1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -364,6 +364,7 @@ struct mlx5_core_sig_ctx {
enum {
MLX5_MKEY_MR = 1,
MLX5_MKEY_MW,
+ MLX5_MKEY_INDIRECT_DEVX,
};
struct mlx5_core_mkey {
--
2.19.1
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH rdma-next 3/3] IB/mlx5: Add support for ODP for DEVX indirection mkey
2019-01-13 14:01 [PATCH rdma-next 0/3] Indirect MKEY over DEVX Leon Romanovsky
2019-01-13 14:01 ` [PATCH rdma-next 1/3] IB/mlx5: DEVX handling for indirection MKEY Leon Romanovsky
2019-01-13 14:01 ` [PATCH mlx5-next 2/3] IB/mlx5: Manage indirection mkey upon DEVX flow for ODP Leon Romanovsky
@ 2019-01-13 14:01 ` Leon Romanovsky
2019-01-22 3:08 ` [PATCH rdma-next 0/3] Indirect MKEY over DEVX Jason Gunthorpe
3 siblings, 0 replies; 5+ messages in thread
From: Leon Romanovsky @ 2019-01-13 14:01 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Artemy Kovalyov, Yishai Hadas,
Saeed Mahameed, linux-netdev
From: Yishai Hadas <yishaih@mellanox.com>
Add support for ODP for DEVX indirection mkey, it includes:
- Recognizing its type as part of the radix tree lookup.
- Use similar flow as done for the MW MKEY type.
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/odp.c | 26 +++++++++++++++++++++-----
1 file changed, 21 insertions(+), 5 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 8d46b1dc5658..82ac6cdc7130 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -685,6 +685,21 @@ struct pf_frame {
int depth;
};
+static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey)
+{
+ struct mlx5_ib_mw *mw;
+ struct mlx5_ib_devx_mr *devx_mr;
+
+ if (mmkey->type == MLX5_MKEY_MW) {
+ mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
+ return mw->ndescs;
+ }
+
+ devx_mr = container_of(mmkey, struct mlx5_ib_devx_mr,
+ mmkey);
+ return devx_mr->ndescs;
+}
+
/*
* Handle a single data segment in a page-fault WQE or RDMA region.
*
@@ -705,11 +720,11 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key,
bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
struct pf_frame *head = NULL, *frame;
struct mlx5_core_mkey *mmkey;
- struct mlx5_ib_mw *mw;
struct mlx5_ib_mr *mr;
struct mlx5_klm *pklm;
u32 *out = NULL;
size_t offset;
+ int ndescs;
srcu_key = srcu_read_lock(&dev->mr_srcu);
@@ -762,7 +777,8 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key,
break;
case MLX5_MKEY_MW:
- mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
+ case MLX5_MKEY_INDIRECT_DEVX:
+ ndescs = get_indirect_num_descs(mmkey);
if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
mlx5_ib_dbg(dev, "indirection level exceeded\n");
@@ -771,7 +787,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key,
}
outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
- sizeof(*pklm) * (mw->ndescs - 2);
+ sizeof(*pklm) * (ndescs - 2);
if (outlen > cur_outlen) {
kfree(out);
@@ -786,14 +802,14 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key,
pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
bsf0_klm0_pas_mtt0_1);
- ret = mlx5_core_query_mkey(dev->mdev, &mw->mmkey, out, outlen);
+ ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen);
if (ret)
goto srcu_unlock;
offset = io_virt - MLX5_GET64(query_mkey_out, out,
memory_key_mkey_entry.start_addr);
- for (i = 0; bcnt && i < mw->ndescs; i++, pklm++) {
+ for (i = 0; bcnt && i < ndescs; i++, pklm++) {
if (offset >= be32_to_cpu(pklm->bcount)) {
offset -= be32_to_cpu(pklm->bcount);
continue;
--
2.19.1
^ permalink raw reply related [flat|nested] 5+ messages in thread* Re: [PATCH rdma-next 0/3] Indirect MKEY over DEVX
2019-01-13 14:01 [PATCH rdma-next 0/3] Indirect MKEY over DEVX Leon Romanovsky
` (2 preceding siblings ...)
2019-01-13 14:01 ` [PATCH rdma-next 3/3] IB/mlx5: Add support for ODP for DEVX indirection mkey Leon Romanovsky
@ 2019-01-22 3:08 ` Jason Gunthorpe
3 siblings, 0 replies; 5+ messages in thread
From: Jason Gunthorpe @ 2019-01-22 3:08 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Doug Ledford, Leon Romanovsky, RDMA mailing list, Artemy Kovalyov,
Yishai Hadas, Saeed Mahameed, linux-netdev
On Sun, Jan 13, 2019 at 04:01:15PM +0200, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@mellanox.com>
>
> Hi,
>
> >From Yishai,
>
> This series enables using indirect mkey over DEVX.
>
> The first patch recognizes the creation of an indirect mkey as part of
> DEVX object creation, in that case the umem valid bit shouldn't be set as
> this mkey doesn't really hold a umem.
>
> The next two patches manage indirection mkey upon DEVX flow to support ODP.
>
> To support a page fault event on an indirection mkey it needs to be part
> of the device mkey radix tree. Both the creation and the deletion flows for a
> DEVX object which is indirection mkey were adapted to handle that.
>
> In addition, the page fault flow was adapted to support indirect mkey type,
> it's handled in a similar flow as of the memory window mkey type.
>
> Yishai
>
> Yishai Hadas (3):
> IB/mlx5: DEVX handling for indirection MKEY
> IB/mlx5: Manage indirection mkey upon DEVX flow for ODP
> IB/mlx5: Add support for ODP for DEVX indirection mkey
applied to for-next
Thanks,
Jason
^ permalink raw reply [flat|nested] 5+ messages in thread