* [PATCH rdma-next 1/4] IB/mlx5: Expose UAR object and its alloc/destroy commands
2020-03-18 12:43 [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode Leon Romanovsky
@ 2020-03-18 12:43 ` Leon Romanovsky
2020-03-18 12:43 ` [PATCH rdma-next 2/4] IB/mlx5: Extend CQ creation to get uar page index from user space Leon Romanovsky
` (3 subsequent siblings)
4 siblings, 0 replies; 19+ messages in thread
From: Leon Romanovsky @ 2020-03-18 12:43 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe; +Cc: Yishai Hadas, linux-rdma, Michael Guralnik
From: Yishai Hadas <yishaih@mellanox.com>
Expose UAR object and its alloc/destroy commands to be used over the
ioctl interface by user space applications.
This API supports both BF & NC modes and enables a dynamic allocation of
UARs once really needed.
As the number of driver objects was limited by the core ones when the
merged tree was prepared, we had to decrease the number of core objects to
enable the new UAR object usage.
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Michael Guralnik <michaelgur@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/main.c | 172 ++++++++++++++++++++--
drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +
include/rdma/uverbs_ioctl.h | 2 +-
include/uapi/rdma/mlx5_user_ioctl_cmds.h | 18 +++
include/uapi/rdma/mlx5_user_ioctl_verbs.h | 5 +
5 files changed, 189 insertions(+), 10 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 05804e4ba292..e8787af2d74d 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2022,6 +2022,17 @@ static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
}
+static u64 uar_index2paddress(struct mlx5_ib_dev *dev,
+ int uar_idx)
+{
+ unsigned int fw_uars_per_page;
+
+ fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_UARS_IN_PAGE : 1;
+
+ return (dev->mdev->bar_addr + (uar_idx / fw_uars_per_page) * PAGE_SIZE);
+}
+
static int get_command(unsigned long offset)
{
return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
@@ -2106,6 +2117,11 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
mutex_unlock(&var_table->bitmap_lock);
kfree(mentry);
break;
+ case MLX5_IB_MMAP_TYPE_UAR_WC:
+ case MLX5_IB_MMAP_TYPE_UAR_NC:
+ mlx5_cmd_free_uar(dev->mdev, mentry->page_idx);
+ kfree(mentry);
+ break;
default:
WARN_ON(true);
}
@@ -2257,7 +2273,8 @@ static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
mentry = to_mmmap(entry);
pfn = (mentry->address >> PAGE_SHIFT);
- if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR)
+ if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR ||
+ mentry->mmap_flag == MLX5_IB_MMAP_TYPE_UAR_NC)
prot = pgprot_noncached(vma->vm_page_prot);
else
prot = pgprot_writecombine(vma->vm_page_prot);
@@ -6079,9 +6096,9 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-static int var_obj_cleanup(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
+static int mmap_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_user_mmap_entry *obj = uobject->object;
@@ -6089,6 +6106,16 @@ static int var_obj_cleanup(struct ib_uobject *uobject,
return 0;
}
+static int mlx5_rdma_user_mmap_entry_insert(struct mlx5_ib_ucontext *c,
+ struct mlx5_user_mmap_entry *entry,
+ size_t length)
+{
+ return rdma_user_mmap_entry_insert_range(
+ &c->ibucontext, &entry->rdma_entry, length,
+ (MLX5_IB_MMAP_OFFSET_START << 16),
+ ((MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1));
+}
+
static struct mlx5_user_mmap_entry *
alloc_var_entry(struct mlx5_ib_ucontext *c)
{
@@ -6119,10 +6146,8 @@ alloc_var_entry(struct mlx5_ib_ucontext *c)
entry->page_idx = page_idx;
entry->mmap_flag = MLX5_IB_MMAP_TYPE_VAR;
- err = rdma_user_mmap_entry_insert_range(
- &c->ibucontext, &entry->rdma_entry, var_table->stride_size,
- MLX5_IB_MMAP_OFFSET_START << 16,
- (MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1);
+ err = mlx5_rdma_user_mmap_entry_insert(c, entry,
+ var_table->stride_size);
if (err)
goto err_insert;
@@ -6206,7 +6231,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UA_MANDATORY));
DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_VAR,
- UVERBS_TYPE_ALLOC_IDR(var_obj_cleanup),
+ UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup),
&UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_ALLOC),
&UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_DESTROY));
@@ -6218,6 +6243,134 @@ static bool var_is_supported(struct ib_device *device)
MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q);
}
+static struct mlx5_user_mmap_entry *
+alloc_uar_entry(struct mlx5_ib_ucontext *c,
+ enum mlx5_ib_uapi_uar_alloc_type alloc_type)
+{
+ struct mlx5_user_mmap_entry *entry;
+ struct mlx5_ib_dev *dev;
+ u32 uar_index;
+ int err;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
+
+ dev = to_mdev(c->ibucontext.device);
+ err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
+ if (err)
+ goto end;
+
+ entry->page_idx = uar_index;
+ entry->address = uar_index2paddress(dev, uar_index);
+ if (alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
+ entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_WC;
+ else
+ entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_NC;
+
+ err = mlx5_rdma_user_mmap_entry_insert(c, entry, PAGE_SIZE);
+ if (err)
+ goto err_insert;
+
+ return entry;
+
+err_insert:
+ mlx5_cmd_free_uar(dev->mdev, uar_index);
+end:
+ kfree(entry);
+ return ERR_PTR(err);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE);
+ enum mlx5_ib_uapi_uar_alloc_type alloc_type;
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_user_mmap_entry *entry;
+ u64 mmap_offset;
+ u32 length;
+ int err;
+
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+
+ err = uverbs_get_const(&alloc_type, attrs,
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE);
+ if (err)
+ return err;
+
+ if (alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF &&
+ alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC)
+ return -EOPNOTSUPP;
+
+ if (!to_mdev(c->ibucontext.device)->wc_support &&
+ alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
+ return -EOPNOTSUPP;
+
+ entry = alloc_uar_entry(c, alloc_type);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+
+ mmap_offset = mlx5_entry_to_mmap_offset(entry);
+ length = entry->rdma_entry.npages * PAGE_SIZE;
+ uobj->object = entry;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
+ &mmap_offset, sizeof(mmap_offset));
+ if (err)
+ goto err;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
+ &entry->page_idx, sizeof(entry->page_idx));
+ if (err)
+ goto err;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
+ &length, sizeof(length));
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ rdma_user_mmap_entry_remove(&entry->rdma_entry);
+ return err;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_UAR_OBJ_ALLOC,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_UAR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE,
+ enum mlx5_ib_uapi_uar_alloc_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_UAR_OBJ_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_UAR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR,
+ UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_ALLOC),
+ &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY));
+
ADD_UVERBS_ATTRIBUTES_SIMPLE(
mlx5_ib_dm,
UVERBS_OBJECT_DM,
@@ -6249,6 +6402,7 @@ static const struct uapi_definition mlx5_ib_defs[] = {
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR),
{}
};
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 85d4f3958e32..370b03db366b 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -124,6 +124,8 @@ enum {
enum mlx5_ib_mmap_type {
MLX5_IB_MMAP_TYPE_MEMIC = 1,
MLX5_IB_MMAP_TYPE_VAR = 2,
+ MLX5_IB_MMAP_TYPE_UAR_WC = 3,
+ MLX5_IB_MMAP_TYPE_UAR_NC = 4,
};
struct mlx5_ib_ucontext {
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 28570ac2b6a0..9f3b1e004046 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -173,7 +173,7 @@ enum uapi_radix_data {
UVERBS_API_OBJ_KEY_BITS = 5,
UVERBS_API_OBJ_KEY_SHIFT =
UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT,
- UVERBS_API_OBJ_KEY_NUM_CORE = 24,
+ UVERBS_API_OBJ_KEY_NUM_CORE = 20,
UVERBS_API_OBJ_KEY_NUM_DRIVER =
(1 << UVERBS_API_OBJ_KEY_BITS) - UVERBS_API_OBJ_KEY_NUM_CORE,
UVERBS_API_OBJ_KEY_MASK = GENMASK(31, UVERBS_API_OBJ_KEY_SHIFT),
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 8f4a417fc70a..24f3388c3182 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -131,6 +131,23 @@ enum mlx5_ib_var_obj_methods {
MLX5_IB_METHOD_VAR_OBJ_DESTROY,
};
+enum mlx5_ib_uar_alloc_attrs {
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE,
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
+};
+
+enum mlx5_ib_uar_obj_destroy_attrs {
+ MLX5_IB_ATTR_UAR_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_uar_obj_methods {
+ MLX5_IB_METHOD_UAR_OBJ_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_UAR_OBJ_DESTROY,
+};
+
enum mlx5_ib_devx_umem_reg_attrs {
MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
@@ -190,6 +207,7 @@ enum mlx5_ib_objects {
MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
MLX5_IB_OBJECT_VAR,
MLX5_IB_OBJECT_PP,
+ MLX5_IB_OBJECT_UAR,
};
enum mlx5_ib_flow_matcher_create_attrs {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index b4641a7865f7..3f7a97c28045 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -77,5 +77,10 @@ enum mlx5_ib_uapi_pp_alloc_flags {
MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX = 1 << 0,
};
+enum mlx5_ib_uapi_uar_alloc_type {
+ MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF = 0x0,
+ MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC = 0x1,
+};
+
#endif
--
2.24.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH rdma-next 2/4] IB/mlx5: Extend CQ creation to get uar page index from user space
2020-03-18 12:43 [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode Leon Romanovsky
2020-03-18 12:43 ` [PATCH rdma-next 1/4] IB/mlx5: Expose UAR object and its alloc/destroy commands Leon Romanovsky
@ 2020-03-18 12:43 ` Leon Romanovsky
2020-03-18 12:43 ` [PATCH rdma-next 3/4] IB/mlx5: Extend QP " Leon Romanovsky
` (2 subsequent siblings)
4 siblings, 0 replies; 19+ messages in thread
From: Leon Romanovsky @ 2020-03-18 12:43 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe; +Cc: Yishai Hadas, linux-rdma, Michael Guralnik
From: Yishai Hadas <yishaih@mellanox.com>
Extend CQ creation to get uar page index from user space, this mode can
be used with the UAR dynamic mode APIs to allocate/destroy a UAR object.
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Michael Guralnik <michaelgur@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/cq.c | 17 +++++++++++------
include/uapi/rdma/mlx5-abi.h | 4 ++++
2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 3dec3de903b7..eafedc2f697b 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -715,17 +715,19 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
udata, struct mlx5_ib_ucontext, ibucontext);
- ucmdlen = udata->inlen < sizeof(ucmd) ?
- (sizeof(ucmd) - sizeof(ucmd.flags)) : sizeof(ucmd);
+ ucmdlen = min(udata->inlen, sizeof(ucmd));
+ if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags))
+ return -EINVAL;
if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
return -EFAULT;
- if (ucmdlen == sizeof(ucmd) &&
- (ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD)))
+ if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD |
+ MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX)))
return -EINVAL;
- if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
+ if ((ucmd.cqe_size != 64 && ucmd.cqe_size != 128) ||
+ ucmd.reserved0 || ucmd.reserved1)
return -EINVAL;
*cqe_size = ucmd.cqe_size;
@@ -762,7 +764,10 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
MLX5_SET(cqc, cqc, log_page_size,
page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- *index = context->bfregi.sys_pages[0];
+ if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX)
+ *index = ucmd.uar_page_index;
+ else
+ *index = context->bfregi.sys_pages[0];
if (ucmd.cqe_comp_en == 1) {
int mini_cqe_format;
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 624f5b53eb1f..e900f9a64feb 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -266,6 +266,7 @@ struct mlx5_ib_query_device_resp {
enum mlx5_ib_create_cq_flags {
MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD = 1 << 0,
+ MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX = 1 << 1,
};
struct mlx5_ib_create_cq {
@@ -275,6 +276,9 @@ struct mlx5_ib_create_cq {
__u8 cqe_comp_en;
__u8 cqe_comp_res_format;
__u16 flags;
+ __u16 uar_page_index;
+ __u16 reserved0;
+ __u32 reserved1;
};
struct mlx5_ib_create_cq_resp {
--
2.24.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH rdma-next 3/4] IB/mlx5: Extend QP creation to get uar page index from user space
2020-03-18 12:43 [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode Leon Romanovsky
2020-03-18 12:43 ` [PATCH rdma-next 1/4] IB/mlx5: Expose UAR object and its alloc/destroy commands Leon Romanovsky
2020-03-18 12:43 ` [PATCH rdma-next 2/4] IB/mlx5: Extend CQ creation to get uar page index from user space Leon Romanovsky
@ 2020-03-18 12:43 ` Leon Romanovsky
2020-03-18 13:05 ` Jason Gunthorpe
2020-03-18 12:43 ` [PATCH mlx5-next 4/4] IB/mlx5: Move to fully dynamic UAR mode once user space supports it Leon Romanovsky
2020-03-18 12:54 ` [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode Jason Gunthorpe
4 siblings, 1 reply; 19+ messages in thread
From: Leon Romanovsky @ 2020-03-18 12:43 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe; +Cc: Yishai Hadas, linux-rdma, Michael Guralnik
From: Yishai Hadas <yishaih@mellanox.com>
Extend QP creation to get uar page index from user space, this mode can
be used with the UAR dynamic mode APIs to allocate/destroy a UAR object.
As part of enabling this option, we blocked the weird/unsupported
cross-channel option, which uses a hard-coded index 0.
This QP flag wasn't exposed to user space as part of any formal upstream
release, the dynamic option can allow having valid UAR page index
instead.
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Michael Guralnik <michaelgur@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/qp.c | 27 +++++++++++++++++----------
include/uapi/rdma/mlx5-abi.h | 1 +
2 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 88db580f7272..380ba3321851 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -919,6 +919,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
void *qpc;
int err;
u16 uid;
+ u32 uar_flags;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
if (err) {
@@ -928,24 +929,29 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext,
ibucontext);
- if (ucmd.flags & MLX5_QP_FLAG_BFREG_INDEX) {
+ uar_flags = ucmd.flags & (MLX5_QP_FLAG_UAR_PAGE_INDEX |
+ MLX5_QP_FLAG_BFREG_INDEX);
+ switch (uar_flags) {
+ case MLX5_QP_FLAG_UAR_PAGE_INDEX:
+ uar_index = ucmd.bfreg_index;
+ bfregn = MLX5_IB_INVALID_BFREG;
+ break;
+ case MLX5_QP_FLAG_BFREG_INDEX:
uar_index = bfregn_to_uar_index(dev, &context->bfregi,
ucmd.bfreg_index, true);
if (uar_index < 0)
return uar_index;
-
bfregn = MLX5_IB_INVALID_BFREG;
- } else if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) {
- /*
- * TBD: should come from the verbs when we have the API
- */
- /* In CROSS_CHANNEL CQ and QP must use the same UAR */
- bfregn = MLX5_CROSS_CHANNEL_BFREG;
- }
- else {
+ break;
+ case 0:
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ return -EINVAL;
bfregn = alloc_bfreg(dev, &context->bfregi);
if (bfregn < 0)
return bfregn;
+ break;
+ default:
+ return -EINVAL;
}
mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
@@ -2100,6 +2106,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC |
MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
MLX5_QP_FLAG_TUNNEL_OFFLOADS |
+ MLX5_QP_FLAG_UAR_PAGE_INDEX |
MLX5_QP_FLAG_TYPE_DCI |
MLX5_QP_FLAG_TYPE_DCT))
return -EINVAL;
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index e900f9a64feb..a65d60b44829 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -49,6 +49,7 @@ enum {
MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC = 1 << 7,
MLX5_QP_FLAG_ALLOW_SCATTER_CQE = 1 << 8,
MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE = 1 << 9,
+ MLX5_QP_FLAG_UAR_PAGE_INDEX = 1 << 10,
};
enum {
--
2.24.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH rdma-next 3/4] IB/mlx5: Extend QP creation to get uar page index from user space
2020-03-18 12:43 ` [PATCH rdma-next 3/4] IB/mlx5: Extend QP " Leon Romanovsky
@ 2020-03-18 13:05 ` Jason Gunthorpe
2020-03-18 13:08 ` Leon Romanovsky
0 siblings, 1 reply; 19+ messages in thread
From: Jason Gunthorpe @ 2020-03-18 13:05 UTC (permalink / raw)
To: Leon Romanovsky; +Cc: Doug Ledford, Yishai Hadas, linux-rdma, Michael Guralnik
On Wed, Mar 18, 2020 at 02:43:28PM +0200, Leon Romanovsky wrote:
> From: Yishai Hadas <yishaih@mellanox.com>
>
> Extend QP creation to get uar page index from user space, this mode can
> be used with the UAR dynamic mode APIs to allocate/destroy a UAR object.
>
> As part of enabling this option blocked the weird/un-supported cross
> channel option which uses index 0 hard-coded.
>
> This QP flag wasn't exposed to user space as part of any formal upstream
> release, the dynamic option can allow having valid UAR page index
> instead.
>
> Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
> Reviewed-by: Michael Guralnik <michaelgur@mellanox.com>
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> drivers/infiniband/hw/mlx5/qp.c | 27 +++++++++++++++++----------
> include/uapi/rdma/mlx5-abi.h | 1 +
> 2 files changed, 18 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
> index 88db580f7272..380ba3321851 100644
> +++ b/drivers/infiniband/hw/mlx5/qp.c
> @@ -919,6 +919,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
> void *qpc;
> int err;
> u16 uid;
> + u32 uar_flags;
>
> err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
> if (err) {
> @@ -928,24 +929,29 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
>
> context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext,
> ibucontext);
> - if (ucmd.flags & MLX5_QP_FLAG_BFREG_INDEX) {
> + uar_flags = ucmd.flags & (MLX5_QP_FLAG_UAR_PAGE_INDEX |
> + MLX5_QP_FLAG_BFREG_INDEX);
> + switch (uar_flags) {
Do we really need this temporary uar_flags?
Jason
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH rdma-next 3/4] IB/mlx5: Extend QP creation to get uar page index from user space
2020-03-18 13:05 ` Jason Gunthorpe
@ 2020-03-18 13:08 ` Leon Romanovsky
0 siblings, 0 replies; 19+ messages in thread
From: Leon Romanovsky @ 2020-03-18 13:08 UTC (permalink / raw)
To: Jason Gunthorpe; +Cc: Doug Ledford, Yishai Hadas, linux-rdma, Michael Guralnik
On Wed, Mar 18, 2020 at 10:05:14AM -0300, Jason Gunthorpe wrote:
> On Wed, Mar 18, 2020 at 02:43:28PM +0200, Leon Romanovsky wrote:
> > From: Yishai Hadas <yishaih@mellanox.com>
> >
> > Extend QP creation to get uar page index from user space, this mode can
> > be used with the UAR dynamic mode APIs to allocate/destroy a UAR object.
> >
> > As part of enabling this option blocked the weird/un-supported cross
> > channel option which uses index 0 hard-coded.
> >
> > This QP flag wasn't exposed to user space as part of any formal upstream
> > release, the dynamic option can allow having valid UAR page index
> > instead.
> >
> > Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
> > Reviewed-by: Michael Guralnik <michaelgur@mellanox.com>
> > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > drivers/infiniband/hw/mlx5/qp.c | 27 +++++++++++++++++----------
> > include/uapi/rdma/mlx5-abi.h | 1 +
> > 2 files changed, 18 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
> > index 88db580f7272..380ba3321851 100644
> > +++ b/drivers/infiniband/hw/mlx5/qp.c
> > @@ -919,6 +919,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
> > void *qpc;
> > int err;
> > u16 uid;
> > + u32 uar_flags;
> >
> > err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
> > if (err) {
> > @@ -928,24 +929,29 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
> >
> > context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext,
> > ibucontext);
> > - if (ucmd.flags & MLX5_QP_FLAG_BFREG_INDEX) {
> > + uar_flags = ucmd.flags & (MLX5_QP_FLAG_UAR_PAGE_INDEX |
> > + MLX5_QP_FLAG_BFREG_INDEX);
> > + switch (uar_flags) {
>
> Do we really need this temporary uar_flags?
I think that it is more clear; this is why I used a temp variable.
Thanks
>
> Jason
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH mlx5-next 4/4] IB/mlx5: Move to fully dynamic UAR mode once user space supports it
2020-03-18 12:43 [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode Leon Romanovsky
` (2 preceding siblings ...)
2020-03-18 12:43 ` [PATCH rdma-next 3/4] IB/mlx5: Extend QP " Leon Romanovsky
@ 2020-03-18 12:43 ` Leon Romanovsky
2020-03-18 23:38 ` Saeed Mahameed
2020-03-18 12:54 ` [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode Jason Gunthorpe
4 siblings, 1 reply; 19+ messages in thread
From: Leon Romanovsky @ 2020-03-18 12:43 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Yishai Hadas, linux-rdma, Michael Guralnik, netdev,
Saeed Mahameed
From: Yishai Hadas <yishaih@mellanox.com>
Move to fully dynamic UAR mode once user space supports it.
In this case we prevent any legacy mode of UARs on the allocated context
and prevent redundant allocation of the static ones.
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Michael Guralnik <michaelgur@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/cq.c | 8 ++++++--
drivers/infiniband/hw/mlx5/main.c | 13 ++++++++++++-
drivers/infiniband/hw/mlx5/qp.c | 6 ++++++
include/linux/mlx5/driver.h | 1 +
include/uapi/rdma/mlx5-abi.h | 1 +
5 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index eafedc2f697b..146ba2966744 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -764,10 +764,14 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
MLX5_SET(cqc, cqc, log_page_size,
page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX)
+ if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
*index = ucmd.uar_page_index;
- else
+ } else if (context->bfregi.lib_uar_dyn) {
+ err = -EINVAL;
+ goto err_cqb;
+ } else {
*index = context->bfregi.sys_pages[0];
+ }
if (ucmd.cqe_comp_en == 1) {
int mini_cqe_format;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e8787af2d74d..e355e06bf3ac 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1787,6 +1787,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
max_cqe_version);
u32 dump_fill_mkey;
bool lib_uar_4k;
+ bool lib_uar_dyn;
if (!dev->ib_active)
return -EAGAIN;
@@ -1845,8 +1846,14 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
}
lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
+ lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
bfregi = &context->bfregi;
+ if (lib_uar_dyn) {
+ bfregi->lib_uar_dyn = lib_uar_dyn;
+ goto uar_done;
+ }
+
/* updates req->total_num_bfregs */
err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
if (err)
@@ -1873,6 +1880,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
if (err)
goto out_sys_pages;
+uar_done:
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
err = mlx5_ib_devx_create(dev, true);
if (err < 0)
@@ -1894,7 +1902,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- resp.tot_bfregs = req.total_num_bfregs;
+ resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs;
resp.num_ports = dev->num_ports;
if (offsetofend(typeof(resp), cqe_version) <= udata->outlen)
@@ -2142,6 +2150,9 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
int max_valid_idx = dyn_uar ? bfregi->num_sys_pages :
bfregi->num_static_sys_pages;
+ if (bfregi->lib_uar_dyn)
+ return -EINVAL;
+
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 380ba3321851..319d514a2223 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -697,6 +697,9 @@ static int alloc_bfreg(struct mlx5_ib_dev *dev,
{
int bfregn = -ENOMEM;
+ if (bfregi->lib_uar_dyn)
+ return -EINVAL;
+
mutex_lock(&bfregi->lock);
if (bfregi->ver >= 2) {
bfregn = alloc_high_class_bfreg(dev, bfregi);
@@ -768,6 +771,9 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
u32 index_of_sys_page;
u32 offset;
+ if (bfregi->lib_uar_dyn)
+ return -EINVAL;
+
bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
MLX5_NON_FP_BFREGS_PER_UAR;
index_of_sys_page = bfregn / bfregs_per_sys_page;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3f10a9633012..e4ab0eb9d202 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -224,6 +224,7 @@ struct mlx5_bfreg_info {
struct mutex lock;
u32 ver;
bool lib_uar_4k;
+ u8 lib_uar_dyn : 1;
u32 num_sys_pages;
u32 num_static_sys_pages;
u32 total_num_bfregs;
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index a65d60b44829..df1cc3641bda 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -79,6 +79,7 @@ struct mlx5_ib_alloc_ucontext_req {
enum mlx5_lib_caps {
MLX5_LIB_CAP_4K_UAR = (__u64)1 << 0,
+ MLX5_LIB_CAP_DYN_UAR = (__u64)1 << 1,
};
enum mlx5_ib_alloc_uctx_v2_flags {
--
2.24.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH mlx5-next 4/4] IB/mlx5: Move to fully dynamic UAR mode once user space supports it
2020-03-18 12:43 ` [PATCH mlx5-next 4/4] IB/mlx5: Move to fully dynamic UAR mode once user space supports it Leon Romanovsky
@ 2020-03-18 23:38 ` Saeed Mahameed
2020-03-19 5:55 ` Leon Romanovsky
0 siblings, 1 reply; 19+ messages in thread
From: Saeed Mahameed @ 2020-03-18 23:38 UTC (permalink / raw)
To: Jason Gunthorpe, leon@kernel.org, dledford@redhat.com
Cc: Michael Guralnik, Yishai Hadas, netdev@vger.kernel.org,
linux-rdma@vger.kernel.org
On Wed, 2020-03-18 at 14:43 +0200, Leon Romanovsky wrote:
> From: Yishai Hadas <yishaih@mellanox.com>
>
> Move to fully dynamic UAR mode once user space supports it.
> In this case we prevent any legacy mode of UARs on the allocated
> context
> and prevent redundant allocation of the static ones.
>
> Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
> Reviewed-by: Michael Guralnik <michaelgur@mellanox.com>
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> ---
> drivers/infiniband/hw/mlx5/cq.c | 8 ++++++--
> drivers/infiniband/hw/mlx5/main.c | 13 ++++++++++++-
> drivers/infiniband/hw/mlx5/qp.c | 6 ++++++
> include/linux/mlx5/driver.h | 1 +
> include/uapi/rdma/mlx5-abi.h | 1 +
> 5 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mlx5/cq.c
> b/drivers/infiniband/hw/mlx5/cq.c
> index eafedc2f697b..146ba2966744 100644
> --- a/drivers/infiniband/hw/mlx5/cq.c
> +++ b/drivers/infiniband/hw/mlx5/cq.c
> @@ -764,10 +764,14 @@ static int create_cq_user(struct mlx5_ib_dev
> *dev, struct ib_udata *udata,
> MLX5_SET(cqc, cqc, log_page_size,
> page_shift - MLX5_ADAPTER_PAGE_SHIFT);
>
> - if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX)
> + if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
> *index = ucmd.uar_page_index;
> - else
> + } else if (context->bfregi.lib_uar_dyn) {
> + err = -EINVAL;
> + goto err_cqb;
> + } else {
> *index = context->bfregi.sys_pages[0];
> + }
>
> if (ucmd.cqe_comp_en == 1) {
> int mini_cqe_format;
> diff --git a/drivers/infiniband/hw/mlx5/main.c
> b/drivers/infiniband/hw/mlx5/main.c
> index e8787af2d74d..e355e06bf3ac 100644
> --- a/drivers/infiniband/hw/mlx5/main.c
> +++ b/drivers/infiniband/hw/mlx5/main.c
> @@ -1787,6 +1787,7 @@ static int mlx5_ib_alloc_ucontext(struct
> ib_ucontext *uctx,
> max_cqe_version);
> u32 dump_fill_mkey;
> bool lib_uar_4k;
> + bool lib_uar_dyn;
>
> if (!dev->ib_active)
> return -EAGAIN;
> @@ -1845,8 +1846,14 @@ static int mlx5_ib_alloc_ucontext(struct
> ib_ucontext *uctx,
> }
>
> lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
> + lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
> bfregi = &context->bfregi;
>
> + if (lib_uar_dyn) {
> + bfregi->lib_uar_dyn = lib_uar_dyn;
> + goto uar_done;
> + }
> +
> /* updates req->total_num_bfregs */
> err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
> if (err)
> @@ -1873,6 +1880,7 @@ static int mlx5_ib_alloc_ucontext(struct
> ib_ucontext *uctx,
> if (err)
> goto out_sys_pages;
>
> +uar_done:
> if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
> err = mlx5_ib_devx_create(dev, true);
> if (err < 0)
> @@ -1894,7 +1902,7 @@ static int mlx5_ib_alloc_ucontext(struct
> ib_ucontext *uctx,
> INIT_LIST_HEAD(&context->db_page_list);
> mutex_init(&context->db_page_mutex);
>
> - resp.tot_bfregs = req.total_num_bfregs;
> + resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs;
> resp.num_ports = dev->num_ports;
>
> if (offsetofend(typeof(resp), cqe_version) <= udata->outlen)
> @@ -2142,6 +2150,9 @@ static int uar_mmap(struct mlx5_ib_dev *dev,
> enum mlx5_ib_mmap_cmd cmd,
> int max_valid_idx = dyn_uar ? bfregi->num_sys_pages :
> bfregi->num_static_sys_pages;
>
> + if (bfregi->lib_uar_dyn)
> + return -EINVAL;
> +
> if (vma->vm_end - vma->vm_start != PAGE_SIZE)
> return -EINVAL;
>
> diff --git a/drivers/infiniband/hw/mlx5/qp.c
> b/drivers/infiniband/hw/mlx5/qp.c
> index 380ba3321851..319d514a2223 100644
> --- a/drivers/infiniband/hw/mlx5/qp.c
> +++ b/drivers/infiniband/hw/mlx5/qp.c
> @@ -697,6 +697,9 @@ static int alloc_bfreg(struct mlx5_ib_dev *dev,
> {
> int bfregn = -ENOMEM;
>
> + if (bfregi->lib_uar_dyn)
> + return -EINVAL;
> +
> mutex_lock(&bfregi->lock);
> if (bfregi->ver >= 2) {
> bfregn = alloc_high_class_bfreg(dev, bfregi);
> @@ -768,6 +771,9 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
> u32 index_of_sys_page;
> u32 offset;
>
> + if (bfregi->lib_uar_dyn)
> + return -EINVAL;
> +
> bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi-
> >lib_uar_4k) *
> MLX5_NON_FP_BFREGS_PER_UAR;
> index_of_sys_page = bfregn / bfregs_per_sys_page;
> diff --git a/include/linux/mlx5/driver.h
> b/include/linux/mlx5/driver.h
> index 3f10a9633012..e4ab0eb9d202 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -224,6 +224,7 @@ struct mlx5_bfreg_info {
> struct mutex lock;
> u32 ver;
> bool lib_uar_4k;
> + u8 lib_uar_dyn : 1;
> u32 num_sys_pages;
> > u32 num_static_sys_pages;
> u32 total_num_bfregs;
this struct is not used in mlx5_core, shall we move it to mlx5_ib as
part of this patch?
so next time you need to update it you don't need to bother netdev busy
people about it ;-)
> diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-
> abi.h
> index a65d60b44829..df1cc3641bda 100644
> --- a/include/uapi/rdma/mlx5-abi.h
> +++ b/include/uapi/rdma/mlx5-abi.h
> @@ -79,6 +79,7 @@ struct mlx5_ib_alloc_ucontext_req {
>
> enum mlx5_lib_caps {
> MLX5_LIB_CAP_4K_UAR = (__u64)1 << 0,
> + MLX5_LIB_CAP_DYN_UAR = (__u64)1 << 1,
> };
>
> enum mlx5_ib_alloc_uctx_v2_flags {
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH mlx5-next 4/4] IB/mlx5: Move to fully dynamic UAR mode once user space supports it
2020-03-18 23:38 ` Saeed Mahameed
@ 2020-03-19 5:55 ` Leon Romanovsky
0 siblings, 0 replies; 19+ messages in thread
From: Leon Romanovsky @ 2020-03-19 5:55 UTC (permalink / raw)
To: Saeed Mahameed
Cc: Jason Gunthorpe, dledford@redhat.com, Michael Guralnik,
Yishai Hadas, netdev@vger.kernel.org, linux-rdma@vger.kernel.org
On Wed, Mar 18, 2020 at 11:38:32PM +0000, Saeed Mahameed wrote:
> On Wed, 2020-03-18 at 14:43 +0200, Leon Romanovsky wrote:
> > From: Yishai Hadas <yishaih@mellanox.com>
> >
> > Move to fully dynamic UAR mode once user space supports it.
> > In this case we prevent any legacy mode of UARs on the allocated
> > context
> > and prevent redundant allocation of the static ones.
> >
> > Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
> > Reviewed-by: Michael Guralnik <michaelgur@mellanox.com>
> > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > ---
> > drivers/infiniband/hw/mlx5/cq.c | 8 ++++++--
> > drivers/infiniband/hw/mlx5/main.c | 13 ++++++++++++-
> > drivers/infiniband/hw/mlx5/qp.c | 6 ++++++
> > include/linux/mlx5/driver.h | 1 +
> > include/uapi/rdma/mlx5-abi.h | 1 +
> > 5 files changed, 26 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/infiniband/hw/mlx5/cq.c
> > b/drivers/infiniband/hw/mlx5/cq.c
> > index eafedc2f697b..146ba2966744 100644
> > --- a/drivers/infiniband/hw/mlx5/cq.c
> > +++ b/drivers/infiniband/hw/mlx5/cq.c
> > @@ -764,10 +764,14 @@ static int create_cq_user(struct mlx5_ib_dev
> > *dev, struct ib_udata *udata,
> > MLX5_SET(cqc, cqc, log_page_size,
> > page_shift - MLX5_ADAPTER_PAGE_SHIFT);
> >
> > - if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX)
> > + if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
> > *index = ucmd.uar_page_index;
> > - else
> > + } else if (context->bfregi.lib_uar_dyn) {
> > + err = -EINVAL;
> > + goto err_cqb;
> > + } else {
> > *index = context->bfregi.sys_pages[0];
> > + }
> >
> > if (ucmd.cqe_comp_en == 1) {
> > int mini_cqe_format;
> > diff --git a/drivers/infiniband/hw/mlx5/main.c
> > b/drivers/infiniband/hw/mlx5/main.c
> > index e8787af2d74d..e355e06bf3ac 100644
> > --- a/drivers/infiniband/hw/mlx5/main.c
> > +++ b/drivers/infiniband/hw/mlx5/main.c
> > @@ -1787,6 +1787,7 @@ static int mlx5_ib_alloc_ucontext(struct
> > ib_ucontext *uctx,
> > max_cqe_version);
> > u32 dump_fill_mkey;
> > bool lib_uar_4k;
> > + bool lib_uar_dyn;
> >
> > if (!dev->ib_active)
> > return -EAGAIN;
> > @@ -1845,8 +1846,14 @@ static int mlx5_ib_alloc_ucontext(struct
> > ib_ucontext *uctx,
> > }
> >
> > lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
> > + lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
> > bfregi = &context->bfregi;
> >
> > + if (lib_uar_dyn) {
> > + bfregi->lib_uar_dyn = lib_uar_dyn;
> > + goto uar_done;
> > + }
> > +
> > /* updates req->total_num_bfregs */
> > err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
> > if (err)
> > @@ -1873,6 +1880,7 @@ static int mlx5_ib_alloc_ucontext(struct
> > ib_ucontext *uctx,
> > if (err)
> > goto out_sys_pages;
> >
> > +uar_done:
> > if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
> > err = mlx5_ib_devx_create(dev, true);
> > if (err < 0)
> > @@ -1894,7 +1902,7 @@ static int mlx5_ib_alloc_ucontext(struct
> > ib_ucontext *uctx,
> > INIT_LIST_HEAD(&context->db_page_list);
> > mutex_init(&context->db_page_mutex);
> >
> > - resp.tot_bfregs = req.total_num_bfregs;
> > + resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs;
> > resp.num_ports = dev->num_ports;
> >
> > if (offsetofend(typeof(resp), cqe_version) <= udata->outlen)
> > @@ -2142,6 +2150,9 @@ static int uar_mmap(struct mlx5_ib_dev *dev,
> > enum mlx5_ib_mmap_cmd cmd,
> > int max_valid_idx = dyn_uar ? bfregi->num_sys_pages :
> > bfregi->num_static_sys_pages;
> >
> > + if (bfregi->lib_uar_dyn)
> > + return -EINVAL;
> > +
> > if (vma->vm_end - vma->vm_start != PAGE_SIZE)
> > return -EINVAL;
> >
> > diff --git a/drivers/infiniband/hw/mlx5/qp.c
> > b/drivers/infiniband/hw/mlx5/qp.c
> > index 380ba3321851..319d514a2223 100644
> > --- a/drivers/infiniband/hw/mlx5/qp.c
> > +++ b/drivers/infiniband/hw/mlx5/qp.c
> > @@ -697,6 +697,9 @@ static int alloc_bfreg(struct mlx5_ib_dev *dev,
> > {
> > int bfregn = -ENOMEM;
> >
> > + if (bfregi->lib_uar_dyn)
> > + return -EINVAL;
> > +
> > mutex_lock(&bfregi->lock);
> > if (bfregi->ver >= 2) {
> > bfregn = alloc_high_class_bfreg(dev, bfregi);
> > @@ -768,6 +771,9 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
> > u32 index_of_sys_page;
> > u32 offset;
> >
> > + if (bfregi->lib_uar_dyn)
> > + return -EINVAL;
> > +
> > bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi-
> > >lib_uar_4k) *
> > MLX5_NON_FP_BFREGS_PER_UAR;
> > index_of_sys_page = bfregn / bfregs_per_sys_page;
> > diff --git a/include/linux/mlx5/driver.h
> > b/include/linux/mlx5/driver.h
> > index 3f10a9633012..e4ab0eb9d202 100644
> > --- a/include/linux/mlx5/driver.h
> > +++ b/include/linux/mlx5/driver.h
> > @@ -224,6 +224,7 @@ struct mlx5_bfreg_info {
> > struct mutex lock;
> > u32 ver;
> > bool lib_uar_4k;
> > + u8 lib_uar_dyn : 1;
> > u32 num_sys_pages;
> > u32 num_static_sys_pages;
> > u32 total_num_bfregs;
>
> this struct is not used in mlx5_core, shall we move it to mlx5_ib as
> part of this patch?
> so next time you need to update it you don't need to bother netdev busy
> people about it ;-)
Thanks, completely agree. I'll do it now.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode
2020-03-18 12:43 [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode Leon Romanovsky
` (3 preceding siblings ...)
2020-03-18 12:43 ` [PATCH mlx5-next 4/4] IB/mlx5: Move to fully dynamic UAR mode once user space supports it Leon Romanovsky
@ 2020-03-18 12:54 ` Jason Gunthorpe
2020-03-18 13:14 ` Leon Romanovsky
4 siblings, 1 reply; 19+ messages in thread
From: Jason Gunthorpe @ 2020-03-18 12:54 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Doug Ledford, Leon Romanovsky, linux-rdma, Michael Guralnik,
netdev, Saeed Mahameed, Yishai Hadas
On Wed, Mar 18, 2020 at 02:43:25PM +0200, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@mellanox.com>
>
> From Yishai,
>
> This series exposes API to enable a dynamic allocation and management of a
> UAR which now becomes to be a regular uobject.
>
> Moving to that mode enables allocating a UAR only upon demand and drop the
> redundant static allocation of UARs upon context creation.
>
> In addition, it allows master and secondary processes that own the same command
> FD to allocate and manage UARs according to their needs, this can’t be achieved
> today.
>
> As part of this option, QP & CQ creation flows were adapted to support this
> dynamic UAR mode once asked by user space.
>
> Once this mode is asked by mlx5 user space driver on a given context, it will
> be mutual exclusive, means both the static and legacy dynamic modes for using
> UARs will be blocked.
>
> The legacy modes are supported for backward compatible reasons, looking
> forward we expect this new mode to be the default.
We are starting to accumulate a lot of code that is now old-rdma-core
only.
I have been wondering if we should add something like
#if CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION < 21
#endif
So we can keep track of what is actually a used code flow and what is
now hard to test legacy code.
eg this config would also disable the write interface(), turn off
compat write interfaces as they are switched to use ioctl, etc, etc.
Jason
^ permalink raw reply [flat|nested] 19+ messages in thread

* Re: [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode
2020-03-18 12:54 ` [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode Jason Gunthorpe
@ 2020-03-18 13:14 ` Leon Romanovsky
2020-03-18 13:21 ` Jason Gunthorpe
0 siblings, 1 reply; 19+ messages in thread
From: Leon Romanovsky @ 2020-03-18 13:14 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Doug Ledford, linux-rdma, Michael Guralnik, netdev,
Saeed Mahameed, Yishai Hadas
On Wed, Mar 18, 2020 at 09:54:59AM -0300, Jason Gunthorpe wrote:
> On Wed, Mar 18, 2020 at 02:43:25PM +0200, Leon Romanovsky wrote:
> > From: Leon Romanovsky <leonro@mellanox.com>
> >
> > From Yishai,
> >
> > This series exposes API to enable a dynamic allocation and management of a
> > UAR which now becomes to be a regular uobject.
> >
> > Moving to that mode enables allocating a UAR only upon demand and drop the
> > redundant static allocation of UARs upon context creation.
> >
> > In addition, it allows master and secondary processes that own the same command
> > FD to allocate and manage UARs according to their needs, this can’t be achieved
> > today.
> >
> > As part of this option, QP & CQ creation flows were adapted to support this
> > dynamic UAR mode once asked by user space.
> >
> > Once this mode is asked by mlx5 user space driver on a given context, it will
> > be mutual exclusive, means both the static and legacy dynamic modes for using
> > UARs will be blocked.
> >
> > The legacy modes are supported for backward compatible reasons, looking
> > forward we expect this new mode to be the default.
>
> We are starting to accumulate a lot of code that is now old-rdma-core
> only.
Agree
>
> I have been wondering if we should add something like
>
> #if CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION < 21
> #endif
From one side it will definitely help to see old code, but from another
it will create many ifdef inside of the code with a very little chance
of testing. Also we will continue to have the same problem to decide when
we can delete this code.
>
> So we can keep track of what is actually a used code flow and what is
> now hard to test legacy code.
>
> eg this config would also disable the write interface(), turn off
> compat write interfaces as they are switched to use ioctl, etc, etc.
What about if we introduce one ifdef, let's say CONFIG_INFINIBAND_LEGACY
and put everything that will be declared as legacy to that bucket? And
once every 5 (???) years delete everything from that bucket.
>
> Jason
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode
2020-03-18 13:14 ` Leon Romanovsky
@ 2020-03-18 13:21 ` Jason Gunthorpe
2020-03-18 13:56 ` Leon Romanovsky
0 siblings, 1 reply; 19+ messages in thread
From: Jason Gunthorpe @ 2020-03-18 13:21 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Doug Ledford, linux-rdma, Michael Guralnik, netdev,
Saeed Mahameed, Yishai Hadas
On Wed, Mar 18, 2020 at 03:14:50PM +0200, Leon Romanovsky wrote:
> On Wed, Mar 18, 2020 at 09:54:59AM -0300, Jason Gunthorpe wrote:
> > On Wed, Mar 18, 2020 at 02:43:25PM +0200, Leon Romanovsky wrote:
> > > From: Leon Romanovsky <leonro@mellanox.com>
> > >
> > > From Yishai,
> > >
> > > This series exposes API to enable a dynamic allocation and management of a
> > > UAR which now becomes to be a regular uobject.
> > >
> > > Moving to that mode enables allocating a UAR only upon demand and drop the
> > > redundant static allocation of UARs upon context creation.
> > >
> > > In addition, it allows master and secondary processes that own the same command
> > > FD to allocate and manage UARs according to their needs, this can’t be achieved
> > > today.
> > >
> > > As part of this option, QP & CQ creation flows were adapted to support this
> > > dynamic UAR mode once asked by user space.
> > >
> > > Once this mode is asked by mlx5 user space driver on a given context, it will
> > > be mutual exclusive, means both the static and legacy dynamic modes for using
> > > UARs will be blocked.
> > >
> > > The legacy modes are supported for backward compatible reasons, looking
> > > forward we expect this new mode to be the default.
> >
> > We are starting to accumulate a lot of code that is now old-rdma-core
> > only.
>
> Agree
>
> >
> > I have been wondering if we should add something like
> >
> > #if CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION < 21
> > #endif
>
> From one side it will definitely help to see old code, but from another
> it will create many ifdef inside of the code with a very little chance
> of testing. Also we will continue to have the same problem to decide when
> we can delete this code.
Well, it doesn't have to be an #ifdef, eg just sticking
if (CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION >= 21)
return -ENOPROTOOPT;
at the top of obsolete functions would go a long way
> > So we can keep track of what is actually a used code flow and what is
> > now hard to test legacy code.
> >
> > eg this config would also disable the write interface(), turn off
> > compat write interfaces as they are switched to use ioctl, etc, etc.
>
> What about if we introduce one ifdef, let's say CONFIG_INFINIBAND_LEGACY
> and put everything that will be declared as legacy to that bucket? And
> once every 5 (???) years delete everything from that bucket.
It is much harder to see what is really old vs only a little old
I'm not sure we can ever completely delete any of this, but at least
the distros can make an informed choice to either do more detailed
test of old libraries or disable those code paths.
Jason
^ permalink raw reply [flat|nested] 19+ messages in thread

* Re: [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode
2020-03-18 13:21 ` Jason Gunthorpe
@ 2020-03-18 13:56 ` Leon Romanovsky
2020-03-18 14:00 ` Jason Gunthorpe
0 siblings, 1 reply; 19+ messages in thread
From: Leon Romanovsky @ 2020-03-18 13:56 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Doug Ledford, linux-rdma, Michael Guralnik, netdev,
Saeed Mahameed, Yishai Hadas
On Wed, Mar 18, 2020 at 10:21:00AM -0300, Jason Gunthorpe wrote:
> On Wed, Mar 18, 2020 at 03:14:50PM +0200, Leon Romanovsky wrote:
> > On Wed, Mar 18, 2020 at 09:54:59AM -0300, Jason Gunthorpe wrote:
> > > On Wed, Mar 18, 2020 at 02:43:25PM +0200, Leon Romanovsky wrote:
> > > > From: Leon Romanovsky <leonro@mellanox.com>
> > > >
> > > > From Yishai,
> > > >
> > > > This series exposes API to enable a dynamic allocation and management of a
> > > > UAR which now becomes to be a regular uobject.
> > > >
> > > > Moving to that mode enables allocating a UAR only upon demand and drop the
> > > > redundant static allocation of UARs upon context creation.
> > > >
> > > > In addition, it allows master and secondary processes that own the same command
> > > > FD to allocate and manage UARs according to their needs, this can’t be achieved
> > > > today.
> > > >
> > > > As part of this option, QP & CQ creation flows were adapted to support this
> > > > dynamic UAR mode once asked by user space.
> > > >
> > > > Once this mode is asked by mlx5 user space driver on a given context, it will
> > > > be mutual exclusive, means both the static and legacy dynamic modes for using
> > > > UARs will be blocked.
> > > >
> > > > The legacy modes are supported for backward compatible reasons, looking
> > > > forward we expect this new mode to be the default.
> > >
> > > We are starting to accumulate a lot of code that is now old-rdma-core
> > > only.
> >
> > Agree
> >
> > >
> > > I have been wondering if we should add something like
> > >
> > > #if CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION < 21
> > > #endif
> >
> > From one side it will definitely help to see old code, but from another
> > it will create many ifdef inside of the code with a very little chance
> > of testing. Also we will continue to have the same problem to decide when
> > we can delete this code.
>
> Well, it doesn't have to be an #ifdef, eg just sticking
>
> if (CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION >= 21)
> return -ENOPROTOOPT;
>
> at the top of obsolete functions would go a long way
First, how will you set this min_version? hardcoded in the kernel code?
Second, it will work for simple flows, but can be extremely complex
if your code looks like:
if (old_version)
do something
if (new version)
do something else
You will need to add logic to handle this -ENOPROTOOPT error value.
>
> > > So we can keep track of what is actually a used code flow and what is
> > > now hard to test legacy code.
> > >
> > > eg this config would also disable the write interface(), turn off
> > > compat write interfaces as they are switched to use ioctl, etc, etc.
> >
> > What about if we introduce one ifdef, let's say CONFIG_INFINIBAND_LEGACY
> > and put everything that will be declared as legacy to that bucket? And
> > once every 5 (???) years delete everything from that bucket.
>
> It is much harder to see what is really old vs only a little old
>
> I'm not sure we can ever completely delete any of this, but at least
> the distros can make an informed choice to either do more detailed
> test of old libraries or disable those code paths.
It will be nice to hear how distros decide to disable/drop the code.
Thanks
>
> Jason
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode
2020-03-18 13:56 ` Leon Romanovsky
@ 2020-03-18 14:00 ` Jason Gunthorpe
2020-03-18 14:09 ` Leon Romanovsky
0 siblings, 1 reply; 19+ messages in thread
From: Jason Gunthorpe @ 2020-03-18 14:00 UTC (permalink / raw)
To: Leon Romanovsky, Doug Ledford
Cc: linux-rdma, Michael Guralnik, netdev, Saeed Mahameed,
Yishai Hadas
On Wed, Mar 18, 2020 at 03:56:31PM +0200, Leon Romanovsky wrote:
> On Wed, Mar 18, 2020 at 10:21:00AM -0300, Jason Gunthorpe wrote:
> > On Wed, Mar 18, 2020 at 03:14:50PM +0200, Leon Romanovsky wrote:
> > > On Wed, Mar 18, 2020 at 09:54:59AM -0300, Jason Gunthorpe wrote:
> > > > On Wed, Mar 18, 2020 at 02:43:25PM +0200, Leon Romanovsky wrote:
> > > > > From: Leon Romanovsky <leonro@mellanox.com>
> > > > >
> > > > > From Yishai,
> > > > >
> > > > > This series exposes API to enable a dynamic allocation and management of a
> > > > > UAR which now becomes to be a regular uobject.
> > > > >
> > > > > Moving to that mode enables allocating a UAR only upon demand and drop the
> > > > > redundant static allocation of UARs upon context creation.
> > > > >
> > > > > In addition, it allows master and secondary processes that own the same command
> > > > > FD to allocate and manage UARs according to their needs, this can’t be achieved
> > > > > today.
> > > > >
> > > > > As part of this option, QP & CQ creation flows were adapted to support this
> > > > > dynamic UAR mode once asked by user space.
> > > > >
> > > > > Once this mode is asked by mlx5 user space driver on a given context, it will
> > > > > be mutual exclusive, means both the static and legacy dynamic modes for using
> > > > > UARs will be blocked.
> > > > >
> > > > > The legacy modes are supported for backward compatible reasons, looking
> > > > > forward we expect this new mode to be the default.
> > > >
> > > > We are starting to accumulate a lot of code that is now old-rdma-core
> > > > only.
> > >
> > > Agree
> > >
> > > >
> > > > I have been wondering if we should add something like
> > > >
> > > > #if CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION < 21
> > > > #endif
> > >
> > > From one side it will definitely help to see old code, but from another
> > > it will create many ifdef inside of the code with a very little chance
> > > of testing. Also we will continue to have the same problem to decide when
> > > we can delete this code.
> >
> > Well, it doesn't have to be an #ifdef, eg just sticking
> >
> > if (CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION >= 21)
> > return -ENOPROTOOPT;
> >
> > at the top of obsolete functions would go a long way
>
> First, how will you set this min_version? hordcoded in the kernel
> code?
Yes, when a rdma-core release obsoletes the code path then it can
become annotated.
> Second, it will work for simple flows, but can be extremely complex
> if your code looks like:
> if (old_version)
> do something
> if (new version)
> do something else
Well, we'd avoid making such complications, it would be something like
if (flag & foo) {
if (CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION >= 21)
return -ENOPROTOOPT;
[keep going as before]
}
At least we now know this conditional path isn't used / isn't covered
by testing
Doug? What does a distro think?
Jason
^ permalink raw reply [flat|nested] 19+ messages in thread

* Re: [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode
2020-03-18 14:00 ` Jason Gunthorpe
@ 2020-03-18 14:09 ` Leon Romanovsky
2020-03-18 14:12 ` Jason Gunthorpe
0 siblings, 1 reply; 19+ messages in thread
From: Leon Romanovsky @ 2020-03-18 14:09 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Doug Ledford, linux-rdma, Michael Guralnik, netdev,
Saeed Mahameed, Yishai Hadas
On Wed, Mar 18, 2020 at 11:00:01AM -0300, Jason Gunthorpe wrote:
> On Wed, Mar 18, 2020 at 03:56:31PM +0200, Leon Romanovsky wrote:
> > On Wed, Mar 18, 2020 at 10:21:00AM -0300, Jason Gunthorpe wrote:
> > > On Wed, Mar 18, 2020 at 03:14:50PM +0200, Leon Romanovsky wrote:
> > > > On Wed, Mar 18, 2020 at 09:54:59AM -0300, Jason Gunthorpe wrote:
> > > > > On Wed, Mar 18, 2020 at 02:43:25PM +0200, Leon Romanovsky wrote:
> > > > > > From: Leon Romanovsky <leonro@mellanox.com>
> > > > > >
> > > > > > From Yishai,
> > > > > >
> > > > > > This series exposes API to enable a dynamic allocation and management of a
> > > > > > UAR which now becomes to be a regular uobject.
> > > > > >
> > > > > > Moving to that mode enables allocating a UAR only upon demand and drop the
> > > > > > redundant static allocation of UARs upon context creation.
> > > > > >
> > > > > > In addition, it allows master and secondary processes that own the same command
> > > > > > FD to allocate and manage UARs according to their needs, this can’t be achieved
> > > > > > today.
> > > > > >
> > > > > > As part of this option, QP & CQ creation flows were adapted to support this
> > > > > > dynamic UAR mode once asked by user space.
> > > > > >
> > > > > > Once this mode is asked by mlx5 user space driver on a given context, it will
> > > > > > be mutual exclusive, means both the static and legacy dynamic modes for using
> > > > > > UARs will be blocked.
> > > > > >
> > > > > > The legacy modes are supported for backward compatible reasons, looking
> > > > > > forward we expect this new mode to be the default.
> > > > >
> > > > > We are starting to accumulate a lot of code that is now old-rdma-core
> > > > > only.
> > > >
> > > > Agree
> > > >
> > > > >
> > > > > I have been wondering if we should add something like
> > > > >
> > > > > #if CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION < 21
> > > > > #endif
> > > >
> > > > From one side it will definitely help to see old code, but from another
> > > > it will create many ifdef inside of the code with a very little chance
> > > > of testing. Also we will continue to have the same problem to decide when
> > > > we can delete this code.
> > >
> > > Well, it doesn't have to be an #ifdef, eg just sticking
> > >
> > > if (CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION >= 21)
> > > return -ENOPROTOOPT;
> > >
> > > at the top of obsolete functions would go a long way
> >
> > First, how will you set this min_version? hordcoded in the kernel
> > code?
>
> Yes, when a rdma-core release obsoletes the code path then it can
> become annotated.
>
> > Second, it will work for simple flows, but can be extremely complex
> > if your code looks like:
> > if (old_version)
> > do something
> > if (new version)
> > do something else
>
> Well, we'd avoid making such complications, it would be something like
>
> if (flag & foo) {
> if (CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION >= 21)
> return -ENOPROTOOPT;
> [keep going as before]
> }
>
> At least we now know this conditional path isn't used / isn't covered
> by testing
I'm ok with this approach because it helps us to find those dead
paths, but have last question, shouldn't this be achieved with
proper documentation of every flag instead of adding CONFIG_..?
>
> Doug? What does a distro think?
>
> Jason
^ permalink raw reply [flat|nested] 19+ messages in thread

* Re: [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode
2020-03-18 14:09 ` Leon Romanovsky
@ 2020-03-18 14:12 ` Jason Gunthorpe
2020-03-18 14:24 ` Leon Romanovsky
0 siblings, 1 reply; 19+ messages in thread
From: Jason Gunthorpe @ 2020-03-18 14:12 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Doug Ledford, linux-rdma, Michael Guralnik, netdev,
Saeed Mahameed, Yishai Hadas
On Wed, Mar 18, 2020 at 04:09:32PM +0200, Leon Romanovsky wrote:
> On Wed, Mar 18, 2020 at 11:00:01AM -0300, Jason Gunthorpe wrote:
> > On Wed, Mar 18, 2020 at 03:56:31PM +0200, Leon Romanovsky wrote:
> > > On Wed, Mar 18, 2020 at 10:21:00AM -0300, Jason Gunthorpe wrote:
> > > > On Wed, Mar 18, 2020 at 03:14:50PM +0200, Leon Romanovsky wrote:
> > > > > On Wed, Mar 18, 2020 at 09:54:59AM -0300, Jason Gunthorpe wrote:
> > > > > > On Wed, Mar 18, 2020 at 02:43:25PM +0200, Leon Romanovsky wrote:
> > > > > > > From: Leon Romanovsky <leonro@mellanox.com>
> > > > > > >
> > > > > > > From Yishai,
> > > > > > >
> > > > > > > This series exposes API to enable a dynamic allocation and management of a
> > > > > > > UAR which now becomes to be a regular uobject.
> > > > > > >
> > > > > > > Moving to that mode enables allocating a UAR only upon demand and drop the
> > > > > > > redundant static allocation of UARs upon context creation.
> > > > > > >
> > > > > > > In addition, it allows master and secondary processes that own the same command
> > > > > > > FD to allocate and manage UARs according to their needs, this can’t be achieved
> > > > > > > today.
> > > > > > >
> > > > > > > As part of this option, QP & CQ creation flows were adapted to support this
> > > > > > > dynamic UAR mode once asked by user space.
> > > > > > >
> > > > > > > Once this mode is asked by mlx5 user space driver on a given context, it will
> > > > > > > be mutual exclusive, means both the static and legacy dynamic modes for using
> > > > > > > UARs will be blocked.
> > > > > > >
> > > > > > > The legacy modes are supported for backward compatible reasons, looking
> > > > > > > forward we expect this new mode to be the default.
> > > > > >
> > > > > > We are starting to accumulate a lot of code that is now old-rdma-core
> > > > > > only.
> > > > >
> > > > > Agree
> > > > >
> > > > > >
> > > > > > I have been wondering if we should add something like
> > > > > >
> > > > > > #if CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION < 21
> > > > > > #endif
> > > > >
> > > > > From one side it will definitely help to see old code, but from another
> > > > > it will create many ifdef inside of the code with a very little chance
> > > > > of testing. Also we will continue to have the same problem to decide when
> > > > > we can delete this code.
> > > >
> > > > Well, it doesn't have to be an #ifdef, eg just sticking
> > > >
> > > > if (CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION >= 21)
> > > > return -ENOPROTOOPT;
> > > >
> > > > at the top of obsolete functions would go a long way
> > >
> > > First, how will you set this min_version? hordcoded in the kernel
> > > code?
> >
> > Yes, when a rdma-core release obsoletes the code path then it can
> > become annotated.
> >
> > > Second, it will work for simple flows, but can be extremely complex
> > > if your code looks like:
> > > if (old_version)
> > > do something
> > > if (new version)
> > > do something else
> >
> > Well, we'd avoid making such complications, it would be something like
> >
> > if (flag & foo) {
> > if (CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION >= 21)
> > return -ENOPROTOOPT;
> > [keep going as before]
> > }
> >
> > At least we now know this conditional path isn't used / isn't covered
> > by testing
>
> I'm ok with this approach because it helps us to find those dead
> paths, but have last question, shouldn't this be achieved with
> proper documentation of every flag instead of adding CONFIG_..?
How do you mean?
The other half of this idea is to disable obsolete un tested code to
avoid potential bugs. Which requires CONFIG_?
Jason
^ permalink raw reply [flat|nested] 19+ messages in thread

* Re: [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode
2020-03-18 14:12 ` Jason Gunthorpe
@ 2020-03-18 14:24 ` Leon Romanovsky
2020-03-18 14:39 ` Jason Gunthorpe
0 siblings, 1 reply; 19+ messages in thread
From: Leon Romanovsky @ 2020-03-18 14:24 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Doug Ledford, linux-rdma, Michael Guralnik, netdev,
Saeed Mahameed, Yishai Hadas
On Wed, Mar 18, 2020 at 11:12:08AM -0300, Jason Gunthorpe wrote:
> On Wed, Mar 18, 2020 at 04:09:32PM +0200, Leon Romanovsky wrote:
> > On Wed, Mar 18, 2020 at 11:00:01AM -0300, Jason Gunthorpe wrote:
> > > On Wed, Mar 18, 2020 at 03:56:31PM +0200, Leon Romanovsky wrote:
> > > > On Wed, Mar 18, 2020 at 10:21:00AM -0300, Jason Gunthorpe wrote:
> > > > > On Wed, Mar 18, 2020 at 03:14:50PM +0200, Leon Romanovsky wrote:
> > > > > > On Wed, Mar 18, 2020 at 09:54:59AM -0300, Jason Gunthorpe wrote:
> > > > > > > On Wed, Mar 18, 2020 at 02:43:25PM +0200, Leon Romanovsky wrote:
> > > > > > > > From: Leon Romanovsky <leonro@mellanox.com>
> > > > > > > >
> > > > > > > > From Yishai,
> > > > > > > >
> > > > > > > > This series exposes API to enable a dynamic allocation and management of a
> > > > > > > > UAR which now becomes to be a regular uobject.
> > > > > > > >
> > > > > > > > Moving to that mode enables allocating a UAR only upon demand and drop the
> > > > > > > > redundant static allocation of UARs upon context creation.
> > > > > > > >
> > > > > > > > In addition, it allows master and secondary processes that own the same command
> > > > > > > > FD to allocate and manage UARs according to their needs, this can’t be achieved
> > > > > > > > today.
> > > > > > > >
> > > > > > > > As part of this option, QP & CQ creation flows were adapted to support this
> > > > > > > > dynamic UAR mode once asked by user space.
> > > > > > > >
> > > > > > > > Once this mode is asked by mlx5 user space driver on a given context, it will
> > > > > > > > be mutual exclusive, means both the static and legacy dynamic modes for using
> > > > > > > > UARs will be blocked.
> > > > > > > >
> > > > > > > > The legacy modes are supported for backward compatible reasons, looking
> > > > > > > > forward we expect this new mode to be the default.
> > > > > > >
> > > > > > > We are starting to accumulate a lot of code that is now old-rdma-core
> > > > > > > only.
> > > > > >
> > > > > > Agree
> > > > > >
> > > > > > >
> > > > > > > I have been wondering if we should add something like
> > > > > > >
> > > > > > > #if CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION < 21
> > > > > > > #endif
> > > > > >
> > > > > > From one side it will definitely help to see old code, but from another
> > > > > > it will create many ifdef inside of the code with a very little chance
> > > > > > of testing. Also we will continue to have the same problem to decide when
> > > > > > we can delete this code.
> > > > >
> > > > > Well, it doesn't have to be an #ifdef, eg just sticking
> > > > >
> > > > > if (CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION >= 21)
> > > > > return -ENOPROTOOPT;
> > > > >
> > > > > at the top of obsolete functions would go a long way
> > > >
> > > > First, how will you set this min_version? hordcoded in the kernel
> > > > code?
> > >
> > > Yes, when a rdma-core release obsoletes the code path then it can
> > > become annotated.
> > >
> > > > Second, it will work for simple flows, but can be extremely complex
> > > > if your code looks like:
> > > > if (old_version)
> > > > do something
> > > > if (new version)
> > > > do something else
> > >
> > > Well, we'd avoid making such complications, it would be something like
> > >
> > > if (flag & foo) {
> > > if (CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION >= 21)
> > > return -ENOPROTOOPT;
> > > [keep going as before]
> > > }
> > >
> > > At least we now know this conditional path isn't used / isn't covered
> > > by testing
> >
> > I'm ok with this approach because it helps us to find those dead
> > paths, but have last question, shouldn't this be achieved with
> > proper documentation of every flag instead of adding CONFIG_..?
>
> How do you mean?
>
> The other half of this idea is to disable obsolete un tested code to
> avoid potential bugs. Which requires CONFIG_?
The second part is achievable by distros when they will decide to
support starting from version X. The same decision is not so easy
to do in the upstream.
Let's take as an example this feature. It will be set as default from
rdma-core v29 and the legacy code will be guarded by
"if (CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION >= 29)". When will change
CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION to be above 29? So we will
delete such legacy code.
In the world where we are not breaking user space, it will never happen.
It means that upstream doesn't get anything from those CONFIG_*s.
Thanks
>
> Jason
^ permalink raw reply [flat|nested] 19+ messages in thread

* Re: [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode
2020-03-18 14:24 ` Leon Romanovsky
@ 2020-03-18 14:39 ` Jason Gunthorpe
2020-03-18 17:07 ` Leon Romanovsky
0 siblings, 1 reply; 19+ messages in thread
From: Jason Gunthorpe @ 2020-03-18 14:39 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Doug Ledford, linux-rdma, Michael Guralnik, netdev,
Saeed Mahameed, Yishai Hadas
On Wed, Mar 18, 2020 at 04:24:55PM +0200, Leon Romanovsky wrote:
> > > I'm ok with this approach because it helps us to find those dead
> > > paths, but have last question, shouldn't this be achieved with
> > > proper documentation of every flag instead of adding CONFIG_..?
> >
> > How do you mean?
> >
> > The other half of this idea is to disable obsolete un tested code to
> > avoid potential bugs. Which requires CONFIG_?
>
> The second part is achievable by distros when they will decide to
> support starting from version X. The same decision is not so easy
> to do in the upstream.
Upstream will probably carry the code for a long, long time, that
doesn't mean the distros don't get value by using a shorter time
window
> Let's take as an example this feature. It will be set as default from
> rdma-core v29 and the legacy code will be guarded by
> "if (CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION >= 29)". When will change
> CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION to be above 29? So we will
> delete such legacy code.
First the distros will decide in their own kconfigs where they want to
set the value.
Then the upstream kernel will decide some default value
Then maybe we could talk about lowest values when enough of the user
community uses a higher value
Jason
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH rdma-next 0/4] Introduce dynamic UAR allocation mode
2020-03-18 14:39 ` Jason Gunthorpe
@ 2020-03-18 17:07 ` Leon Romanovsky
0 siblings, 0 replies; 19+ messages in thread
From: Leon Romanovsky @ 2020-03-18 17:07 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Doug Ledford, linux-rdma, Michael Guralnik, netdev,
Saeed Mahameed, Yishai Hadas
On Wed, Mar 18, 2020 at 11:39:03AM -0300, Jason Gunthorpe wrote:
> On Wed, Mar 18, 2020 at 04:24:55PM +0200, Leon Romanovsky wrote:
> > > > I'm ok with this approach because it helps us to find those dead
> > > > paths, but have last question, shouldn't this be achieved with
> > > > proper documentation of every flag instead of adding CONFIG_..?
> > >
> > > How do you mean?
> > >
> > > The other half of this idea is to disable obsolete un tested code to
> > > avoid potential bugs. Which requires CONFIG_?
> >
> > The second part is achievable by distros when they will decide to
> > support starting from version X. The same decision is not so easy
> > to do in the upstream.
>
> Upstream will probably carry the code for a long, long time, that
> doesn't mean the distros don't get value by using a shorter time
> window
Sure
>
> > Let's take as an example this feature. It will be set as default from
> > rdma-core v29 and the legacy code will be guarded by
> > "if (CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION >= 29)". When will change
> > CONFIG_INFINIBAND_MIN_RDMA_CORE_VERSION to be above 29? So we will
> > delete such legacy code.
>
> First the distros will decide in their own kconfigs where they want to
> set the value.
>
> Then the upstream kernel will decide some default value
>
> Then maybe we could talk about lowest values when enough of the user
> community uses a higher value
I think that you are over-optimistic here, but let's hear other voices.
Thanks
>
> Jason
^ permalink raw reply [flat|nested] 19+ messages in thread