* [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface
@ 2026-03-03 12:48 Konstantin Taranov
2026-03-04 11:05 ` Leon Romanovsky
0 siblings, 1 reply; 11+ messages in thread
From: Konstantin Taranov @ 2026-03-03 12:48 UTC (permalink / raw)
To: kotaranov, shirazsaleem, longli, jgg, leon; +Cc: linux-rdma, linux-kernel
From: Konstantin Taranov <kotaranov@microsoft.com>
The uverbs CQ creation UAPI allows users to supply their own umem for a CQ.
Update mana to support this workflow while preserving support for creating
umem through the legacy interface.
To support RDMA objects that own umem, extend mana_ib_create_queue() to return
the umem to the caller and do not allocate umem if it was allocted
by the caller.
Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
---
v2: It is a rework of the patch proposed by Leon
drivers/infiniband/hw/mana/cq.c | 125 +++++++++++++++++----------
drivers/infiniband/hw/mana/device.c | 1 +
drivers/infiniband/hw/mana/main.c | 30 +++++--
drivers/infiniband/hw/mana/mana_ib.h | 5 +-
drivers/infiniband/hw/mana/qp.c | 5 +-
drivers/infiniband/hw/mana/wq.c | 3 +-
6 files changed, 111 insertions(+), 58 deletions(-)
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index b2749f971..fa951732a 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -8,12 +8,8 @@
int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct uverbs_attr_bundle *attrs)
{
- struct ib_udata *udata = &attrs->driver_udata;
struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
- struct mana_ib_create_cq_resp resp = {};
- struct mana_ib_ucontext *mana_ucontext;
struct ib_device *ibdev = ibcq->device;
- struct mana_ib_create_cq ucmd = {};
struct mana_ib_dev *mdev;
bool is_rnic_cq;
u32 doorbell;
@@ -26,48 +22,91 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->cq_handle = INVALID_MANA_HANDLE;
is_rnic_cq = mana_ib_is_rnic(mdev);
- if (udata) {
- if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
- return -EINVAL;
-
- err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
- if (err) {
- ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
- return err;
- }
+ if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
+ return -EINVAL;
- if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
- attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
- ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
- return -EINVAL;
- }
+ buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
+ cq->cqe = buf_size / COMP_ENTRY_SIZE;
+ err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
+ return err;
+ }
+ doorbell = mdev->gdma_dev->doorbell;
- cq->cqe = attr->cqe;
- err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
- &cq->queue);
+ if (is_rnic_cq) {
+ err = mana_ib_gd_create_cq(mdev, cq, doorbell);
if (err) {
- ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
- return err;
+ ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n", err);
+ goto err_destroy_queue;
}
- mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
- ibucontext);
- doorbell = mana_ucontext->doorbell;
- } else {
- if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1) {
- ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
- return -EINVAL;
- }
- buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
- cq->cqe = buf_size / COMP_ENTRY_SIZE;
- err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
+ err = mana_ib_install_cq_cb(mdev, cq);
if (err) {
- ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
- return err;
+ ibdev_dbg(ibdev, "Failed to install cq callback, %d\n", err);
+ goto err_destroy_rnic_cq;
}
- doorbell = mdev->gdma_dev->doorbell;
}
+ spin_lock_init(&cq->cq_lock);
+ INIT_LIST_HEAD(&cq->list_send_qp);
+ INIT_LIST_HEAD(&cq->list_recv_qp);
+
+ return 0;
+
+err_destroy_rnic_cq:
+ mana_ib_gd_destroy_cq(mdev, cq);
+err_destroy_queue:
+ mana_ib_destroy_queue(mdev, &cq->queue);
+
+ return err;
+}
+
+int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct mana_ib_create_cq_resp resp = {};
+ struct mana_ib_ucontext *mana_ucontext;
+ struct ib_device *ibdev = ibcq->device;
+ struct mana_ib_create_cq ucmd = {};
+ struct mana_ib_dev *mdev;
+ bool is_rnic_cq;
+ u32 doorbell;
+ int err;
+
+ mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
+ cq->cq_handle = INVALID_MANA_HANDLE;
+ is_rnic_cq = mana_ib_is_rnic(mdev);
+
+ if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
+ return -EINVAL;
+
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
+ return err;
+ }
+
+ if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
+ attr->cqe > U32_MAX / COMP_ENTRY_SIZE)
+ return -EINVAL;
+
+ cq->cqe = attr->cqe;
+ err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
+ &cq->queue, &ibcq->umem);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
+ return err;
+ }
+
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+ ibucontext);
+ doorbell = mana_ucontext->doorbell;
+
if (is_rnic_cq) {
err = mana_ib_gd_create_cq(mdev, cq, doorbell);
if (err) {
@@ -82,13 +121,11 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
}
}
- if (udata) {
- resp.cqid = cq->queue.id;
- err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
- if (err) {
- ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
- goto err_remove_cq_cb;
- }
+ resp.cqid = cq->queue.id;
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto err_remove_cq_cb;
}
spin_lock_init(&cq->cq_lock);
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index ccc2279ca..c5c5fe051 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -21,6 +21,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
.alloc_ucontext = mana_ib_alloc_ucontext,
.create_ah = mana_ib_create_ah,
.create_cq = mana_ib_create_cq,
+ .create_user_cq = mana_ib_create_user_cq,
.create_qp = mana_ib_create_qp,
.create_rwq_ind_table = mana_ib_create_rwq_ind_table,
.create_wq = mana_ib_create_wq,
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 8d99cd00f..d1f1e217b 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -262,33 +262,45 @@ int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_qu
}
int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
- struct mana_ib_queue *queue)
+ struct mana_ib_queue *queue, struct ib_umem **umem)
{
- struct ib_umem *umem;
int err;
queue->umem = NULL;
queue->id = INVALID_QUEUE_ID;
queue->gdma_region = GDMA_INVALID_DMA_REGION;
- umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(umem)) {
- ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n", umem);
- return PTR_ERR(umem);
+ if (umem)
+ queue->umem = *umem;
+
+ if (!queue->umem) {
+ /* if umem is not provided, allocate it */
+ queue->umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(queue->umem)) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n", queue->umem);
+ return PTR_ERR(queue->umem);
+ }
}
- err = mana_ib_create_zero_offset_dma_region(mdev, umem, &queue->gdma_region);
+ err = mana_ib_create_zero_offset_dma_region(mdev, queue->umem, &queue->gdma_region);
if (err) {
ibdev_dbg(&mdev->ib_dev, "Failed to create dma region, %d\n", err);
goto free_umem;
}
- queue->umem = umem;
ibdev_dbg(&mdev->ib_dev, "created dma region 0x%llx\n", queue->gdma_region);
+ if (umem) {
+ /* Give ownership of umem to the caller */
+ *umem = queue->umem;
+ queue->umem = NULL;
+ }
+
return 0;
free_umem:
- ib_umem_release(umem);
+ if (!umem || !(*umem))
+ /* deallocate mana's umem */
+ ib_umem_release(queue->umem);
return err;
}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index a7c8c0fd7..56ebd086b 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -625,7 +625,7 @@ int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
struct mana_ib_queue *queue);
int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
- struct mana_ib_queue *queue);
+ struct mana_ib_queue *queue, struct ib_umem **umem);
void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue);
struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
@@ -667,7 +667,8 @@ void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct uverbs_attr_bundle *attrs);
-
+int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 82f84f7ad..29cf3f85a 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -325,7 +325,8 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
ucmd.sq_buf_addr, ucmd.port);
- err = mana_ib_create_queue(mdev, ucmd.sq_buf_addr, ucmd.sq_buf_size, &qp->raw_sq);
+ err = mana_ib_create_queue(mdev, ucmd.sq_buf_addr, ucmd.sq_buf_size,
+ &qp->raw_sq, NULL);
if (err) {
ibdev_dbg(&mdev->ib_dev,
"Failed to create queue for create qp-raw, err %d\n", err);
@@ -555,7 +556,7 @@ static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
continue;
}
err = mana_ib_create_queue(mdev, ucmd.queue_buf[j], ucmd.queue_size[j],
- &qp->rc_qp.queues[i]);
+ &qp->rc_qp.queues[i], NULL);
if (err) {
ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n", i, err);
goto destroy_queues;
diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c
index 6206244f7..944218b12 100644
--- a/drivers/infiniband/hw/mana/wq.c
+++ b/drivers/infiniband/hw/mana/wq.c
@@ -31,7 +31,8 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr);
- err = mana_ib_create_queue(mdev, ucmd.wq_buf_addr, ucmd.wq_buf_size, &wq->queue);
+ err = mana_ib_create_queue(mdev, ucmd.wq_buf_addr, ucmd.wq_buf_size,
+ &wq->queue, NULL);
if (err) {
ibdev_dbg(&mdev->ib_dev,
"Failed to create queue for create wq, %d\n", err);
--
2.43.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface
2026-03-03 12:48 [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface Konstantin Taranov
@ 2026-03-04 11:05 ` Leon Romanovsky
2026-03-04 11:41 ` [EXTERNAL] " Konstantin Taranov
0 siblings, 1 reply; 11+ messages in thread
From: Leon Romanovsky @ 2026-03-04 11:05 UTC (permalink / raw)
To: Konstantin Taranov
Cc: kotaranov, shirazsaleem, longli, jgg, linux-rdma, linux-kernel
On Tue, Mar 03, 2026 at 04:48:25AM -0800, Konstantin Taranov wrote:
> From: Konstantin Taranov <kotaranov@microsoft.com>
>
> The uverbs CQ creation UAPI allows users to supply their own umem for a CQ.
> Update mana to support this workflow while preserving support for creating
> umem through the legacy interface.
>
> To support RDMA objects that own umem, extend mana_ib_create_queue() to return
> the umem to the caller and do not allocate umem if it was allocted
> by the caller.
>
> Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
> ---
> v2: It is a rework of the patch proposed by Leon
I am curious to know what changes were introduced?
> drivers/infiniband/hw/mana/cq.c | 125 +++++++++++++++++----------
> drivers/infiniband/hw/mana/device.c | 1 +
> drivers/infiniband/hw/mana/main.c | 30 +++++--
> drivers/infiniband/hw/mana/mana_ib.h | 5 +-
> drivers/infiniband/hw/mana/qp.c | 5 +-
> drivers/infiniband/hw/mana/wq.c | 3 +-
> 6 files changed, 111 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
> index b2749f971..fa951732a 100644
> --- a/drivers/infiniband/hw/mana/cq.c
> +++ b/drivers/infiniband/hw/mana/cq.c
> @@ -8,12 +8,8 @@
> int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> struct uverbs_attr_bundle *attrs)
> {
> - struct ib_udata *udata = &attrs->driver_udata;
> struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> - struct mana_ib_create_cq_resp resp = {};
> - struct mana_ib_ucontext *mana_ucontext;
> struct ib_device *ibdev = ibcq->device;
> - struct mana_ib_create_cq ucmd = {};
> struct mana_ib_dev *mdev;
> bool is_rnic_cq;
> u32 doorbell;
> @@ -26,48 +22,91 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> cq->cq_handle = INVALID_MANA_HANDLE;
> is_rnic_cq = mana_ib_is_rnic(mdev);
>
> - if (udata) {
> - if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> - return -EINVAL;
> -
> - err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
> - if (err) {
> - ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
> - return err;
> - }
> + if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> + return -EINVAL;
We are talking about kernel verbs. ULPs are not designed to provide
attributes and recover from random driver limitations.
>
> - if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
> - attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
> - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
> - return -EINVAL;
> - }
> + buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
> + cq->cqe = buf_size / COMP_ENTRY_SIZE;
> + err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
> + if (err) {
> + ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
> + return err;
> + }
> + doorbell = mdev->gdma_dev->doorbell;
>
> - cq->cqe = attr->cqe;
> - err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
> - &cq->queue);
> + if (is_rnic_cq) {
> + err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> if (err) {
> - ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
> - return err;
> + ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n", err);
> + goto err_destroy_queue;
> }
>
> - mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
> - ibucontext);
> - doorbell = mana_ucontext->doorbell;
> - } else {
> - if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1) {
> - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
> - return -EINVAL;
> - }
> - buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
> - cq->cqe = buf_size / COMP_ENTRY_SIZE;
> - err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
> + err = mana_ib_install_cq_cb(mdev, cq);
> if (err) {
> - ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
> - return err;
> + ibdev_dbg(ibdev, "Failed to install cq callback, %d\n", err);
> + goto err_destroy_rnic_cq;
> }
> - doorbell = mdev->gdma_dev->doorbell;
> }
>
> + spin_lock_init(&cq->cq_lock);
> + INIT_LIST_HEAD(&cq->list_send_qp);
> + INIT_LIST_HEAD(&cq->list_recv_qp);
> +
> + return 0;
> +
> +err_destroy_rnic_cq:
> + mana_ib_gd_destroy_cq(mdev, cq);
> +err_destroy_queue:
> + mana_ib_destroy_queue(mdev, &cq->queue);
> +
> + return err;
> +}
> +
> +int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> + struct uverbs_attr_bundle *attrs)
> +{
> + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> + struct ib_udata *udata = &attrs->driver_udata;
> + struct mana_ib_create_cq_resp resp = {};
> + struct mana_ib_ucontext *mana_ucontext;
> + struct ib_device *ibdev = ibcq->device;
> + struct mana_ib_create_cq ucmd = {};
> + struct mana_ib_dev *mdev;
> + bool is_rnic_cq;
> + u32 doorbell;
> + int err;
> +
> + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> +
> + cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
> + cq->cq_handle = INVALID_MANA_HANDLE;
> + is_rnic_cq = mana_ib_is_rnic(mdev);
> +
> + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> + return -EINVAL;
> +
> + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
> + if (err) {
> + ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
> + return err;
> + }
> +
> + if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
> + attr->cqe > U32_MAX / COMP_ENTRY_SIZE)
> + return -EINVAL;
> +
> + cq->cqe = attr->cqe;
> + err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
> + &cq->queue, &ibcq->umem);
> + if (err) {
> + ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
> + return err;
> + }
> +
> + mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
> + ibucontext);
> + doorbell = mana_ucontext->doorbell;
> +
> if (is_rnic_cq) {
> err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> if (err) {
> @@ -82,13 +121,11 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> }
> }
>
> - if (udata) {
> - resp.cqid = cq->queue.id;
> - err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
> - if (err) {
> - ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
> - goto err_remove_cq_cb;
> - }
> + resp.cqid = cq->queue.id;
> + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
> + if (err) {
> + ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
> + goto err_remove_cq_cb;
> }
>
> spin_lock_init(&cq->cq_lock);
> diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
> index ccc2279ca..c5c5fe051 100644
> --- a/drivers/infiniband/hw/mana/device.c
> +++ b/drivers/infiniband/hw/mana/device.c
> @@ -21,6 +21,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
> .alloc_ucontext = mana_ib_alloc_ucontext,
> .create_ah = mana_ib_create_ah,
> .create_cq = mana_ib_create_cq,
> + .create_user_cq = mana_ib_create_user_cq,
> .create_qp = mana_ib_create_qp,
> .create_rwq_ind_table = mana_ib_create_rwq_ind_table,
> .create_wq = mana_ib_create_wq,
> diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
> index 8d99cd00f..d1f1e217b 100644
> --- a/drivers/infiniband/hw/mana/main.c
> +++ b/drivers/infiniband/hw/mana/main.c
> @@ -262,33 +262,45 @@ int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_qu
> }
>
> int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
> - struct mana_ib_queue *queue)
> + struct mana_ib_queue *queue, struct ib_umem **umem)
> {
> - struct ib_umem *umem;
> int err;
>
> queue->umem = NULL;
> queue->id = INVALID_QUEUE_ID;
> queue->gdma_region = GDMA_INVALID_DMA_REGION;
>
> - umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
> - if (IS_ERR(umem)) {
> - ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n", umem);
> - return PTR_ERR(umem);
> + if (umem)
> + queue->umem = *umem;
> +
> + if (!queue->umem) {
> + /* if umem is not provided, allocate it */
> + queue->umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
> + if (IS_ERR(queue->umem)) {
> + ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n", queue->umem);
> + return PTR_ERR(queue->umem);
> + }
I moved this hunk to the callers on purpose. The idea is to call to
ib_umem_get() as early as possible.
> }
>
> - err = mana_ib_create_zero_offset_dma_region(mdev, umem, &queue->gdma_region);
> + err = mana_ib_create_zero_offset_dma_region(mdev, queue->umem, &queue->gdma_region);
> if (err) {
> ibdev_dbg(&mdev->ib_dev, "Failed to create dma region, %d\n", err);
> goto free_umem;
> }
> - queue->umem = umem;
>
> ibdev_dbg(&mdev->ib_dev, "created dma region 0x%llx\n", queue->gdma_region);
>
> + if (umem) {
> + /* Give ownership of umem to the caller */
> + *umem = queue->umem;
> + queue->umem = NULL;
> + }
> +
> return 0;
> free_umem:
> - ib_umem_release(umem);
> + if (!umem || !(*umem))
> + /* deallocate mana's umem */
> + ib_umem_release(queue->umem);
This is another reason why ib_umem_get() shouldn't be buried in the
driver's internals. IB/core is responsible to release it.
Thanks
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: [EXTERNAL] Re: [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface
2026-03-04 11:05 ` Leon Romanovsky
@ 2026-03-04 11:41 ` Konstantin Taranov
2026-03-04 13:23 ` Konstantin Taranov
0 siblings, 1 reply; 11+ messages in thread
From: Konstantin Taranov @ 2026-03-04 11:41 UTC (permalink / raw)
To: Leon Romanovsky, Konstantin Taranov
Cc: Shiraz Saleem, Long Li, jgg@ziepe.ca, linux-rdma@vger.kernel.org,
linux-kernel@vger.kernel.org
> >
> > The uverbs CQ creation UAPI allows users to supply their own umem for a
> CQ.
> > Update mana to support this workflow while preserving support for
> > creating umem through the legacy interface.
> >
> > To support RDMA objects that own umem, extend
> mana_ib_create_queue()
> > to return the umem to the caller and do not allocate umem if it was
> > allocted by the caller.
> >
> > Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
> > ---
> > v2: It is a rework of the patch proposed by Leon
>
> I am curious to know what changes were introduced?
It is like your patch, but I kept get_umem in mana_ib_create_queue and introduced ownership.
It made the code simpler and extendable. In your proposal, it was hard to track the changes
and it led to double free of the umem. With new mana_ib_create_queue() it is clear from the caller
what happens and no special changes in the caller required.
>
> > drivers/infiniband/hw/mana/cq.c | 125 +++++++++++++++++----------
> > drivers/infiniband/hw/mana/device.c | 1 +
> > drivers/infiniband/hw/mana/main.c | 30 +++++--
> > drivers/infiniband/hw/mana/mana_ib.h | 5 +-
> > drivers/infiniband/hw/mana/qp.c | 5 +-
> > drivers/infiniband/hw/mana/wq.c | 3 +-
> > 6 files changed, 111 insertions(+), 58 deletions(-)
> >
> > diff --git a/drivers/infiniband/hw/mana/cq.c
> > b/drivers/infiniband/hw/mana/cq.c index b2749f971..fa951732a 100644
> > --- a/drivers/infiniband/hw/mana/cq.c
> > +++ b/drivers/infiniband/hw/mana/cq.c
> > @@ -8,12 +8,8 @@
> > int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr
> *attr,
> > struct uverbs_attr_bundle *attrs) {
> > - struct ib_udata *udata = &attrs->driver_udata;
> > struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> > - struct mana_ib_create_cq_resp resp = {};
> > - struct mana_ib_ucontext *mana_ucontext;
> > struct ib_device *ibdev = ibcq->device;
> > - struct mana_ib_create_cq ucmd = {};
> > struct mana_ib_dev *mdev;
> > bool is_rnic_cq;
> > u32 doorbell;
> > @@ -26,48 +22,91 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const
> struct ib_cq_init_attr *attr,
> > cq->cq_handle = INVALID_MANA_HANDLE;
> > is_rnic_cq = mana_ib_is_rnic(mdev);
> >
> > - if (udata) {
> > - if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > - return -EINVAL;
> > -
> > - err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd),
> udata->inlen));
> > - if (err) {
> > - ibdev_dbg(ibdev, "Failed to copy from udata for
> create cq, %d\n", err);
> > - return err;
> > - }
> > + if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> > + return -EINVAL;
>
> We are talking about kernel verbs. ULPs are not designed to provide
> attributes and recover from random driver limitations.
I understand, but there was an ask before to add that check as some automated
code verifier detected overflow. So if we remote it, I guess we get again an ask to fix
the potential overflow.
>
> >
> > - if ((!is_rnic_cq && attr->cqe > mdev-
> >adapter_caps.max_qp_wr) ||
> > - attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
> > - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr-
> >cqe);
> > - return -EINVAL;
> > - }
> > + buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe *
> COMP_ENTRY_SIZE));
> > + cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > + err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ,
> &cq->queue);
> > + if (err) {
> > + ibdev_dbg(ibdev, "Failed to create kernel queue for create cq,
> %d\n", err);
> > + return err;
> > + }
> > + doorbell = mdev->gdma_dev->doorbell;
> >
> > - cq->cqe = attr->cqe;
> > - err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe
> * COMP_ENTRY_SIZE,
> > - &cq->queue);
> > + if (is_rnic_cq) {
> > + err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> > if (err) {
> > - ibdev_dbg(ibdev, "Failed to create queue for create
> cq, %d\n", err);
> > - return err;
> > + ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n",
> err);
> > + goto err_destroy_queue;
> > }
> >
> > - mana_ucontext = rdma_udata_to_drv_context(udata, struct
> mana_ib_ucontext,
> > - ibucontext);
> > - doorbell = mana_ucontext->doorbell;
> > - } else {
> > - if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1) {
> > - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr-
> >cqe);
> > - return -EINVAL;
> > - }
> > - buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> >cqe * COMP_ENTRY_SIZE));
> > - cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > - err = mana_ib_create_kernel_queue(mdev, buf_size,
> GDMA_CQ, &cq->queue);
> > + err = mana_ib_install_cq_cb(mdev, cq);
> > if (err) {
> > - ibdev_dbg(ibdev, "Failed to create kernel queue for
> create cq, %d\n", err);
> > - return err;
> > + ibdev_dbg(ibdev, "Failed to install cq callback, %d\n",
> err);
> > + goto err_destroy_rnic_cq;
> > }
> > - doorbell = mdev->gdma_dev->doorbell;
> > }
> >
> > + spin_lock_init(&cq->cq_lock);
> > + INIT_LIST_HEAD(&cq->list_send_qp);
> > + INIT_LIST_HEAD(&cq->list_recv_qp);
> > +
> > + return 0;
> > +
> > +err_destroy_rnic_cq:
> > + mana_ib_gd_destroy_cq(mdev, cq);
> > +err_destroy_queue:
> > + mana_ib_destroy_queue(mdev, &cq->queue);
> > +
> > + return err;
> > +}
> > +
> > +int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct
> ib_cq_init_attr *attr,
> > + struct uverbs_attr_bundle *attrs) {
> > + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> > + struct ib_udata *udata = &attrs->driver_udata;
> > + struct mana_ib_create_cq_resp resp = {};
> > + struct mana_ib_ucontext *mana_ucontext;
> > + struct ib_device *ibdev = ibcq->device;
> > + struct mana_ib_create_cq ucmd = {};
> > + struct mana_ib_dev *mdev;
> > + bool is_rnic_cq;
> > + u32 doorbell;
> > + int err;
> > +
> > + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > +
> > + cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
> > + cq->cq_handle = INVALID_MANA_HANDLE;
> > + is_rnic_cq = mana_ib_is_rnic(mdev);
> > +
> > + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > + return -EINVAL;
> > +
> > + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> >inlen));
> > + if (err) {
> > + ibdev_dbg(ibdev, "Failed to copy from udata for create cq,
> %d\n", err);
> > + return err;
> > + }
> > +
> > + if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
> > + attr->cqe > U32_MAX / COMP_ENTRY_SIZE)
> > + return -EINVAL;
> > +
> > + cq->cqe = attr->cqe;
> > + err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe *
> COMP_ENTRY_SIZE,
> > + &cq->queue, &ibcq->umem);
> > + if (err) {
> > + ibdev_dbg(ibdev, "Failed to create queue for create cq,
> %d\n", err);
> > + return err;
> > + }
> > +
> > + mana_ucontext = rdma_udata_to_drv_context(udata, struct
> mana_ib_ucontext,
> > + ibucontext);
> > + doorbell = mana_ucontext->doorbell;
> > +
> > if (is_rnic_cq) {
> > err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> > if (err) {
> > @@ -82,13 +121,11 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const
> struct ib_cq_init_attr *attr,
> > }
> > }
> >
> > - if (udata) {
> > - resp.cqid = cq->queue.id;
> > - err = ib_copy_to_udata(udata, &resp, min(sizeof(resp),
> udata->outlen));
> > - if (err) {
> > - ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata,
> %d\n", err);
> > - goto err_remove_cq_cb;
> > - }
> > + resp.cqid = cq->queue.id;
> > + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata-
> >outlen));
> > + if (err) {
> > + ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n",
> err);
> > + goto err_remove_cq_cb;
> > }
> >
> > spin_lock_init(&cq->cq_lock);
> > diff --git a/drivers/infiniband/hw/mana/device.c
> > b/drivers/infiniband/hw/mana/device.c
> > index ccc2279ca..c5c5fe051 100644
> > --- a/drivers/infiniband/hw/mana/device.c
> > +++ b/drivers/infiniband/hw/mana/device.c
> > @@ -21,6 +21,7 @@ static const struct ib_device_ops mana_ib_dev_ops =
> {
> > .alloc_ucontext = mana_ib_alloc_ucontext,
> > .create_ah = mana_ib_create_ah,
> > .create_cq = mana_ib_create_cq,
> > + .create_user_cq = mana_ib_create_user_cq,
> > .create_qp = mana_ib_create_qp,
> > .create_rwq_ind_table = mana_ib_create_rwq_ind_table,
> > .create_wq = mana_ib_create_wq,
> > diff --git a/drivers/infiniband/hw/mana/main.c
> > b/drivers/infiniband/hw/mana/main.c
> > index 8d99cd00f..d1f1e217b 100644
> > --- a/drivers/infiniband/hw/mana/main.c
> > +++ b/drivers/infiniband/hw/mana/main.c
> > @@ -262,33 +262,45 @@ int mana_ib_create_kernel_queue(struct
> > mana_ib_dev *mdev, u32 size, enum gdma_qu }
> >
> > int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
> > - struct mana_ib_queue *queue)
> > + struct mana_ib_queue *queue, struct ib_umem
> **umem)
> > {
> > - struct ib_umem *umem;
> > int err;
> >
> > queue->umem = NULL;
> > queue->id = INVALID_QUEUE_ID;
> > queue->gdma_region = GDMA_INVALID_DMA_REGION;
> >
> > - umem = ib_umem_get(&mdev->ib_dev, addr, size,
> IB_ACCESS_LOCAL_WRITE);
> > - if (IS_ERR(umem)) {
> > - ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n",
> umem);
> > - return PTR_ERR(umem);
> > + if (umem)
> > + queue->umem = *umem;
> > +
> > + if (!queue->umem) {
> > + /* if umem is not provided, allocate it */
> > + queue->umem = ib_umem_get(&mdev->ib_dev, addr, size,
> IB_ACCESS_LOCAL_WRITE);
> > + if (IS_ERR(queue->umem)) {
> > + ibdev_dbg(&mdev->ib_dev, "Failed to get umem,
> %pe\n", queue->umem);
> > + return PTR_ERR(queue->umem);
> > + }
>
> I moved this hunk to the callers on purpose. The idea is to call to
> ib_umem_get() as early as possible.
Fundamentally, it is called at the same moment as in your proposal, but
it is packed in a handy helper we already used before. Instead of adding
boiler plate code around every call of mana_ib_create_queue()
I just integrated the logic you wanted inside. It also helped to solve
the double free bug in your patch as the ownership was not clearly defined
and ib/core and mana both had umem pointer that they wanted to clean.
>
> > }
> >
> > - err = mana_ib_create_zero_offset_dma_region(mdev, umem,
> &queue->gdma_region);
> > + err = mana_ib_create_zero_offset_dma_region(mdev, queue-
> >umem,
> > +&queue->gdma_region);
> > if (err) {
> > ibdev_dbg(&mdev->ib_dev, "Failed to create dma region,
> %d\n", err);
> > goto free_umem;
> > }
> > - queue->umem = umem;
> >
> > ibdev_dbg(&mdev->ib_dev, "created dma region 0x%llx\n",
> > queue->gdma_region);
> >
> > + if (umem) {
> > + /* Give ownership of umem to the caller */
> > + *umem = queue->umem;
> > + queue->umem = NULL;
> > + }
> > +
> > return 0;
> > free_umem:
> > - ib_umem_release(umem);
> > + if (!umem || !(*umem))
> > + /* deallocate mana's umem */
> > + ib_umem_release(queue->umem);
>
> This is another reason why ib_umem_get() shouldn't be buried in the driver's
> internals. IB/core is responsible to release it.
It is released by IB/core, if the helper is called with &umem and the helper succeeds.
This release is for an error case when mana creates umem and it should clean as it created it.
I think the rule is if function fails it should be side-effect free, that is what achieved in this helper.
I can make the helper more verbose and employ extra variables to clearly show that mana cleans
it only when it allocates it.
>
> Thanks
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface
2026-03-04 11:41 ` [EXTERNAL] " Konstantin Taranov
@ 2026-03-04 13:23 ` Konstantin Taranov
2026-03-04 14:06 ` Konstantin Taranov
0 siblings, 1 reply; 11+ messages in thread
From: Konstantin Taranov @ 2026-03-04 13:23 UTC (permalink / raw)
To: Konstantin Taranov, Leon Romanovsky, Konstantin Taranov
Cc: Shiraz Saleem, Long Li, jgg@ziepe.ca, linux-rdma@vger.kernel.org,
linux-kernel@vger.kernel.org
> > > The uverbs CQ creation UAPI allows users to supply their own umem
> > > for a
> > CQ.
> > > Update mana to support this workflow while preserving support for
> > > creating umem through the legacy interface.
> > >
> > > To support RDMA objects that own umem, extend
> > mana_ib_create_queue()
> > > to return the umem to the caller and do not allocate umem if it was
> > > allocted by the caller.
> > >
> > > Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
> > > ---
> > > v2: It is a rework of the patch proposed by Leon
> >
> > I am curious to know what changes were introduced?
>
> It is like your patch, but I kept get_umem in mana_ib_create_queue and
> introduced ownership.
> It made the code simpler and extendable. In your proposal, it was hard to
> track the changes and it led to double free of the umem. With new
> mana_ib_create_queue() it is clear from the caller what happens and no
> special changes in the caller required.
>
> >
> > > drivers/infiniband/hw/mana/cq.c | 125 +++++++++++++++++----------
> > > drivers/infiniband/hw/mana/device.c | 1 +
> > > drivers/infiniband/hw/mana/main.c | 30 +++++--
> > > drivers/infiniband/hw/mana/mana_ib.h | 5 +-
> > > drivers/infiniband/hw/mana/qp.c | 5 +-
> > > drivers/infiniband/hw/mana/wq.c | 3 +-
> > > 6 files changed, 111 insertions(+), 58 deletions(-)
> > >
> > > diff --git a/drivers/infiniband/hw/mana/cq.c
> > > b/drivers/infiniband/hw/mana/cq.c index b2749f971..fa951732a 100644
> > > --- a/drivers/infiniband/hw/mana/cq.c
> > > +++ b/drivers/infiniband/hw/mana/cq.c
> > > @@ -8,12 +8,8 @@
> > > int mana_ib_create_cq(struct ib_cq *ibcq, const struct
> > > ib_cq_init_attr
> > *attr,
> > > struct uverbs_attr_bundle *attrs) {
> > > - struct ib_udata *udata = &attrs->driver_udata;
> > > struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> > > - struct mana_ib_create_cq_resp resp = {};
> > > - struct mana_ib_ucontext *mana_ucontext;
> > > struct ib_device *ibdev = ibcq->device;
> > > - struct mana_ib_create_cq ucmd = {};
> > > struct mana_ib_dev *mdev;
> > > bool is_rnic_cq;
> > > u32 doorbell;
> > > @@ -26,48 +22,91 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const
> > struct ib_cq_init_attr *attr,
> > > cq->cq_handle = INVALID_MANA_HANDLE;
> > > is_rnic_cq = mana_ib_is_rnic(mdev);
> > >
> > > - if (udata) {
> > > - if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > > - return -EINVAL;
> > > -
> > > - err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd),
> > udata->inlen));
> > > - if (err) {
> > > - ibdev_dbg(ibdev, "Failed to copy from udata for
> > create cq, %d\n", err);
> > > - return err;
> > > - }
> > > + if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> > > + return -EINVAL;
> >
> > We are talking about kernel verbs. ULPs are not designed to provide
> > attributes and recover from random driver limitations.
>
> I understand, but there was an ask before to add that check as some
> automated code verifier detected overflow. So if we remote it, I guess we get
> again an ask to fix the potential overflow.
>
> >
> > >
> > > - if ((!is_rnic_cq && attr->cqe > mdev-
> > >adapter_caps.max_qp_wr) ||
> > > - attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
> > > - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr-
> > >cqe);
> > > - return -EINVAL;
> > > - }
> > > + buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe *
> > COMP_ENTRY_SIZE));
> > > + cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > + err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ,
> > &cq->queue);
> > > + if (err) {
> > > + ibdev_dbg(ibdev, "Failed to create kernel queue for create cq,
> > %d\n", err);
> > > + return err;
> > > + }
> > > + doorbell = mdev->gdma_dev->doorbell;
> > >
> > > - cq->cqe = attr->cqe;
> > > - err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe
> > * COMP_ENTRY_SIZE,
> > > - &cq->queue);
> > > + if (is_rnic_cq) {
> > > + err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> > > if (err) {
> > > - ibdev_dbg(ibdev, "Failed to create queue for create
> > cq, %d\n", err);
> > > - return err;
> > > + ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n",
> > err);
> > > + goto err_destroy_queue;
> > > }
> > >
> > > - mana_ucontext = rdma_udata_to_drv_context(udata, struct
> > mana_ib_ucontext,
> > > - ibucontext);
> > > - doorbell = mana_ucontext->doorbell;
> > > - } else {
> > > - if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1) {
> > > - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr-
> > >cqe);
> > > - return -EINVAL;
> > > - }
> > > - buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> > >cqe * COMP_ENTRY_SIZE));
> > > - cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > - err = mana_ib_create_kernel_queue(mdev, buf_size,
> > GDMA_CQ, &cq->queue);
> > > + err = mana_ib_install_cq_cb(mdev, cq);
> > > if (err) {
> > > - ibdev_dbg(ibdev, "Failed to create kernel queue for
> > create cq, %d\n", err);
> > > - return err;
> > > + ibdev_dbg(ibdev, "Failed to install cq callback, %d\n",
> > err);
> > > + goto err_destroy_rnic_cq;
> > > }
> > > - doorbell = mdev->gdma_dev->doorbell;
> > > }
> > >
> > > + spin_lock_init(&cq->cq_lock);
> > > + INIT_LIST_HEAD(&cq->list_send_qp);
> > > + INIT_LIST_HEAD(&cq->list_recv_qp);
> > > +
> > > + return 0;
> > > +
> > > +err_destroy_rnic_cq:
> > > + mana_ib_gd_destroy_cq(mdev, cq);
> > > +err_destroy_queue:
> > > + mana_ib_destroy_queue(mdev, &cq->queue);
> > > +
> > > + return err;
> > > +}
> > > +
> > > +int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct
> > ib_cq_init_attr *attr,
> > > + struct uverbs_attr_bundle *attrs) {
> > > + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> > > + struct ib_udata *udata = &attrs->driver_udata;
> > > + struct mana_ib_create_cq_resp resp = {};
> > > + struct mana_ib_ucontext *mana_ucontext;
> > > + struct ib_device *ibdev = ibcq->device;
> > > + struct mana_ib_create_cq ucmd = {};
> > > + struct mana_ib_dev *mdev;
> > > + bool is_rnic_cq;
> > > + u32 doorbell;
> > > + int err;
> > > +
> > > + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > > +
> > > + cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
> > > + cq->cq_handle = INVALID_MANA_HANDLE;
> > > + is_rnic_cq = mana_ib_is_rnic(mdev);
> > > +
> > > + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > > + return -EINVAL;
> > > +
> > > + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> > >inlen));
> > > + if (err) {
> > > + ibdev_dbg(ibdev, "Failed to copy from udata for create cq,
> > %d\n", err);
> > > + return err;
> > > + }
> > > +
> > > + if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
> > > + attr->cqe > U32_MAX / COMP_ENTRY_SIZE)
> > > + return -EINVAL;
> > > +
> > > + cq->cqe = attr->cqe;
> > > + err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe *
> > COMP_ENTRY_SIZE,
> > > + &cq->queue, &ibcq->umem);
I just realized that I forgot to handle the case when ibcq->umem == NULL
and mana fails later after this call. I need to clean ibcq->umem in this case.
I will address that in v3. I am sorry.
- Konstantin
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface
2026-03-04 13:23 ` Konstantin Taranov
@ 2026-03-04 14:06 ` Konstantin Taranov
2026-03-04 15:59 ` Leon Romanovsky
0 siblings, 1 reply; 11+ messages in thread
From: Konstantin Taranov @ 2026-03-04 14:06 UTC (permalink / raw)
To: Leon Romanovsky, Konstantin Taranov
Cc: Shiraz Saleem, Long Li, jgg@ziepe.ca, linux-rdma@vger.kernel.org,
linux-kernel@vger.kernel.org
> > > > The uverbs CQ creation UAPI allows users to supply their own umem
> > > > for a
> > > CQ.
> > > > Update mana to support this workflow while preserving support for
> > > > creating umem through the legacy interface.
> > > >
> > > > To support RDMA objects that own umem, extend
> > > mana_ib_create_queue()
> > > > to return the umem to the caller and do not allocate umem if it
> > > > was allocted by the caller.
> > > >
> > > > Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
> > > > ---
> > > > v2: It is a rework of the patch proposed by Leon
> > >
> > > I am curious to know what changes were introduced?
> >
> > It is like your patch, but I kept get_umem in mana_ib_create_queue and
> > introduced ownership.
> > It made the code simpler and extendable. In your proposal, it was hard
> > to track the changes and it led to double free of the umem. With new
> > mana_ib_create_queue() it is clear from the caller what happens and no
> > special changes in the caller required.
> >
> > >
> > > > drivers/infiniband/hw/mana/cq.c | 125 +++++++++++++++++----------
> > > > drivers/infiniband/hw/mana/device.c | 1 +
> > > > drivers/infiniband/hw/mana/main.c | 30 +++++--
> > > > drivers/infiniband/hw/mana/mana_ib.h | 5 +-
> > > > drivers/infiniband/hw/mana/qp.c | 5 +-
> > > > drivers/infiniband/hw/mana/wq.c | 3 +-
> > > > 6 files changed, 111 insertions(+), 58 deletions(-)
> > > >
> > > > diff --git a/drivers/infiniband/hw/mana/cq.c
> > > > b/drivers/infiniband/hw/mana/cq.c index b2749f971..fa951732a
> > > > 100644
> > > > --- a/drivers/infiniband/hw/mana/cq.c
> > > > +++ b/drivers/infiniband/hw/mana/cq.c
> > > > @@ -8,12 +8,8 @@
> > > > int mana_ib_create_cq(struct ib_cq *ibcq, const struct
> > > > ib_cq_init_attr
> > > *attr,
> > > > struct uverbs_attr_bundle *attrs) {
> > > > - struct ib_udata *udata = &attrs->driver_udata;
> > > > struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> > > > - struct mana_ib_create_cq_resp resp = {};
> > > > - struct mana_ib_ucontext *mana_ucontext;
> > > > struct ib_device *ibdev = ibcq->device;
> > > > - struct mana_ib_create_cq ucmd = {};
> > > > struct mana_ib_dev *mdev;
> > > > bool is_rnic_cq;
> > > > u32 doorbell;
> > > > @@ -26,48 +22,91 @@ int mana_ib_create_cq(struct ib_cq *ibcq,
> > > > const
> > > struct ib_cq_init_attr *attr,
> > > > cq->cq_handle = INVALID_MANA_HANDLE;
> > > > is_rnic_cq = mana_ib_is_rnic(mdev);
> > > >
> > > > - if (udata) {
> > > > - if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > > > - return -EINVAL;
> > > > -
> > > > - err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd),
> > > udata->inlen));
> > > > - if (err) {
> > > > - ibdev_dbg(ibdev, "Failed to copy from udata for
> > > create cq, %d\n", err);
> > > > - return err;
> > > > - }
> > > > + if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> > > > + return -EINVAL;
> > >
> > > We are talking about kernel verbs. ULPs are not designed to provide
> > > attributes and recover from random driver limitations.
> >
> > I understand, but there was an ask before to add that check as some
> > automated code verifier detected overflow. So if we remote it, I guess
> > we get again an ask to fix the potential overflow.
> >
> > >
> > > >
> > > > - if ((!is_rnic_cq && attr->cqe > mdev-
> > > >adapter_caps.max_qp_wr) ||
> > > > - attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
> > > > - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr-
> > > >cqe);
> > > > - return -EINVAL;
> > > > - }
> > > > + buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe *
> > > COMP_ENTRY_SIZE));
> > > > + cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > + err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ,
> > > &cq->queue);
> > > > + if (err) {
> > > > + ibdev_dbg(ibdev, "Failed to create kernel queue for create cq,
> > > %d\n", err);
> > > > + return err;
> > > > + }
> > > > + doorbell = mdev->gdma_dev->doorbell;
> > > >
> > > > - cq->cqe = attr->cqe;
> > > > - err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe
> > > * COMP_ENTRY_SIZE,
> > > > - &cq->queue);
> > > > + if (is_rnic_cq) {
> > > > + err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> > > > if (err) {
> > > > - ibdev_dbg(ibdev, "Failed to create queue for create
> > > cq, %d\n", err);
> > > > - return err;
> > > > + ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n",
> > > err);
> > > > + goto err_destroy_queue;
> > > > }
> > > >
> > > > - mana_ucontext = rdma_udata_to_drv_context(udata, struct
> > > mana_ib_ucontext,
> > > > - ibucontext);
> > > > - doorbell = mana_ucontext->doorbell;
> > > > - } else {
> > > > - if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1) {
> > > > - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr-
> > > >cqe);
> > > > - return -EINVAL;
> > > > - }
> > > > - buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> > > >cqe * COMP_ENTRY_SIZE));
> > > > - cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > - err = mana_ib_create_kernel_queue(mdev, buf_size,
> > > GDMA_CQ, &cq->queue);
> > > > + err = mana_ib_install_cq_cb(mdev, cq);
> > > > if (err) {
> > > > - ibdev_dbg(ibdev, "Failed to create kernel queue for
> > > create cq, %d\n", err);
> > > > - return err;
> > > > + ibdev_dbg(ibdev, "Failed to install cq callback, %d\n",
> > > err);
> > > > + goto err_destroy_rnic_cq;
> > > > }
> > > > - doorbell = mdev->gdma_dev->doorbell;
> > > > }
> > > >
> > > > + spin_lock_init(&cq->cq_lock);
> > > > + INIT_LIST_HEAD(&cq->list_send_qp);
> > > > + INIT_LIST_HEAD(&cq->list_recv_qp);
> > > > +
> > > > + return 0;
> > > > +
> > > > +err_destroy_rnic_cq:
> > > > + mana_ib_gd_destroy_cq(mdev, cq);
> > > > +err_destroy_queue:
> > > > + mana_ib_destroy_queue(mdev, &cq->queue);
> > > > +
> > > > + return err;
> > > > +}
> > > > +
> > > > +int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct
> > > ib_cq_init_attr *attr,
> > > > + struct uverbs_attr_bundle *attrs) {
> > > > + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> > > > + struct ib_udata *udata = &attrs->driver_udata;
> > > > + struct mana_ib_create_cq_resp resp = {};
> > > > + struct mana_ib_ucontext *mana_ucontext;
> > > > + struct ib_device *ibdev = ibcq->device;
> > > > + struct mana_ib_create_cq ucmd = {};
> > > > + struct mana_ib_dev *mdev;
> > > > + bool is_rnic_cq;
> > > > + u32 doorbell;
> > > > + int err;
> > > > +
> > > > + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > > > +
> > > > + cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
> > > > + cq->cq_handle = INVALID_MANA_HANDLE;
> > > > + is_rnic_cq = mana_ib_is_rnic(mdev);
> > > > +
> > > > + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > > > + return -EINVAL;
> > > > +
> > > > + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> > > >inlen));
> > > > + if (err) {
> > > > + ibdev_dbg(ibdev, "Failed to copy from udata for create cq,
> > > %d\n", err);
> > > > + return err;
> > > > + }
> > > > +
> > > > + if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
> > > > + attr->cqe > U32_MAX / COMP_ENTRY_SIZE)
> > > > + return -EINVAL;
> > > > +
> > > > + cq->cqe = attr->cqe;
> > > > + err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe *
> > > COMP_ENTRY_SIZE,
> > > > + &cq->queue, &ibcq->umem);
>
> I just realized that I forgot to handle the case when ibcq->umem == NULL and
> mana fails later after this call. I need to clean ibcq->umem in this case.
> I will address that in v3. I am sorry.
>
Hi Leon,
After re-reading the code, I see that there is no bug in v2 as the umem gets deallocated
on failure inside the handler of UVERBS_METHOD_CQ_CREATE. I also see that you also had
the same logic in v1. So, what is your recommendation? Leave v2 logic as is, so mana would
immediately give ownership of umem to cq->umem, and if mana_ib_create_user_cq() fails at later stage
it should not clean cq->umem and leave it to the caller handle (i.e., UVERBS_METHOD_CQ_CREATE)
to clean cq->umem regardless of who created it.
Or should I make v3, where I will assign umem to cq->umem right before return 0, so that if
mana_ib_create_user_cq() fails it does not change cq->umem at all.
> - Konstantin
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface
2026-03-04 14:06 ` Konstantin Taranov
@ 2026-03-04 15:59 ` Leon Romanovsky
2026-03-05 9:20 ` Konstantin Taranov
0 siblings, 1 reply; 11+ messages in thread
From: Leon Romanovsky @ 2026-03-04 15:59 UTC (permalink / raw)
To: Konstantin Taranov
Cc: Konstantin Taranov, Shiraz Saleem, Long Li, jgg@ziepe.ca,
linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
On Wed, Mar 04, 2026 at 02:06:21PM +0000, Konstantin Taranov wrote:
> > > > > The uverbs CQ creation UAPI allows users to supply their own umem
> > > > > for a
> > > > CQ.
> > > > > Update mana to support this workflow while preserving support for
> > > > > creating umem through the legacy interface.
> > > > >
> > > > > To support RDMA objects that own umem, extend
> > > > mana_ib_create_queue()
> > > > > to return the umem to the caller and do not allocate umem if it
> > > > > was allocted by the caller.
> > > > >
> > > > > Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
> > > > > ---
> > > > > v2: It is a rework of the patch proposed by Leon
> > > >
> > > > I am curious to know what changes were introduced?
> > >
> > > It is like your patch, but I kept get_umem in mana_ib_create_queue and
> > > introduced ownership.
> > > It made the code simpler and extendable. In your proposal, it was hard
> > > to track the changes and it led to double free of the umem. With new
> > > mana_ib_create_queue() it is clear from the caller what happens and no
> > > special changes in the caller required.
> > >
> > > >
> > > > > drivers/infiniband/hw/mana/cq.c | 125 +++++++++++++++++----------
> > > > > drivers/infiniband/hw/mana/device.c | 1 +
> > > > > drivers/infiniband/hw/mana/main.c | 30 +++++--
> > > > > drivers/infiniband/hw/mana/mana_ib.h | 5 +-
> > > > > drivers/infiniband/hw/mana/qp.c | 5 +-
> > > > > drivers/infiniband/hw/mana/wq.c | 3 +-
> > > > > 6 files changed, 111 insertions(+), 58 deletions(-)
> > > > >
> > > > > diff --git a/drivers/infiniband/hw/mana/cq.c
> > > > > b/drivers/infiniband/hw/mana/cq.c index b2749f971..fa951732a
> > > > > 100644
> > > > > --- a/drivers/infiniband/hw/mana/cq.c
> > > > > +++ b/drivers/infiniband/hw/mana/cq.c
> > > > > @@ -8,12 +8,8 @@
> > > > > int mana_ib_create_cq(struct ib_cq *ibcq, const struct
> > > > > ib_cq_init_attr
> > > > *attr,
> > > > > struct uverbs_attr_bundle *attrs) {
> > > > > - struct ib_udata *udata = &attrs->driver_udata;
> > > > > struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> > > > > - struct mana_ib_create_cq_resp resp = {};
> > > > > - struct mana_ib_ucontext *mana_ucontext;
> > > > > struct ib_device *ibdev = ibcq->device;
> > > > > - struct mana_ib_create_cq ucmd = {};
> > > > > struct mana_ib_dev *mdev;
> > > > > bool is_rnic_cq;
> > > > > u32 doorbell;
> > > > > @@ -26,48 +22,91 @@ int mana_ib_create_cq(struct ib_cq *ibcq,
> > > > > const
> > > > struct ib_cq_init_attr *attr,
> > > > > cq->cq_handle = INVALID_MANA_HANDLE;
> > > > > is_rnic_cq = mana_ib_is_rnic(mdev);
> > > > >
> > > > > - if (udata) {
> > > > > - if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > > > > - return -EINVAL;
> > > > > -
> > > > > - err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd),
> > > > udata->inlen));
> > > > > - if (err) {
> > > > > - ibdev_dbg(ibdev, "Failed to copy from udata for
> > > > create cq, %d\n", err);
> > > > > - return err;
> > > > > - }
> > > > > + if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> > > > > + return -EINVAL;
> > > >
> > > > We are talking about kernel verbs. ULPs are not designed to provide
> > > > attributes and recover from random driver limitations.
> > >
> > > I understand, but there was an ask before to add that check as some
> > > automated code verifier detected overflow. So if we remote it, I guess
> > > we get again an ask to fix the potential overflow.
> > >
> > > >
> > > > >
> > > > > - if ((!is_rnic_cq && attr->cqe > mdev-
> > > > >adapter_caps.max_qp_wr) ||
> > > > > - attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
> > > > > - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr-
> > > > >cqe);
> > > > > - return -EINVAL;
> > > > > - }
> > > > > + buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe *
> > > > COMP_ENTRY_SIZE));
> > > > > + cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > > + err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ,
> > > > &cq->queue);
> > > > > + if (err) {
> > > > > + ibdev_dbg(ibdev, "Failed to create kernel queue for create cq,
> > > > %d\n", err);
> > > > > + return err;
> > > > > + }
> > > > > + doorbell = mdev->gdma_dev->doorbell;
> > > > >
> > > > > - cq->cqe = attr->cqe;
> > > > > - err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe
> > > > * COMP_ENTRY_SIZE,
> > > > > - &cq->queue);
> > > > > + if (is_rnic_cq) {
> > > > > + err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> > > > > if (err) {
> > > > > - ibdev_dbg(ibdev, "Failed to create queue for create
> > > > cq, %d\n", err);
> > > > > - return err;
> > > > > + ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n",
> > > > err);
> > > > > + goto err_destroy_queue;
> > > > > }
> > > > >
> > > > > - mana_ucontext = rdma_udata_to_drv_context(udata, struct
> > > > mana_ib_ucontext,
> > > > > - ibucontext);
> > > > > - doorbell = mana_ucontext->doorbell;
> > > > > - } else {
> > > > > - if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1) {
> > > > > - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr-
> > > > >cqe);
> > > > > - return -EINVAL;
> > > > > - }
> > > > > - buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> > > > >cqe * COMP_ENTRY_SIZE));
> > > > > - cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > > - err = mana_ib_create_kernel_queue(mdev, buf_size,
> > > > GDMA_CQ, &cq->queue);
> > > > > + err = mana_ib_install_cq_cb(mdev, cq);
> > > > > if (err) {
> > > > > - ibdev_dbg(ibdev, "Failed to create kernel queue for
> > > > create cq, %d\n", err);
> > > > > - return err;
> > > > > + ibdev_dbg(ibdev, "Failed to install cq callback, %d\n",
> > > > err);
> > > > > + goto err_destroy_rnic_cq;
> > > > > }
> > > > > - doorbell = mdev->gdma_dev->doorbell;
> > > > > }
> > > > >
> > > > > + spin_lock_init(&cq->cq_lock);
> > > > > + INIT_LIST_HEAD(&cq->list_send_qp);
> > > > > + INIT_LIST_HEAD(&cq->list_recv_qp);
> > > > > +
> > > > > + return 0;
> > > > > +
> > > > > +err_destroy_rnic_cq:
> > > > > + mana_ib_gd_destroy_cq(mdev, cq);
> > > > > +err_destroy_queue:
> > > > > + mana_ib_destroy_queue(mdev, &cq->queue);
> > > > > +
> > > > > + return err;
> > > > > +}
> > > > > +
> > > > > +int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct
> > > > ib_cq_init_attr *attr,
> > > > > + struct uverbs_attr_bundle *attrs) {
> > > > > + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> > > > > + struct ib_udata *udata = &attrs->driver_udata;
> > > > > + struct mana_ib_create_cq_resp resp = {};
> > > > > + struct mana_ib_ucontext *mana_ucontext;
> > > > > + struct ib_device *ibdev = ibcq->device;
> > > > > + struct mana_ib_create_cq ucmd = {};
> > > > > + struct mana_ib_dev *mdev;
> > > > > + bool is_rnic_cq;
> > > > > + u32 doorbell;
> > > > > + int err;
> > > > > +
> > > > > + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > > > > +
> > > > > + cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
> > > > > + cq->cq_handle = INVALID_MANA_HANDLE;
> > > > > + is_rnic_cq = mana_ib_is_rnic(mdev);
> > > > > +
> > > > > + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > > > > + return -EINVAL;
> > > > > +
> > > > > + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> > > > >inlen));
> > > > > + if (err) {
> > > > > + ibdev_dbg(ibdev, "Failed to copy from udata for create cq,
> > > > %d\n", err);
> > > > > + return err;
> > > > > + }
> > > > > +
> > > > > + if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
> > > > > + attr->cqe > U32_MAX / COMP_ENTRY_SIZE)
> > > > > + return -EINVAL;
> > > > > +
> > > > > + cq->cqe = attr->cqe;
> > > > > + err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe *
> > > > COMP_ENTRY_SIZE,
> > > > > + &cq->queue, &ibcq->umem);
> >
> > I just realized that I forgot to handle the case when ibcq->umem == NULL and
> > mana fails later after this call. I need to clean ibcq->umem in this case.
> > I will address that in v3. I am sorry.
> >
>
> Hi Leon,
> After re-reading the code, I see that there is no bug in v2 as the umem gets deallocated
> on failure inside the handler of UVERBS_METHOD_CQ_CREATE. I also see that you also had
> the same logic in v1. So, what is your recommendation? Leave v2 logic as is, so mana would
> immediately give ownership of umem to cq->umem, and if mana_ib_create_user_cq() fails at later stage
> it should not clean cq->umem and leave it to the caller handle (i.e., UVERBS_METHOD_CQ_CREATE)
> to clean cq->umem regardless of who created it.
>
> Or should I make v3, where I will assign umem to cq->umem right before return 0, so that if
> mana_ib_create_user_cq() fails it does not change cq->umem at all.
My suggestion is to stick with my original patch and remove
ib_umem_release(queue->umem) from mana_ib_destroy_queue().
Thanks
>
> > - Konstantin
> >
>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface
2026-03-04 15:59 ` Leon Romanovsky
@ 2026-03-05 9:20 ` Konstantin Taranov
2026-03-11 13:29 ` Konstantin Taranov
0 siblings, 1 reply; 11+ messages in thread
From: Konstantin Taranov @ 2026-03-05 9:20 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Konstantin Taranov, Shiraz Saleem, Long Li, jgg@ziepe.ca,
linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
> On Wed, Mar 04, 2026 at 02:06:21PM +0000, Konstantin Taranov wrote:
> > > > > > The uverbs CQ creation UAPI allows users to supply their own
> > > > > > umem for a
> > > > > CQ.
> > > > > > Update mana to support this workflow while preserving support
> > > > > > for creating umem through the legacy interface.
> > > > > >
> > > > > > To support RDMA objects that own umem, extend
> > > > > mana_ib_create_queue()
> > > > > > to return the umem to the caller and do not allocate umem if
> > > > > > it was allocted by the caller.
> > > > > >
> > > > > > Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
> > > > > > ---
> > > > > > v2: It is a rework of the patch proposed by Leon
> > > > >
> > > > > I am curious to know what changes were introduced?
> > > >
> > > > It is like your patch, but I kept get_umem in mana_ib_create_queue
> > > > and introduced ownership.
> > > > It made the code simpler and extendable. In your proposal, it was
> > > > hard to track the changes and it led to double free of the umem.
> > > > With new
> > > > mana_ib_create_queue() it is clear from the caller what happens
> > > > and no special changes in the caller required.
> > > >
> > > > >
> > > > > > drivers/infiniband/hw/mana/cq.c | 125 +++++++++++++++++------
> ----
> > > > > > drivers/infiniband/hw/mana/device.c | 1 +
> > > > > > drivers/infiniband/hw/mana/main.c | 30 +++++--
> > > > > > drivers/infiniband/hw/mana/mana_ib.h | 5 +-
> > > > > > drivers/infiniband/hw/mana/qp.c | 5 +-
> > > > > > drivers/infiniband/hw/mana/wq.c | 3 +-
> > > > > > 6 files changed, 111 insertions(+), 58 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/infiniband/hw/mana/cq.c
> > > > > > b/drivers/infiniband/hw/mana/cq.c index b2749f971..fa951732a
> > > > > > 100644
> > > > > > --- a/drivers/infiniband/hw/mana/cq.c
> > > > > > +++ b/drivers/infiniband/hw/mana/cq.c
> > > > > > @@ -8,12 +8,8 @@
> > > > > > int mana_ib_create_cq(struct ib_cq *ibcq, const struct
> > > > > > ib_cq_init_attr
> > > > > *attr,
> > > > > > struct uverbs_attr_bundle *attrs) {
> > > > > > - struct ib_udata *udata = &attrs->driver_udata;
> > > > > > struct mana_ib_cq *cq = container_of(ibcq, struct
> mana_ib_cq, ibcq);
> > > > > > - struct mana_ib_create_cq_resp resp = {};
> > > > > > - struct mana_ib_ucontext *mana_ucontext;
> > > > > > struct ib_device *ibdev = ibcq->device;
> > > > > > - struct mana_ib_create_cq ucmd = {};
> > > > > > struct mana_ib_dev *mdev;
> > > > > > bool is_rnic_cq;
> > > > > > u32 doorbell;
> > > > > > @@ -26,48 +22,91 @@ int mana_ib_create_cq(struct ib_cq *ibcq,
> > > > > > const
> > > > > struct ib_cq_init_attr *attr,
> > > > > > cq->cq_handle = INVALID_MANA_HANDLE;
> > > > > > is_rnic_cq = mana_ib_is_rnic(mdev);
> > > > > >
> > > > > > - if (udata) {
> > > > > > - if (udata->inlen < offsetof(struct mana_ib_create_cq,
> flags))
> > > > > > - return -EINVAL;
> > > > > > -
> > > > > > - err = ib_copy_from_udata(&ucmd, udata,
> min(sizeof(ucmd),
> > > > > udata->inlen));
> > > > > > - if (err) {
> > > > > > - ibdev_dbg(ibdev, "Failed to copy from udata
> for
> > > > > create cq, %d\n", err);
> > > > > > - return err;
> > > > > > - }
> > > > > > + if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> > > > > > + return -EINVAL;
> > > > >
> > > > > We are talking about kernel verbs. ULPs are not designed to
> > > > > provide attributes and recover from random driver limitations.
> > > >
> > > > I understand, but there was an ask before to add that check as
> > > > some automated code verifier detected overflow. So if we remote
> > > > it, I guess we get again an ask to fix the potential overflow.
> > > >
> > > > >
> > > > > >
> > > > > > - if ((!is_rnic_cq && attr->cqe > mdev-
> > > > > >adapter_caps.max_qp_wr) ||
> > > > > > - attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
> > > > > > - ibdev_dbg(ibdev, "CQE %d exceeding
> limit\n", attr-
> > > > > >cqe);
> > > > > > - return -EINVAL;
> > > > > > - }
> > > > > > + buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> >cqe *
> > > > > COMP_ENTRY_SIZE));
> > > > > > + cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > > > + err = mana_ib_create_kernel_queue(mdev, buf_size,
> GDMA_CQ,
> > > > > &cq->queue);
> > > > > > + if (err) {
> > > > > > + ibdev_dbg(ibdev, "Failed to create kernel queue for
> create
> > > > > > +cq,
> > > > > %d\n", err);
> > > > > > + return err;
> > > > > > + }
> > > > > > + doorbell = mdev->gdma_dev->doorbell;
> > > > > >
> > > > > > - cq->cqe = attr->cqe;
> > > > > > - err = mana_ib_create_queue(mdev, ucmd.buf_addr,
> cq->cqe
> > > > > * COMP_ENTRY_SIZE,
> > > > > > - &cq->queue);
> > > > > > + if (is_rnic_cq) {
> > > > > > + err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> > > > > > if (err) {
> > > > > > - ibdev_dbg(ibdev, "Failed to create queue for
> create
> > > > > cq, %d\n", err);
> > > > > > - return err;
> > > > > > + ibdev_dbg(ibdev, "Failed to create RNIC cq,
> %d\n",
> > > > > err);
> > > > > > + goto err_destroy_queue;
> > > > > > }
> > > > > >
> > > > > > - mana_ucontext = rdma_udata_to_drv_context(udata,
> struct
> > > > > mana_ib_ucontext,
> > > > > > - ibucontext);
> > > > > > - doorbell = mana_ucontext->doorbell;
> > > > > > - } else {
> > > > > > - if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> {
> > > > > > - ibdev_dbg(ibdev, "CQE %d exceeding
> limit\n", attr-
> > > > > >cqe);
> > > > > > - return -EINVAL;
> > > > > > - }
> > > > > > - buf_size =
> MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> > > > > >cqe * COMP_ENTRY_SIZE));
> > > > > > - cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > > > - err = mana_ib_create_kernel_queue(mdev, buf_size,
> > > > > GDMA_CQ, &cq->queue);
> > > > > > + err = mana_ib_install_cq_cb(mdev, cq);
> > > > > > if (err) {
> > > > > > - ibdev_dbg(ibdev, "Failed to create kernel
> queue for
> > > > > create cq, %d\n", err);
> > > > > > - return err;
> > > > > > + ibdev_dbg(ibdev, "Failed to install cq callback,
> %d\n",
> > > > > err);
> > > > > > + goto err_destroy_rnic_cq;
> > > > > > }
> > > > > > - doorbell = mdev->gdma_dev->doorbell;
> > > > > > }
> > > > > >
> > > > > > + spin_lock_init(&cq->cq_lock);
> > > > > > + INIT_LIST_HEAD(&cq->list_send_qp);
> > > > > > + INIT_LIST_HEAD(&cq->list_recv_qp);
> > > > > > +
> > > > > > + return 0;
> > > > > > +
> > > > > > +err_destroy_rnic_cq:
> > > > > > + mana_ib_gd_destroy_cq(mdev, cq);
> > > > > > +err_destroy_queue:
> > > > > > + mana_ib_destroy_queue(mdev, &cq->queue);
> > > > > > +
> > > > > > + return err;
> > > > > > +}
> > > > > > +
> > > > > > +int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct
> > > > > ib_cq_init_attr *attr,
> > > > > > + struct uverbs_attr_bundle *attrs) {
> > > > > > + struct mana_ib_cq *cq = container_of(ibcq, struct
> mana_ib_cq, ibcq);
> > > > > > + struct ib_udata *udata = &attrs->driver_udata;
> > > > > > + struct mana_ib_create_cq_resp resp = {};
> > > > > > + struct mana_ib_ucontext *mana_ucontext;
> > > > > > + struct ib_device *ibdev = ibcq->device;
> > > > > > + struct mana_ib_create_cq ucmd = {};
> > > > > > + struct mana_ib_dev *mdev;
> > > > > > + bool is_rnic_cq;
> > > > > > + u32 doorbell;
> > > > > > + int err;
> > > > > > +
> > > > > > + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > > > > > +
> > > > > > + cq->comp_vector = attr->comp_vector % ibdev-
> >num_comp_vectors;
> > > > > > + cq->cq_handle = INVALID_MANA_HANDLE;
> > > > > > + is_rnic_cq = mana_ib_is_rnic(mdev);
> > > > > > +
> > > > > > + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > > > > > + return -EINVAL;
> > > > > > +
> > > > > > + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd),
> > > > > > +udata-
> > > > > >inlen));
> > > > > > + if (err) {
> > > > > > + ibdev_dbg(ibdev, "Failed to copy from udata for
> create cq,
> > > > > %d\n", err);
> > > > > > + return err;
> > > > > > + }
> > > > > > +
> > > > > > + if ((!is_rnic_cq && attr->cqe > mdev-
> >adapter_caps.max_qp_wr) ||
> > > > > > + attr->cqe > U32_MAX / COMP_ENTRY_SIZE)
> > > > > > + return -EINVAL;
> > > > > > +
> > > > > > + cq->cqe = attr->cqe;
> > > > > > + err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe
> *
> > > > > COMP_ENTRY_SIZE,
> > > > > > + &cq->queue, &ibcq->umem);
> > >
> > > I just realized that I forgot to handle the case when ibcq->umem ==
> > > NULL and mana fails later after this call. I need to clean ibcq->umem in
> this case.
> > > I will address that in v3. I am sorry.
> > >
> >
> > Hi Leon,
> > After re-reading the code, I see that there is no bug in v2 as the
> > umem gets deallocated on failure inside the handler of
> > UVERBS_METHOD_CQ_CREATE. I also see that you also had the same logic
> > in v1. So, what is your recommendation? Leave v2 logic as is, so mana
> > would immediately give ownership of umem to cq->umem, and if
> > mana_ib_create_user_cq() fails at later stage it should not clean cq->umem
> and leave it to the caller handle (i.e., UVERBS_METHOD_CQ_CREATE) to clean
> cq->umem regardless of who created it.
> >
> > Or should I make v3, where I will assign umem to cq->umem right before
> > return 0, so that if
> > mana_ib_create_user_cq() fails it does not change cq->umem at all.
>
> My suggestion is to stick with my original patch and remove
> ib_umem_release(queue->umem) from mana_ib_destroy_queue().
Unfortunately, this will break the code and will require more boilerplate workarounds.
MANA still needs to allocate umem for many objects. I see that we can generalize for CQs,
but mana RC QPs use 4 queues at the moment, and I am preparing a patch to have 5th queue
for BIND WQEs for RC. Our UC QP will use 3 queues. Having a nice entity as mana queue allows us to
have clean code and do not have extra complex conditions to detect whether a mana queue
has an umem and the cleanup of mana queue handles that.
My proposal is to keep the nice property of mana queues and just extend the mana_ib_create_queue()
to satisfy your requirements without adding burden of special handling umems. As I understand the new
API requirement is that umem for a CQ should be owned by the ib core, and that is what the helper
achieves: the umem pointer is not stored and assigned to cq->umem directly. The only open
question I have is what the requirement for writing an umem to cq->umem is. In your patch, I see that
you mutate cq->umem even if mana_ib_create_user_cq() fails. Is it the behavior you need/want/allow and
will it be enforced in other IB objects that have one umem (e.g., WQs, SRQs)?
As it seems to be allowed in the UVERBS_METHOD_CQ_CREATE code:
if (ib_dev->ops.create_user_cq)
ret = ib_dev->ops.create_user_cq(cq, &attr, attrs);
else
ret = ib_dev->ops.create_cq(cq, &attr, attrs);
if (ret)
goto err_free;
...
err_free:
ib_umem_release(cq->umem);
If it is not expected, you might enforce it by adding WARN_ON(cq->umem != umem); before ib_umem_release()
And I am happy to adjust mana code to satisfy this behavior.
here I am just trying to get a win-win situation where we both can be happy about the code. I looked at
your proposal and removing ib_umem_release from mana destroy queue will just add it after mana_ib_destroy_queue()
in all code paths except the CQ. As well as we would need to add code to handle failures of ib_umem_get, so keeping it
in the helper removes the need to have that. Instead, I would like to make the helper to handle the cases when umem
is created by upper stack or is not created but want to be owned by the upper stack. What is more, I would like the helper
be general enough to be used for other ib core objects and that is why I would like to know the model so I adjust the helper accordingly.
Thanks
Konstantin
>
> Thanks
>
> >
> > > - Konstantin
> > >
> >
> >
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface
2026-03-05 9:20 ` Konstantin Taranov
@ 2026-03-11 13:29 ` Konstantin Taranov
2026-03-11 18:55 ` Leon Romanovsky
0 siblings, 1 reply; 11+ messages in thread
From: Konstantin Taranov @ 2026-03-11 13:29 UTC (permalink / raw)
To: Konstantin Taranov, Leon Romanovsky
Cc: Konstantin Taranov, Shiraz Saleem, Long Li, jgg@ziepe.ca,
linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
> > On Wed, Mar 04, 2026 at 02:06:21PM +0000, Konstantin Taranov wrote:
> > > > > > > The uverbs CQ creation UAPI allows users to supply their own
> > > > > > > umem for a
> > > > > > CQ.
> > > > > > > Update mana to support this workflow while preserving
> > > > > > > support for creating umem through the legacy interface.
> > > > > > >
> > > > > > > To support RDMA objects that own umem, extend
> > > > > > mana_ib_create_queue()
> > > > > > > to return the umem to the caller and do not allocate umem if
> > > > > > > it was allocted by the caller.
> > > > > > >
> > > > > > > Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
> > > > > > > ---
> > > > > > > v2: It is a rework of the patch proposed by Leon
> > > > > >
> > > > > > I am curious to know what changes were introduced?
> > > > >
> > > > > It is like your patch, but I kept get_umem in
> > > > > mana_ib_create_queue and introduced ownership.
> > > > > It made the code simpler and extendable. In your proposal, it
> > > > > was hard to track the changes and it led to double free of the umem.
> > > > > With new
> > > > > mana_ib_create_queue() it is clear from the caller what happens
> > > > > and no special changes in the caller required.
> > > > >
> > > > > >
> > > > > > > drivers/infiniband/hw/mana/cq.c | 125 +++++++++++++++++---
> ---
> > ----
> > > > > > > drivers/infiniband/hw/mana/device.c | 1 +
> > > > > > > drivers/infiniband/hw/mana/main.c | 30 +++++--
> > > > > > > drivers/infiniband/hw/mana/mana_ib.h | 5 +-
> > > > > > > drivers/infiniband/hw/mana/qp.c | 5 +-
> > > > > > > drivers/infiniband/hw/mana/wq.c | 3 +-
> > > > > > > 6 files changed, 111 insertions(+), 58 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/infiniband/hw/mana/cq.c
> > > > > > > b/drivers/infiniband/hw/mana/cq.c index b2749f971..fa951732a
> > > > > > > 100644
> > > > > > > --- a/drivers/infiniband/hw/mana/cq.c
> > > > > > > +++ b/drivers/infiniband/hw/mana/cq.c
> > > > > > > @@ -8,12 +8,8 @@
> > > > > > > int mana_ib_create_cq(struct ib_cq *ibcq, const struct
> > > > > > > ib_cq_init_attr
> > > > > > *attr,
> > > > > > > struct uverbs_attr_bundle *attrs) {
> > > > > > > - struct ib_udata *udata = &attrs->driver_udata;
> > > > > > > struct mana_ib_cq *cq = container_of(ibcq, struct
> > mana_ib_cq, ibcq);
> > > > > > > - struct mana_ib_create_cq_resp resp = {};
> > > > > > > - struct mana_ib_ucontext *mana_ucontext;
> > > > > > > struct ib_device *ibdev = ibcq->device;
> > > > > > > - struct mana_ib_create_cq ucmd = {};
> > > > > > > struct mana_ib_dev *mdev;
> > > > > > > bool is_rnic_cq;
> > > > > > > u32 doorbell;
> > > > > > > @@ -26,48 +22,91 @@ int mana_ib_create_cq(struct ib_cq
> > > > > > > *ibcq, const
> > > > > > struct ib_cq_init_attr *attr,
> > > > > > > cq->cq_handle = INVALID_MANA_HANDLE;
> > > > > > > is_rnic_cq = mana_ib_is_rnic(mdev);
> > > > > > >
> > > > > > > - if (udata) {
> > > > > > > - if (udata->inlen < offsetof(struct mana_ib_create_cq,
> > flags))
> > > > > > > - return -EINVAL;
> > > > > > > -
> > > > > > > - err = ib_copy_from_udata(&ucmd, udata,
> > min(sizeof(ucmd),
> > > > > > udata->inlen));
> > > > > > > - if (err) {
> > > > > > > - ibdev_dbg(ibdev, "Failed to copy from udata
> > for
> > > > > > create cq, %d\n", err);
> > > > > > > - return err;
> > > > > > > - }
> > > > > > > + if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> > > > > > > + return -EINVAL;
> > > > > >
> > > > > > We are talking about kernel verbs. ULPs are not designed to
> > > > > > provide attributes and recover from random driver limitations.
> > > > >
> > > > > I understand, but there was an ask before to add that check as
> > > > > some automated code verifier detected overflow. So if we remote
> > > > > it, I guess we get again an ask to fix the potential overflow.
> > > > >
> > > > > >
> > > > > > >
> > > > > > > - if ((!is_rnic_cq && attr->cqe > mdev-
> > > > > > >adapter_caps.max_qp_wr) ||
> > > > > > > - attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
> > > > > > > - ibdev_dbg(ibdev, "CQE %d exceeding
> > limit\n", attr-
> > > > > > >cqe);
> > > > > > > - return -EINVAL;
> > > > > > > - }
> > > > > > > + buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> > >cqe *
> > > > > > COMP_ENTRY_SIZE));
> > > > > > > + cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > > > > + err = mana_ib_create_kernel_queue(mdev, buf_size,
> > GDMA_CQ,
> > > > > > &cq->queue);
> > > > > > > + if (err) {
> > > > > > > + ibdev_dbg(ibdev, "Failed to create kernel queue for
> > create
> > > > > > > +cq,
> > > > > > %d\n", err);
> > > > > > > + return err;
> > > > > > > + }
> > > > > > > + doorbell = mdev->gdma_dev->doorbell;
> > > > > > >
> > > > > > > - cq->cqe = attr->cqe;
> > > > > > > - err = mana_ib_create_queue(mdev, ucmd.buf_addr,
> > cq->cqe
> > > > > > * COMP_ENTRY_SIZE,
> > > > > > > - &cq->queue);
> > > > > > > + if (is_rnic_cq) {
> > > > > > > + err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> > > > > > > if (err) {
> > > > > > > - ibdev_dbg(ibdev, "Failed to create queue for
> > create
> > > > > > cq, %d\n", err);
> > > > > > > - return err;
> > > > > > > + ibdev_dbg(ibdev, "Failed to create RNIC cq,
> > %d\n",
> > > > > > err);
> > > > > > > + goto err_destroy_queue;
> > > > > > > }
> > > > > > >
> > > > > > > - mana_ucontext = rdma_udata_to_drv_context(udata,
> > struct
> > > > > > mana_ib_ucontext,
> > > > > > > - ibucontext);
> > > > > > > - doorbell = mana_ucontext->doorbell;
> > > > > > > - } else {
> > > > > > > - if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> > {
> > > > > > > - ibdev_dbg(ibdev, "CQE %d exceeding
> > limit\n", attr-
> > > > > > >cqe);
> > > > > > > - return -EINVAL;
> > > > > > > - }
> > > > > > > - buf_size =
> > MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> > > > > > >cqe * COMP_ENTRY_SIZE));
> > > > > > > - cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > > > > - err = mana_ib_create_kernel_queue(mdev, buf_size,
> > > > > > GDMA_CQ, &cq->queue);
> > > > > > > + err = mana_ib_install_cq_cb(mdev, cq);
> > > > > > > if (err) {
> > > > > > > - ibdev_dbg(ibdev, "Failed to create kernel
> > queue for
> > > > > > create cq, %d\n", err);
> > > > > > > - return err;
> > > > > > > + ibdev_dbg(ibdev, "Failed to install cq callback,
> > %d\n",
> > > > > > err);
> > > > > > > + goto err_destroy_rnic_cq;
> > > > > > > }
> > > > > > > - doorbell = mdev->gdma_dev->doorbell;
> > > > > > > }
> > > > > > >
> > > > > > > + spin_lock_init(&cq->cq_lock);
> > > > > > > + INIT_LIST_HEAD(&cq->list_send_qp);
> > > > > > > + INIT_LIST_HEAD(&cq->list_recv_qp);
> > > > > > > +
> > > > > > > + return 0;
> > > > > > > +
> > > > > > > +err_destroy_rnic_cq:
> > > > > > > + mana_ib_gd_destroy_cq(mdev, cq);
> > > > > > > +err_destroy_queue:
> > > > > > > + mana_ib_destroy_queue(mdev, &cq->queue);
> > > > > > > +
> > > > > > > + return err;
> > > > > > > +}
> > > > > > > +
> > > > > > > +int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct
> > > > > > ib_cq_init_attr *attr,
> > > > > > > + struct uverbs_attr_bundle *attrs) {
> > > > > > > + struct mana_ib_cq *cq = container_of(ibcq, struct
> > mana_ib_cq, ibcq);
> > > > > > > + struct ib_udata *udata = &attrs->driver_udata;
> > > > > > > + struct mana_ib_create_cq_resp resp = {};
> > > > > > > + struct mana_ib_ucontext *mana_ucontext;
> > > > > > > + struct ib_device *ibdev = ibcq->device;
> > > > > > > + struct mana_ib_create_cq ucmd = {};
> > > > > > > + struct mana_ib_dev *mdev;
> > > > > > > + bool is_rnic_cq;
> > > > > > > + u32 doorbell;
> > > > > > > + int err;
> > > > > > > +
> > > > > > > + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > > > > > > +
> > > > > > > + cq->comp_vector = attr->comp_vector % ibdev-
> > >num_comp_vectors;
> > > > > > > + cq->cq_handle = INVALID_MANA_HANDLE;
> > > > > > > + is_rnic_cq = mana_ib_is_rnic(mdev);
> > > > > > > +
> > > > > > > + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > > > > > > + return -EINVAL;
> > > > > > > +
> > > > > > > + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd),
> > > > > > > +udata-
> > > > > > >inlen));
> > > > > > > + if (err) {
> > > > > > > + ibdev_dbg(ibdev, "Failed to copy from udata for
> > create cq,
> > > > > > %d\n", err);
> > > > > > > + return err;
> > > > > > > + }
> > > > > > > +
> > > > > > > + if ((!is_rnic_cq && attr->cqe > mdev-
> > >adapter_caps.max_qp_wr) ||
> > > > > > > + attr->cqe > U32_MAX / COMP_ENTRY_SIZE)
> > > > > > > + return -EINVAL;
> > > > > > > +
> > > > > > > + cq->cqe = attr->cqe;
> > > > > > > + err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe
> > *
> > > > > > COMP_ENTRY_SIZE,
> > > > > > > + &cq->queue, &ibcq->umem);
> > > >
> > > > I just realized that I forgot to handle the case when ibcq->umem
> > > > == NULL and mana fails later after this call. I need to clean
> > > > ibcq->umem in
> > this case.
> > > > I will address that in v3. I am sorry.
> > > >
> > >
> > > Hi Leon,
> > > After re-reading the code, I see that there is no bug in v2 as the
> > > umem gets deallocated on failure inside the handler of
> > > UVERBS_METHOD_CQ_CREATE. I also see that you also had the same
> logic
> > > in v1. So, what is your recommendation? Leave v2 logic as is, so
> > > mana would immediately give ownership of umem to cq->umem, and if
> > > mana_ib_create_user_cq() fails at later stage it should not clean
> > > cq->umem
> > and leave it to the caller handle (i.e., UVERBS_METHOD_CQ_CREATE) to
> > clean
> > cq->umem regardless of who created it.
> > >
> > > Or should I make v3, where I will assign umem to cq->umem right
> > > before return 0, so that if
> > > mana_ib_create_user_cq() fails it does not change cq->umem at all.
> >
> > My suggestion is to stick with my original patch and remove
> > ib_umem_release(queue->umem) from mana_ib_destroy_queue().
>
> Unfortunately, this will break the code and will require more boilerplate
> workarounds.
> MANA still needs to allocate umem for many objects. I see that we can
> generalize for CQs, but mana RC QPs use 4 queues at the moment, and I am
> preparing a patch to have 5th queue for BIND WQEs for RC. Our UC QP will
> use 3 queues. Having a nice entity as mana queue allows us to have clean
> code and do not have extra complex conditions to detect whether a mana
> queue has an umem and the cleanup of mana queue handles that.
>
> My proposal is to keep the nice property of mana queues and just extend the
> mana_ib_create_queue() to satisfy your requirements without adding
> burden of special handling umems. As I understand the new API requirement
> is that umem for a CQ should be owned by the ib core, and that is what the
> helper
> achieves: the umem pointer is not stored and assigned to cq->umem directly.
> The only open question I have is what the requirement for writing an umem
> to cq->umem is. In your patch, I see that you mutate cq->umem even if
> mana_ib_create_user_cq() fails. Is it the behavior you need/want/allow and
> will it be enforced in other IB objects that have one umem (e.g., WQs, SRQs)?
> As it seems to be allowed in the UVERBS_METHOD_CQ_CREATE code:
> if (ib_dev->ops.create_user_cq)
> ret = ib_dev->ops.create_user_cq(cq, &attr, attrs);
> else
> ret = ib_dev->ops.create_cq(cq, &attr, attrs);
> if (ret)
> goto err_free;
> ...
> err_free:
> ib_umem_release(cq->umem);
> If it is not expected, you might enforce it by adding WARN_ON(cq->umem !=
> umem); before ib_umem_release() And I am happy to adjust mana code to
> satisfy this behavior.
>
> here I am just trying to get a win-win situation where we both can be happy
> about the code. I looked at your proposal and removing ib_umem_release
> from mana destroy queue will just add it after mana_ib_destroy_queue() in
> all code paths except the CQ. As well as we would need to add code to
> handle failures of ib_umem_get, so keeping it in the helper removes the
> need to have that. Instead, I would like to make the helper to handle the
> cases when umem is created by upper stack or is not created but want to be
> owned by the upper stack. What is more, I would like the helper be general
> enough to be used for other ib core objects and that is why I would like to
> know the model so I adjust the helper accordingly.
Hi Leon! Could you please respond to the question above so I can resend v2 or
suggest a v3 of this patch. I am asking as I would like to send patches for UC QP
support in mana_ib and it uses mana_ib_create_queue(). Or should I send UC QP
patches now with existing mana_ib_create_queue() signature and send a v3 for this patch
later, where I also fix mana_ib_create_queue() for UC QP handling?
Thanks
>
> Thanks
> Konstantin
>
> >
> > Thanks
> >
> > >
> > > > - Konstantin
> > > >
> > >
> > >
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface
2026-03-11 13:29 ` Konstantin Taranov
@ 2026-03-11 18:55 ` Leon Romanovsky
2026-03-17 14:05 ` Konstantin Taranov
0 siblings, 1 reply; 11+ messages in thread
From: Leon Romanovsky @ 2026-03-11 18:55 UTC (permalink / raw)
To: Konstantin Taranov
Cc: Konstantin Taranov, Shiraz Saleem, Long Li, jgg@ziepe.ca,
linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
On Wed, Mar 11, 2026 at 01:29:22PM +0000, Konstantin Taranov wrote:
> > > On Wed, Mar 04, 2026 at 02:06:21PM +0000, Konstantin Taranov wrote:
> > > > > > > > The uverbs CQ creation UAPI allows users to supply their own
> > > > > > > > umem for a
> > > > > > > CQ.
> > > > > > > > Update mana to support this workflow while preserving
> > > > > > > > support for creating umem through the legacy interface.
> > > > > > > >
> > > > > > > > To support RDMA objects that own umem, extend
> > > > > > > mana_ib_create_queue()
> > > > > > > > to return the umem to the caller and do not allocate umem if
> > > > > > > > it was allocted by the caller.
> > > > > > > >
> > > > > > > > Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
> > > > > > > > ---
> > > > > > > > v2: It is a rework of the patch proposed by Leon
> > > > > > >
> > > > > > > I am curious to know what changes were introduced?
> > > > > >
> > > > > > It is like your patch, but I kept get_umem in
> > > > > > mana_ib_create_queue and introduced ownership.
> > > > > > It made the code simpler and extendable. In your proposal, it
> > > > > > was hard to track the changes and it led to double free of the umem.
> > > > > > With new
> > > > > > mana_ib_create_queue() it is clear from the caller what happens
> > > > > > and no special changes in the caller required.
> > > > > >
> > > > > > >
> > > > > > > > drivers/infiniband/hw/mana/cq.c | 125 +++++++++++++++++---
> > ---
> > > ----
> > > > > > > > drivers/infiniband/hw/mana/device.c | 1 +
> > > > > > > > drivers/infiniband/hw/mana/main.c | 30 +++++--
> > > > > > > > drivers/infiniband/hw/mana/mana_ib.h | 5 +-
> > > > > > > > drivers/infiniband/hw/mana/qp.c | 5 +-
> > > > > > > > drivers/infiniband/hw/mana/wq.c | 3 +-
> > > > > > > > 6 files changed, 111 insertions(+), 58 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/infiniband/hw/mana/cq.c
> > > > > > > > b/drivers/infiniband/hw/mana/cq.c index b2749f971..fa951732a
> > > > > > > > 100644
> > > > > > > > --- a/drivers/infiniband/hw/mana/cq.c
> > > > > > > > +++ b/drivers/infiniband/hw/mana/cq.c
> > > > > > > > @@ -8,12 +8,8 @@
> > > > > > > > int mana_ib_create_cq(struct ib_cq *ibcq, const struct
> > > > > > > > ib_cq_init_attr
> > > > > > > *attr,
> > > > > > > > struct uverbs_attr_bundle *attrs) {
> > > > > > > > - struct ib_udata *udata = &attrs->driver_udata;
> > > > > > > > struct mana_ib_cq *cq = container_of(ibcq, struct
> > > mana_ib_cq, ibcq);
> > > > > > > > - struct mana_ib_create_cq_resp resp = {};
> > > > > > > > - struct mana_ib_ucontext *mana_ucontext;
> > > > > > > > struct ib_device *ibdev = ibcq->device;
> > > > > > > > - struct mana_ib_create_cq ucmd = {};
> > > > > > > > struct mana_ib_dev *mdev;
> > > > > > > > bool is_rnic_cq;
> > > > > > > > u32 doorbell;
> > > > > > > > @@ -26,48 +22,91 @@ int mana_ib_create_cq(struct ib_cq
> > > > > > > > *ibcq, const
> > > > > > > struct ib_cq_init_attr *attr,
> > > > > > > > cq->cq_handle = INVALID_MANA_HANDLE;
> > > > > > > > is_rnic_cq = mana_ib_is_rnic(mdev);
> > > > > > > >
> > > > > > > > - if (udata) {
> > > > > > > > - if (udata->inlen < offsetof(struct mana_ib_create_cq,
> > > flags))
> > > > > > > > - return -EINVAL;
> > > > > > > > -
> > > > > > > > - err = ib_copy_from_udata(&ucmd, udata,
> > > min(sizeof(ucmd),
> > > > > > > udata->inlen));
> > > > > > > > - if (err) {
> > > > > > > > - ibdev_dbg(ibdev, "Failed to copy from udata
> > > for
> > > > > > > create cq, %d\n", err);
> > > > > > > > - return err;
> > > > > > > > - }
> > > > > > > > + if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> > > > > > > > + return -EINVAL;
> > > > > > >
> > > > > > > We are talking about kernel verbs. ULPs are not designed to
> > > > > > > provide attributes and recover from random driver limitations.
> > > > > >
> > > > > > I understand, but there was an ask before to add that check as
> > > > > > some automated code verifier detected overflow. So if we remote
> > > > > > it, I guess we get again an ask to fix the potential overflow.
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > - if ((!is_rnic_cq && attr->cqe > mdev-
> > > > > > > >adapter_caps.max_qp_wr) ||
> > > > > > > > - attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
> > > > > > > > - ibdev_dbg(ibdev, "CQE %d exceeding
> > > limit\n", attr-
> > > > > > > >cqe);
> > > > > > > > - return -EINVAL;
> > > > > > > > - }
> > > > > > > > + buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> > > >cqe *
> > > > > > > COMP_ENTRY_SIZE));
> > > > > > > > + cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > > > > > + err = mana_ib_create_kernel_queue(mdev, buf_size,
> > > GDMA_CQ,
> > > > > > > &cq->queue);
> > > > > > > > + if (err) {
> > > > > > > > + ibdev_dbg(ibdev, "Failed to create kernel queue for
> > > create
> > > > > > > > +cq,
> > > > > > > %d\n", err);
> > > > > > > > + return err;
> > > > > > > > + }
> > > > > > > > + doorbell = mdev->gdma_dev->doorbell;
> > > > > > > >
> > > > > > > > - cq->cqe = attr->cqe;
> > > > > > > > - err = mana_ib_create_queue(mdev, ucmd.buf_addr,
> > > cq->cqe
> > > > > > > * COMP_ENTRY_SIZE,
> > > > > > > > - &cq->queue);
> > > > > > > > + if (is_rnic_cq) {
> > > > > > > > + err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> > > > > > > > if (err) {
> > > > > > > > - ibdev_dbg(ibdev, "Failed to create queue for
> > > create
> > > > > > > cq, %d\n", err);
> > > > > > > > - return err;
> > > > > > > > + ibdev_dbg(ibdev, "Failed to create RNIC cq,
> > > %d\n",
> > > > > > > err);
> > > > > > > > + goto err_destroy_queue;
> > > > > > > > }
> > > > > > > >
> > > > > > > > - mana_ucontext = rdma_udata_to_drv_context(udata,
> > > struct
> > > > > > > mana_ib_ucontext,
> > > > > > > > - ibucontext);
> > > > > > > > - doorbell = mana_ucontext->doorbell;
> > > > > > > > - } else {
> > > > > > > > - if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> > > {
> > > > > > > > - ibdev_dbg(ibdev, "CQE %d exceeding
> > > limit\n", attr-
> > > > > > > >cqe);
> > > > > > > > - return -EINVAL;
> > > > > > > > - }
> > > > > > > > - buf_size =
> > > MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> > > > > > > >cqe * COMP_ENTRY_SIZE));
> > > > > > > > - cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > > > > > - err = mana_ib_create_kernel_queue(mdev, buf_size,
> > > > > > > GDMA_CQ, &cq->queue);
> > > > > > > > + err = mana_ib_install_cq_cb(mdev, cq);
> > > > > > > > if (err) {
> > > > > > > > - ibdev_dbg(ibdev, "Failed to create kernel
> > > queue for
> > > > > > > create cq, %d\n", err);
> > > > > > > > - return err;
> > > > > > > > + ibdev_dbg(ibdev, "Failed to install cq callback,
> > > %d\n",
> > > > > > > err);
> > > > > > > > + goto err_destroy_rnic_cq;
> > > > > > > > }
> > > > > > > > - doorbell = mdev->gdma_dev->doorbell;
> > > > > > > > }
> > > > > > > >
> > > > > > > > + spin_lock_init(&cq->cq_lock);
> > > > > > > > + INIT_LIST_HEAD(&cq->list_send_qp);
> > > > > > > > + INIT_LIST_HEAD(&cq->list_recv_qp);
> > > > > > > > +
> > > > > > > > + return 0;
> > > > > > > > +
> > > > > > > > +err_destroy_rnic_cq:
> > > > > > > > + mana_ib_gd_destroy_cq(mdev, cq);
> > > > > > > > +err_destroy_queue:
> > > > > > > > + mana_ib_destroy_queue(mdev, &cq->queue);
> > > > > > > > +
> > > > > > > > + return err;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct
> > > > > > > ib_cq_init_attr *attr,
> > > > > > > > + struct uverbs_attr_bundle *attrs) {
> > > > > > > > + struct mana_ib_cq *cq = container_of(ibcq, struct
> > > mana_ib_cq, ibcq);
> > > > > > > > + struct ib_udata *udata = &attrs->driver_udata;
> > > > > > > > + struct mana_ib_create_cq_resp resp = {};
> > > > > > > > + struct mana_ib_ucontext *mana_ucontext;
> > > > > > > > + struct ib_device *ibdev = ibcq->device;
> > > > > > > > + struct mana_ib_create_cq ucmd = {};
> > > > > > > > + struct mana_ib_dev *mdev;
> > > > > > > > + bool is_rnic_cq;
> > > > > > > > + u32 doorbell;
> > > > > > > > + int err;
> > > > > > > > +
> > > > > > > > + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > > > > > > > +
> > > > > > > > + cq->comp_vector = attr->comp_vector % ibdev-
> > > >num_comp_vectors;
> > > > > > > > + cq->cq_handle = INVALID_MANA_HANDLE;
> > > > > > > > + is_rnic_cq = mana_ib_is_rnic(mdev);
> > > > > > > > +
> > > > > > > > + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > > > > > > > + return -EINVAL;
> > > > > > > > +
> > > > > > > > + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd),
> > > > > > > > +udata-
> > > > > > > >inlen));
> > > > > > > > + if (err) {
> > > > > > > > + ibdev_dbg(ibdev, "Failed to copy from udata for
> > > create cq,
> > > > > > > %d\n", err);
> > > > > > > > + return err;
> > > > > > > > + }
> > > > > > > > +
> > > > > > > > + if ((!is_rnic_cq && attr->cqe > mdev-
> > > >adapter_caps.max_qp_wr) ||
> > > > > > > > + attr->cqe > U32_MAX / COMP_ENTRY_SIZE)
> > > > > > > > + return -EINVAL;
> > > > > > > > +
> > > > > > > > + cq->cqe = attr->cqe;
> > > > > > > > + err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe
> > > *
> > > > > > > COMP_ENTRY_SIZE,
> > > > > > > > + &cq->queue, &ibcq->umem);
> > > > >
> > > > > I just realized that I forgot to handle the case when ibcq->umem
> > > > > == NULL and mana fails later after this call. I need to clean
> > > > > ibcq->umem in
> > > this case.
> > > > > I will address that in v3. I am sorry.
> > > > >
> > > >
> > > > Hi Leon,
> > > > After re-reading the code, I see that there is no bug in v2 as the
> > > > umem gets deallocated on failure inside the handler of
> > > > UVERBS_METHOD_CQ_CREATE. I also see that you also had the same
> > logic
> > > > in v1. So, what is your recommendation? Leave v2 logic as is, so
> > > > mana would immediately give ownership of umem to cq->umem, and if
> > > > mana_ib_create_user_cq() fails at later stage it should not clean
> > > > cq->umem
> > > and leave it to the caller handle (i.e., UVERBS_METHOD_CQ_CREATE) to
> > > clean
> > > cq->umem regardless of who created it.
> > > >
> > > > Or should I make v3, where I will assign umem to cq->umem right
> > > > before return 0, so that if
> > > > mana_ib_create_user_cq() fails it does not change cq->umem at all.
> > >
> > > My suggestion is to stick with my original patch and remove
> > > ib_umem_release(queue->umem) from mana_ib_destroy_queue().
> >
> > Unfortunately, this will break the code and will require more boilerplate
> > workarounds.
> > MANA still needs to allocate umem for many objects. I see that we can
> > generalize for CQs, but mana RC QPs use 4 queues at the moment, and I am
> > preparing a patch to have 5th queue for BIND WQEs for RC. Our UC QP will
> > use 3 queues. Having a nice entity as mana queue allows us to have clean
> > code and do not have extra complex conditions to detect whether a mana
> > queue has an umem and the cleanup of mana queue handles that.
> >
> > My proposal is to keep the nice property of mana queues and just extend the
> > mana_ib_create_queue() to satisfy your requirements without adding
> > burden of special handling umems. As I understand the new API requirement
> > is that umem for a CQ should be owned by the ib core, and that is what the
> > helper
> > achieves: the umem pointer is not stored and assigned to cq->umem directly.
> > The only open question I have is what the requirement for writing an umem
> > to cq->umem is. In your patch, I see that you mutate cq->umem even if
> > mana_ib_create_user_cq() fails. Is it the behavior you need/want/allow and
> > will it be enforced in other IB objects that have one umem (e.g., WQs, SRQs)?
> > As it seems to be allowed in the UVERBS_METHOD_CQ_CREATE code:
> > if (ib_dev->ops.create_user_cq)
> > ret = ib_dev->ops.create_user_cq(cq, &attr, attrs);
> > else
> > ret = ib_dev->ops.create_cq(cq, &attr, attrs);
> > if (ret)
> > goto err_free;
> > ...
> > err_free:
> > ib_umem_release(cq->umem);
> > If it is not expected, you might enforce it by adding WARN_ON(cq->umem !=
> > umem); before ib_umem_release() And I am happy to adjust mana code to
> > satisfy this behavior.
> >
> > here I am just trying to get a win-win situation where we both can be happy
> > about the code. I looked at your proposal and removing ib_umem_release
> > from mana destroy queue will just add it after mana_ib_destroy_queue() in
> > all code paths except the CQ. As well as we would need to add code to
> > handle failures of ib_umem_get, so keeping it in the helper removes the
> > need to have that. Instead, I would like to make the helper to handle the
> > cases when umem is created by upper stack or is not created but want to be
> > owned by the upper stack. What is more, I would like the helper be general
> > enough to be used for other ib core objects and that is why I would like to
> > know the model so I adjust the helper accordingly.
>
> Hi Leon! Could you please respond to the question above so I can resend v2 or
> suggest a v3 of this patch. I am asking as I would like to send patches for UC QP
> support in mana_ib and it uses mana_ib_create_queue(). Or should I send UC QP
> patches now with existing mana_ib_create_queue() signature and send a v3 for this patch
> later, where I also fix mana_ib_create_queue() for UC QP handling?
It depends on your readiness. If your UC QP support is complete, send it.
Regarding .create_user_cq(), you are not required to implement it if you
prefer not to. Your driver did not support this callback before my
series. The purpose of .create_user_cq() is to enable handling of
additional UMEM types, such as dmabuf for GPU memory. Later in this
cycle, memfd-based UMEMs will be added to improve ib_umem_get()
performance.
If you want your driver to support these UMEM types, you will need to
follow the API contract: do not call ib_umem_release in the
.create_user_cq() or .destroy_cq() paths.
Thanks
>
> Thanks
>
> >
> > Thanks
> > Konstantin
> >
> > >
> > > Thanks
> > >
> > > >
> > > > > - Konstantin
> > > > >
> > > >
> > > >
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface
2026-03-11 18:55 ` Leon Romanovsky
@ 2026-03-17 14:05 ` Konstantin Taranov
2026-03-17 16:27 ` Leon Romanovsky
0 siblings, 1 reply; 11+ messages in thread
From: Konstantin Taranov @ 2026-03-17 14:05 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Konstantin Taranov, Shiraz Saleem, Long Li, jgg@ziepe.ca,
linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
> On Wed, Mar 11, 2026 at 01:29:22PM +0000, Konstantin Taranov wrote:
> > > > On Wed, Mar 04, 2026 at 02:06:21PM +0000, Konstantin Taranov wrote:
> > > > > > > > > The uverbs CQ creation UAPI allows users to supply their
> > > > > > > > > own umem for a
> > > > > > > > CQ.
> > > > > > > > > Update mana to support this workflow while preserving
> > > > > > > > > support for creating umem through the legacy interface.
> > > > > > > > >
> > > > > > > > > To support RDMA objects that own umem, extend
> > > > > > > > mana_ib_create_queue()
> > > > > > > > > to return the umem to the caller and do not allocate
> > > > > > > > > umem if it was allocted by the caller.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Konstantin Taranov
> > > > > > > > > <kotaranov@microsoft.com>
> > > > > > > > > ---
> > > > > > > > > v2: It is a rework of the patch proposed by Leon
> > > > > > > >
> > > > > > > > I am curious to know what changes were introduced?
> > > > > > >
> > > > > > > It is like your patch, but I kept get_umem in
> > > > > > > mana_ib_create_queue and introduced ownership.
> > > > > > > It made the code simpler and extendable. In your proposal,
> > > > > > > it was hard to track the changes and it led to double free of the
> umem.
> > > > > > > With new
> > > > > > > mana_ib_create_queue() it is clear from the caller what
> > > > > > > happens and no special changes in the caller required.
> > > > > > >
> > > > > > > >
> > > > > > > > > drivers/infiniband/hw/mana/cq.c | 125
> +++++++++++++++++---
> > > ---
> > > > ----
> > > > > > > > > drivers/infiniband/hw/mana/device.c | 1 +
> > > > > > > > > drivers/infiniband/hw/mana/main.c | 30 +++++--
> > > > > > > > > drivers/infiniband/hw/mana/mana_ib.h | 5 +-
> > > > > > > > > drivers/infiniband/hw/mana/qp.c | 5 +-
> > > > > > > > > drivers/infiniband/hw/mana/wq.c | 3 +-
> > > > > > > > > 6 files changed, 111 insertions(+), 58 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/infiniband/hw/mana/cq.c
> > > > > > > > > b/drivers/infiniband/hw/mana/cq.c index
> > > > > > > > > b2749f971..fa951732a
> > > > > > > > > 100644
> > > > > > > > > --- a/drivers/infiniband/hw/mana/cq.c
> > > > > > > > > +++ b/drivers/infiniband/hw/mana/cq.c
> > > > > > > > > @@ -8,12 +8,8 @@
> > > > > > > > > int mana_ib_create_cq(struct ib_cq *ibcq, const struct
> > > > > > > > > ib_cq_init_attr
> > > > > > > > *attr,
> > > > > > > > > struct uverbs_attr_bundle *attrs) {
> > > > > > > > > - struct ib_udata *udata = &attrs->driver_udata;
> > > > > > > > > struct mana_ib_cq *cq = container_of(ibcq, struct
> > > > mana_ib_cq, ibcq);
> > > > > > > > > - struct mana_ib_create_cq_resp resp = {};
> > > > > > > > > - struct mana_ib_ucontext *mana_ucontext;
> > > > > > > > > struct ib_device *ibdev = ibcq->device;
> > > > > > > > > - struct mana_ib_create_cq ucmd = {};
> > > > > > > > > struct mana_ib_dev *mdev;
> > > > > > > > > bool is_rnic_cq;
> > > > > > > > > u32 doorbell;
> > > > > > > > > @@ -26,48 +22,91 @@ int mana_ib_create_cq(struct ib_cq
> > > > > > > > > *ibcq, const
> > > > > > > > struct ib_cq_init_attr *attr,
> > > > > > > > > cq->cq_handle = INVALID_MANA_HANDLE;
> > > > > > > > > is_rnic_cq = mana_ib_is_rnic(mdev);
> > > > > > > > >
> > > > > > > > > - if (udata) {
> > > > > > > > > - if (udata->inlen < offsetof(struct mana_ib_create_cq,
> > > > flags))
> > > > > > > > > - return -EINVAL;
> > > > > > > > > -
> > > > > > > > > - err = ib_copy_from_udata(&ucmd, udata,
> > > > min(sizeof(ucmd),
> > > > > > > > udata->inlen));
> > > > > > > > > - if (err) {
> > > > > > > > > - ibdev_dbg(ibdev, "Failed to copy from udata
> > > > for
> > > > > > > > create cq, %d\n", err);
> > > > > > > > > - return err;
> > > > > > > > > - }
> > > > > > > > > + if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> > > > > > > > > + return -EINVAL;
> > > > > > > >
> > > > > > > > We are talking about kernel verbs. ULPs are not designed
> > > > > > > > to provide attributes and recover from random driver limitations.
> > > > > > >
> > > > > > > I understand, but there was an ask before to add that check
> > > > > > > as some automated code verifier detected overflow. So if we
> > > > > > > remote it, I guess we get again an ask to fix the potential overflow.
> > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > - if ((!is_rnic_cq && attr->cqe > mdev-
> > > > > > > > >adapter_caps.max_qp_wr) ||
> > > > > > > > > - attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
> > > > > > > > > - ibdev_dbg(ibdev, "CQE %d exceeding
> > > > limit\n", attr-
> > > > > > > > >cqe);
> > > > > > > > > - return -EINVAL;
> > > > > > > > > - }
> > > > > > > > > + buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> > > > >cqe *
> > > > > > > > COMP_ENTRY_SIZE));
> > > > > > > > > + cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > > > > > > + err = mana_ib_create_kernel_queue(mdev, buf_size,
> > > > GDMA_CQ,
> > > > > > > > &cq->queue);
> > > > > > > > > + if (err) {
> > > > > > > > > + ibdev_dbg(ibdev, "Failed to create kernel queue for
> > > > create
> > > > > > > > > +cq,
> > > > > > > > %d\n", err);
> > > > > > > > > + return err;
> > > > > > > > > + }
> > > > > > > > > + doorbell = mdev->gdma_dev->doorbell;
> > > > > > > > >
> > > > > > > > > - cq->cqe = attr->cqe;
> > > > > > > > > - err = mana_ib_create_queue(mdev, ucmd.buf_addr,
> > > > cq->cqe
> > > > > > > > * COMP_ENTRY_SIZE,
> > > > > > > > > - &cq->queue);
> > > > > > > > > + if (is_rnic_cq) {
> > > > > > > > > + err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> > > > > > > > > if (err) {
> > > > > > > > > - ibdev_dbg(ibdev, "Failed to create queue for
> > > > create
> > > > > > > > cq, %d\n", err);
> > > > > > > > > - return err;
> > > > > > > > > + ibdev_dbg(ibdev, "Failed to create RNIC cq,
> > > > %d\n",
> > > > > > > > err);
> > > > > > > > > + goto err_destroy_queue;
> > > > > > > > > }
> > > > > > > > >
> > > > > > > > > - mana_ucontext = rdma_udata_to_drv_context(udata,
> > > > struct
> > > > > > > > mana_ib_ucontext,
> > > > > > > > > - ibucontext);
> > > > > > > > > - doorbell = mana_ucontext->doorbell;
> > > > > > > > > - } else {
> > > > > > > > > - if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> > > > {
> > > > > > > > > - ibdev_dbg(ibdev, "CQE %d exceeding
> > > > limit\n", attr-
> > > > > > > > >cqe);
> > > > > > > > > - return -EINVAL;
> > > > > > > > > - }
> > > > > > > > > - buf_size =
> > > > MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> > > > > > > > >cqe * COMP_ENTRY_SIZE));
> > > > > > > > > - cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > > > > > > - err = mana_ib_create_kernel_queue(mdev, buf_size,
> > > > > > > > GDMA_CQ, &cq->queue);
> > > > > > > > > + err = mana_ib_install_cq_cb(mdev, cq);
> > > > > > > > > if (err) {
> > > > > > > > > - ibdev_dbg(ibdev, "Failed to create kernel
> > > > queue for
> > > > > > > > create cq, %d\n", err);
> > > > > > > > > - return err;
> > > > > > > > > + ibdev_dbg(ibdev, "Failed to install cq callback,
> > > > %d\n",
> > > > > > > > err);
> > > > > > > > > + goto err_destroy_rnic_cq;
> > > > > > > > > }
> > > > > > > > > - doorbell = mdev->gdma_dev->doorbell;
> > > > > > > > > }
> > > > > > > > >
> > > > > > > > > + spin_lock_init(&cq->cq_lock);
> > > > > > > > > + INIT_LIST_HEAD(&cq->list_send_qp);
> > > > > > > > > + INIT_LIST_HEAD(&cq->list_recv_qp);
> > > > > > > > > +
> > > > > > > > > + return 0;
> > > > > > > > > +
> > > > > > > > > +err_destroy_rnic_cq:
> > > > > > > > > + mana_ib_gd_destroy_cq(mdev, cq);
> > > > > > > > > +err_destroy_queue:
> > > > > > > > > + mana_ib_destroy_queue(mdev, &cq->queue);
> > > > > > > > > +
> > > > > > > > > + return err;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +int mana_ib_create_user_cq(struct ib_cq *ibcq, const
> > > > > > > > > +struct
> > > > > > > > ib_cq_init_attr *attr,
> > > > > > > > > + struct uverbs_attr_bundle *attrs) {
> > > > > > > > > + struct mana_ib_cq *cq = container_of(ibcq, struct
> > > > mana_ib_cq, ibcq);
> > > > > > > > > + struct ib_udata *udata = &attrs->driver_udata;
> > > > > > > > > + struct mana_ib_create_cq_resp resp = {};
> > > > > > > > > + struct mana_ib_ucontext *mana_ucontext;
> > > > > > > > > + struct ib_device *ibdev = ibcq->device;
> > > > > > > > > + struct mana_ib_create_cq ucmd = {};
> > > > > > > > > + struct mana_ib_dev *mdev;
> > > > > > > > > + bool is_rnic_cq;
> > > > > > > > > + u32 doorbell;
> > > > > > > > > + int err;
> > > > > > > > > +
> > > > > > > > > + mdev = container_of(ibdev, struct mana_ib_dev,
> > > > > > > > > +ib_dev);
> > > > > > > > > +
> > > > > > > > > + cq->comp_vector = attr->comp_vector % ibdev-
> > > > >num_comp_vectors;
> > > > > > > > > + cq->cq_handle = INVALID_MANA_HANDLE;
> > > > > > > > > + is_rnic_cq = mana_ib_is_rnic(mdev);
> > > > > > > > > +
> > > > > > > > > + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > > > > > > > > + return -EINVAL;
> > > > > > > > > +
> > > > > > > > > + err = ib_copy_from_udata(&ucmd, udata,
> > > > > > > > > +min(sizeof(ucmd),
> > > > > > > > > +udata-
> > > > > > > > >inlen));
> > > > > > > > > + if (err) {
> > > > > > > > > + ibdev_dbg(ibdev, "Failed to copy from udata for
> > > > create cq,
> > > > > > > > %d\n", err);
> > > > > > > > > + return err;
> > > > > > > > > + }
> > > > > > > > > +
> > > > > > > > > + if ((!is_rnic_cq && attr->cqe > mdev-
> > > > >adapter_caps.max_qp_wr) ||
> > > > > > > > > + attr->cqe > U32_MAX / COMP_ENTRY_SIZE)
> > > > > > > > > + return -EINVAL;
> > > > > > > > > +
> > > > > > > > > + cq->cqe = attr->cqe;
> > > > > > > > > + err = mana_ib_create_queue(mdev, ucmd.buf_addr,
> > > > > > > > > +cq->cqe
> > > > *
> > > > > > > > COMP_ENTRY_SIZE,
> > > > > > > > > + &cq->queue, &ibcq->umem);
> > > > > >
> > > > > > I just realized that I forgot to handle the case when
> > > > > > ibcq->umem == NULL and mana fails later after this call. I
> > > > > > need to clean
> > > > > > ibcq->umem in
> > > > this case.
> > > > > > I will address that in v3. I am sorry.
> > > > > >
> > > > >
> > > > > Hi Leon,
> > > > > After re-reading the code, I see that there is no bug in v2 as
> > > > > the umem gets deallocated on failure inside the handler of
> > > > > UVERBS_METHOD_CQ_CREATE. I also see that you also had the same
> > > logic
> > > > > in v1. So, what is your recommendation? Leave v2 logic as is, so
> > > > > mana would immediately give ownership of umem to cq->umem, and
> > > > > if
> > > > > mana_ib_create_user_cq() fails at later stage it should not
> > > > > clean
> > > > > cq->umem
> > > > and leave it to the caller handle (i.e., UVERBS_METHOD_CQ_CREATE)
> > > > to clean
> > > > cq->umem regardless of who created it.
> > > > >
> > > > > Or should I make v3, where I will assign umem to cq->umem right
> > > > > before return 0, so that if
> > > > > mana_ib_create_user_cq() fails it does not change cq->umem at all.
> > > >
> > > > My suggestion is to stick with my original patch and remove
> > > > ib_umem_release(queue->umem) from mana_ib_destroy_queue().
> > >
> > > Unfortunately, this will break the code and will require more
> > > boilerplate workarounds.
> > > MANA still needs to allocate umem for many objects. I see that we
> > > can generalize for CQs, but mana RC QPs use 4 queues at the moment,
> > > and I am preparing a patch to have 5th queue for BIND WQEs for RC.
> > > Our UC QP will use 3 queues. Having a nice entity as mana queue
> > > allows us to have clean code and do not have extra complex
> > > conditions to detect whether a mana queue has an umem and the
> cleanup of mana queue handles that.
> > >
> > > My proposal is to keep the nice property of mana queues and just
> > > extend the
> > > mana_ib_create_queue() to satisfy your requirements without adding
> > > burden of special handling umems. As I understand the new API
> > > requirement is that umem for a CQ should be owned by the ib core,
> > > and that is what the helper
> > > achieves: the umem pointer is not stored and assigned to cq->umem
> directly.
> > > The only open question I have is what the requirement for writing an
> > > umem to cq->umem is. In your patch, I see that you mutate cq->umem
> > > even if
> > > mana_ib_create_user_cq() fails. Is it the behavior you
> > > need/want/allow and will it be enforced in other IB objects that have one
> umem (e.g., WQs, SRQs)?
> > > As it seems to be allowed in the UVERBS_METHOD_CQ_CREATE code:
> > > if (ib_dev->ops.create_user_cq)
> > > ret = ib_dev->ops.create_user_cq(cq, &attr, attrs);
> > > else
> > > ret = ib_dev->ops.create_cq(cq, &attr, attrs);
> > > if (ret)
> > > goto err_free;
> > > ...
> > > err_free:
> > > ib_umem_release(cq->umem);
> > > If it is not expected, you might enforce it by adding
> > > WARN_ON(cq->umem != umem); before ib_umem_release() And I am
> happy
> > > to adjust mana code to satisfy this behavior.
> > >
> > > here I am just trying to get a win-win situation where we both can
> > > be happy about the code. I looked at your proposal and removing
> > > ib_umem_release from mana destroy queue will just add it after
> > > mana_ib_destroy_queue() in all code paths except the CQ. As well as
> > > we would need to add code to handle failures of ib_umem_get, so
> > > keeping it in the helper removes the need to have that. Instead, I
> > > would like to make the helper to handle the cases when umem is
> > > created by upper stack or is not created but want to be owned by the
> > > upper stack. What is more, I would like the helper be general enough
> > > to be used for other ib core objects and that is why I would like to know
> the model so I adjust the helper accordingly.
> >
> > Hi Leon! Could you please respond to the question above so I can
> > resend v2 or suggest a v3 of this patch. I am asking as I would like
> > to send patches for UC QP support in mana_ib and it uses
> > mana_ib_create_queue(). Or should I send UC QP patches now with
> > existing mana_ib_create_queue() signature and send a v3 for this patch
> later, where I also fix mana_ib_create_queue() for UC QP handling?
>
> It depends on your readiness. If your UC QP support is complete, send it.
>
> Regarding .create_user_cq(), you are not required to implement it if you
> prefer not to. Your driver did not support this callback before my series. The
> purpose of .create_user_cq() is to enable handling of additional UMEM
> types, such as dmabuf for GPU memory. Later in this cycle, memfd-based
> UMEMs will be added to improve ib_umem_get() performance.
>
> If you want your driver to support these UMEM types, you will need to follow
> the API contract: do not call ib_umem_release in the
> .create_user_cq() or .destroy_cq() paths.
To confirm that we are on the same page.
It means that when mana gets cq->umem == NULL, I should assign it to ib_umem_get()
and if there is an error later, the mana_ib_create_user_cq() should not
call ib_umem_release() even though it created it.
Correct?
Konstantin
>
> Thanks
>
> >
> > Thanks
> >
> > >
> > > Thanks
> > > Konstantin
> > >
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > > - Konstantin
> > > > > >
> > > > >
> > > > >
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface
2026-03-17 14:05 ` Konstantin Taranov
@ 2026-03-17 16:27 ` Leon Romanovsky
0 siblings, 0 replies; 11+ messages in thread
From: Leon Romanovsky @ 2026-03-17 16:27 UTC (permalink / raw)
To: Konstantin Taranov
Cc: Konstantin Taranov, Shiraz Saleem, Long Li, jgg@ziepe.ca,
linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
On Tue, Mar 17, 2026 at 02:05:43PM +0000, Konstantin Taranov wrote:
> > On Wed, Mar 11, 2026 at 01:29:22PM +0000, Konstantin Taranov wrote:
> > > > > On Wed, Mar 04, 2026 at 02:06:21PM +0000, Konstantin Taranov wrote:
> > > > > > > > > > The uverbs CQ creation UAPI allows users to supply their
> > > > > > > > > > own umem for a
> > > > > > > > > CQ.
> > > > > > > > > > Update mana to support this workflow while preserving
> > > > > > > > > > support for creating umem through the legacy interface.
> > > > > > > > > >
> > > > > > > > > > To support RDMA objects that own umem, extend
> > > > > > > > > mana_ib_create_queue()
> > > > > > > > > > to return the umem to the caller and do not allocate
> > > > > > > > > > umem if it was allocted by the caller.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Konstantin Taranov
> > > > > > > > > > <kotaranov@microsoft.com>
> > > > > > > > > > ---
> > > > > > > > > > v2: It is a rework of the patch proposed by Leon
> > > > > > > > >
> > > > > > > > > I am curious to know what changes were introduced?
> > > > > > > >
> > > > > > > > It is like your patch, but I kept get_umem in
> > > > > > > > mana_ib_create_queue and introduced ownership.
> > > > > > > > It made the code simpler and extendable. In your proposal,
> > > > > > > > it was hard to track the changes and it led to double free of the
> > umem.
> > > > > > > > With new
> > > > > > > > mana_ib_create_queue() it is clear from the caller what
> > > > > > > > happens and no special changes in the caller required.
> > > > > > > >
> > > > > > > > >
> > > > > > > > > > drivers/infiniband/hw/mana/cq.c | 125
> > +++++++++++++++++---
> > > > ---
> > > > > ----
> > > > > > > > > > drivers/infiniband/hw/mana/device.c | 1 +
> > > > > > > > > > drivers/infiniband/hw/mana/main.c | 30 +++++--
> > > > > > > > > > drivers/infiniband/hw/mana/mana_ib.h | 5 +-
> > > > > > > > > > drivers/infiniband/hw/mana/qp.c | 5 +-
> > > > > > > > > > drivers/infiniband/hw/mana/wq.c | 3 +-
> > > > > > > > > > 6 files changed, 111 insertions(+), 58 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/infiniband/hw/mana/cq.c
> > > > > > > > > > b/drivers/infiniband/hw/mana/cq.c index
> > > > > > > > > > b2749f971..fa951732a
> > > > > > > > > > 100644
> > > > > > > > > > --- a/drivers/infiniband/hw/mana/cq.c
> > > > > > > > > > +++ b/drivers/infiniband/hw/mana/cq.c
> > > > > > > > > > @@ -8,12 +8,8 @@
> > > > > > > > > > int mana_ib_create_cq(struct ib_cq *ibcq, const struct
> > > > > > > > > > ib_cq_init_attr
> > > > > > > > > *attr,
> > > > > > > > > > struct uverbs_attr_bundle *attrs) {
> > > > > > > > > > - struct ib_udata *udata = &attrs->driver_udata;
> > > > > > > > > > struct mana_ib_cq *cq = container_of(ibcq, struct
> > > > > mana_ib_cq, ibcq);
> > > > > > > > > > - struct mana_ib_create_cq_resp resp = {};
> > > > > > > > > > - struct mana_ib_ucontext *mana_ucontext;
> > > > > > > > > > struct ib_device *ibdev = ibcq->device;
> > > > > > > > > > - struct mana_ib_create_cq ucmd = {};
> > > > > > > > > > struct mana_ib_dev *mdev;
> > > > > > > > > > bool is_rnic_cq;
> > > > > > > > > > u32 doorbell;
> > > > > > > > > > @@ -26,48 +22,91 @@ int mana_ib_create_cq(struct ib_cq
> > > > > > > > > > *ibcq, const
> > > > > > > > > struct ib_cq_init_attr *attr,
> > > > > > > > > > cq->cq_handle = INVALID_MANA_HANDLE;
> > > > > > > > > > is_rnic_cq = mana_ib_is_rnic(mdev);
> > > > > > > > > >
> > > > > > > > > > - if (udata) {
> > > > > > > > > > - if (udata->inlen < offsetof(struct mana_ib_create_cq,
> > > > > flags))
> > > > > > > > > > - return -EINVAL;
> > > > > > > > > > -
> > > > > > > > > > - err = ib_copy_from_udata(&ucmd, udata,
> > > > > min(sizeof(ucmd),
> > > > > > > > > udata->inlen));
> > > > > > > > > > - if (err) {
> > > > > > > > > > - ibdev_dbg(ibdev, "Failed to copy from udata
> > > > > for
> > > > > > > > > create cq, %d\n", err);
> > > > > > > > > > - return err;
> > > > > > > > > > - }
> > > > > > > > > > + if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> > > > > > > > > > + return -EINVAL;
> > > > > > > > >
> > > > > > > > > We are talking about kernel verbs. ULPs are not designed
> > > > > > > > > to provide attributes and recover from random driver limitations.
> > > > > > > >
> > > > > > > > I understand, but there was an ask before to add that check
> > > > > > > > as some automated code verifier detected overflow. So if we
> > > > > > > > remote it, I guess we get again an ask to fix the potential overflow.
> > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > - if ((!is_rnic_cq && attr->cqe > mdev-
> > > > > > > > > >adapter_caps.max_qp_wr) ||
> > > > > > > > > > - attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
> > > > > > > > > > - ibdev_dbg(ibdev, "CQE %d exceeding
> > > > > limit\n", attr-
> > > > > > > > > >cqe);
> > > > > > > > > > - return -EINVAL;
> > > > > > > > > > - }
> > > > > > > > > > + buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> > > > > >cqe *
> > > > > > > > > COMP_ENTRY_SIZE));
> > > > > > > > > > + cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > > > > > > > + err = mana_ib_create_kernel_queue(mdev, buf_size,
> > > > > GDMA_CQ,
> > > > > > > > > &cq->queue);
> > > > > > > > > > + if (err) {
> > > > > > > > > > + ibdev_dbg(ibdev, "Failed to create kernel queue for
> > > > > create
> > > > > > > > > > +cq,
> > > > > > > > > %d\n", err);
> > > > > > > > > > + return err;
> > > > > > > > > > + }
> > > > > > > > > > + doorbell = mdev->gdma_dev->doorbell;
> > > > > > > > > >
> > > > > > > > > > - cq->cqe = attr->cqe;
> > > > > > > > > > - err = mana_ib_create_queue(mdev, ucmd.buf_addr,
> > > > > cq->cqe
> > > > > > > > > * COMP_ENTRY_SIZE,
> > > > > > > > > > - &cq->queue);
> > > > > > > > > > + if (is_rnic_cq) {
> > > > > > > > > > + err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> > > > > > > > > > if (err) {
> > > > > > > > > > - ibdev_dbg(ibdev, "Failed to create queue for
> > > > > create
> > > > > > > > > cq, %d\n", err);
> > > > > > > > > > - return err;
> > > > > > > > > > + ibdev_dbg(ibdev, "Failed to create RNIC cq,
> > > > > %d\n",
> > > > > > > > > err);
> > > > > > > > > > + goto err_destroy_queue;
> > > > > > > > > > }
> > > > > > > > > >
> > > > > > > > > > - mana_ucontext = rdma_udata_to_drv_context(udata,
> > > > > struct
> > > > > > > > > mana_ib_ucontext,
> > > > > > > > > > - ibucontext);
> > > > > > > > > > - doorbell = mana_ucontext->doorbell;
> > > > > > > > > > - } else {
> > > > > > > > > > - if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> > > > > {
> > > > > > > > > > - ibdev_dbg(ibdev, "CQE %d exceeding
> > > > > limit\n", attr-
> > > > > > > > > >cqe);
> > > > > > > > > > - return -EINVAL;
> > > > > > > > > > - }
> > > > > > > > > > - buf_size =
> > > > > MANA_PAGE_ALIGN(roundup_pow_of_two(attr-
> > > > > > > > > >cqe * COMP_ENTRY_SIZE));
> > > > > > > > > > - cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > > > > > > > > > - err = mana_ib_create_kernel_queue(mdev, buf_size,
> > > > > > > > > GDMA_CQ, &cq->queue);
> > > > > > > > > > + err = mana_ib_install_cq_cb(mdev, cq);
> > > > > > > > > > if (err) {
> > > > > > > > > > - ibdev_dbg(ibdev, "Failed to create kernel
> > > > > queue for
> > > > > > > > > create cq, %d\n", err);
> > > > > > > > > > - return err;
> > > > > > > > > > + ibdev_dbg(ibdev, "Failed to install cq callback,
> > > > > %d\n",
> > > > > > > > > err);
> > > > > > > > > > + goto err_destroy_rnic_cq;
> > > > > > > > > > }
> > > > > > > > > > - doorbell = mdev->gdma_dev->doorbell;
> > > > > > > > > > }
> > > > > > > > > >
> > > > > > > > > > + spin_lock_init(&cq->cq_lock);
> > > > > > > > > > + INIT_LIST_HEAD(&cq->list_send_qp);
> > > > > > > > > > + INIT_LIST_HEAD(&cq->list_recv_qp);
> > > > > > > > > > +
> > > > > > > > > > + return 0;
> > > > > > > > > > +
> > > > > > > > > > +err_destroy_rnic_cq:
> > > > > > > > > > + mana_ib_gd_destroy_cq(mdev, cq);
> > > > > > > > > > +err_destroy_queue:
> > > > > > > > > > + mana_ib_destroy_queue(mdev, &cq->queue);
> > > > > > > > > > +
> > > > > > > > > > + return err;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > > +int mana_ib_create_user_cq(struct ib_cq *ibcq, const
> > > > > > > > > > +struct
> > > > > > > > > ib_cq_init_attr *attr,
> > > > > > > > > > + struct uverbs_attr_bundle *attrs) {
> > > > > > > > > > + struct mana_ib_cq *cq = container_of(ibcq, struct
> > > > > mana_ib_cq, ibcq);
> > > > > > > > > > + struct ib_udata *udata = &attrs->driver_udata;
> > > > > > > > > > + struct mana_ib_create_cq_resp resp = {};
> > > > > > > > > > + struct mana_ib_ucontext *mana_ucontext;
> > > > > > > > > > + struct ib_device *ibdev = ibcq->device;
> > > > > > > > > > + struct mana_ib_create_cq ucmd = {};
> > > > > > > > > > + struct mana_ib_dev *mdev;
> > > > > > > > > > + bool is_rnic_cq;
> > > > > > > > > > + u32 doorbell;
> > > > > > > > > > + int err;
> > > > > > > > > > +
> > > > > > > > > > + mdev = container_of(ibdev, struct mana_ib_dev,
> > > > > > > > > > +ib_dev);
> > > > > > > > > > +
> > > > > > > > > > + cq->comp_vector = attr->comp_vector % ibdev-
> > > > > >num_comp_vectors;
> > > > > > > > > > + cq->cq_handle = INVALID_MANA_HANDLE;
> > > > > > > > > > + is_rnic_cq = mana_ib_is_rnic(mdev);
> > > > > > > > > > +
> > > > > > > > > > + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > > > > > > > > > + return -EINVAL;
> > > > > > > > > > +
> > > > > > > > > > + err = ib_copy_from_udata(&ucmd, udata,
> > > > > > > > > > +min(sizeof(ucmd),
> > > > > > > > > > +udata-
> > > > > > > > > >inlen));
> > > > > > > > > > + if (err) {
> > > > > > > > > > + ibdev_dbg(ibdev, "Failed to copy from udata for
> > > > > create cq,
> > > > > > > > > %d\n", err);
> > > > > > > > > > + return err;
> > > > > > > > > > + }
> > > > > > > > > > +
> > > > > > > > > > + if ((!is_rnic_cq && attr->cqe > mdev-
> > > > > >adapter_caps.max_qp_wr) ||
> > > > > > > > > > + attr->cqe > U32_MAX / COMP_ENTRY_SIZE)
> > > > > > > > > > + return -EINVAL;
> > > > > > > > > > +
> > > > > > > > > > + cq->cqe = attr->cqe;
> > > > > > > > > > + err = mana_ib_create_queue(mdev, ucmd.buf_addr,
> > > > > > > > > > +cq->cqe
> > > > > *
> > > > > > > > > COMP_ENTRY_SIZE,
> > > > > > > > > > + &cq->queue, &ibcq->umem);
> > > > > > >
> > > > > > > I just realized that I forgot to handle the case when
> > > > > > > ibcq->umem == NULL and mana fails later after this call. I
> > > > > > > need to clean
> > > > > > > ibcq->umem in
> > > > > this case.
> > > > > > > I will address that in v3. I am sorry.
> > > > > > >
> > > > > >
> > > > > > Hi Leon,
> > > > > > After re-reading the code, I see that there is no bug in v2 as
> > > > > > the umem gets deallocated on failure inside the handler of
> > > > > > UVERBS_METHOD_CQ_CREATE. I also see that you also had the same
> > > > logic
> > > > > > in v1. So, what is your recommendation? Leave v2 logic as is, so
> > > > > > mana would immediately give ownership of umem to cq->umem, and
> > > > > > if
> > > > > > mana_ib_create_user_cq() fails at later stage it should not
> > > > > > clean
> > > > > > cq->umem
> > > > > and leave it to the caller handle (i.e., UVERBS_METHOD_CQ_CREATE)
> > > > > to clean
> > > > > cq->umem regardless of who created it.
> > > > > >
> > > > > > Or should I make v3, where I will assign umem to cq->umem right
> > > > > > before return 0, so that if
> > > > > > mana_ib_create_user_cq() fails it does not change cq->umem at all.
> > > > >
> > > > > My suggestion is to stick with my original patch and remove
> > > > > ib_umem_release(queue->umem) from mana_ib_destroy_queue().
> > > >
> > > > Unfortunately, this will break the code and will require more
> > > > boilerplate workarounds.
> > > > MANA still needs to allocate umem for many objects. I see that we
> > > > can generalize for CQs, but mana RC QPs use 4 queues at the moment,
> > > > and I am preparing a patch to have 5th queue for BIND WQEs for RC.
> > > > Our UC QP will use 3 queues. Having a nice entity as mana queue
> > > > allows us to have clean code and do not have extra complex
> > > > conditions to detect whether a mana queue has an umem and the
> > cleanup of mana queue handles that.
> > > >
> > > > My proposal is to keep the nice property of mana queues and just
> > > > extend the
> > > > mana_ib_create_queue() to satisfy your requirements without adding
> > > > burden of special handling umems. As I understand the new API
> > > > requirement is that umem for a CQ should be owned by the ib core,
> > > > and that is what the helper
> > > > achieves: the umem pointer is not stored and assigned to cq->umem
> > directly.
> > > > The only open question I have is what the requirement for writing an
> > > > umem to cq->umem is. In your patch, I see that you mutate cq->umem
> > > > even if
> > > > mana_ib_create_user_cq() fails. Is it the behavior you
> > > > need/want/allow and will it be enforced in other IB objects that have one
> > umem (e.g., WQs, SRQs)?
> > > > As it seems to be allowed in the UVERBS_METHOD_CQ_CREATE code:
> > > > if (ib_dev->ops.create_user_cq)
> > > > ret = ib_dev->ops.create_user_cq(cq, &attr, attrs);
> > > > else
> > > > ret = ib_dev->ops.create_cq(cq, &attr, attrs);
> > > > if (ret)
> > > > goto err_free;
> > > > ...
> > > > err_free:
> > > > ib_umem_release(cq->umem);
> > > > If it is not expected, you might enforce it by adding
> > > > WARN_ON(cq->umem != umem); before ib_umem_release() And I am
> > happy
> > > > to adjust mana code to satisfy this behavior.
> > > >
> > > > here I am just trying to get a win-win situation where we both can
> > > > be happy about the code. I looked at your proposal and removing
> > > > ib_umem_release from mana destroy queue will just add it after
> > > > mana_ib_destroy_queue() in all code paths except the CQ. As well as
> > > > we would need to add code to handle failures of ib_umem_get, so
> > > > keeping it in the helper removes the need to have that. Instead, I
> > > > would like to make the helper to handle the cases when umem is
> > > > created by upper stack or is not created but want to be owned by the
> > > > upper stack. What is more, I would like the helper be general enough
> > > > to be used for other ib core objects and that is why I would like to know
> > the model so I adjust the helper accordingly.
> > >
> > > Hi Leon! Could you please respond to the question above so I can
> > > resend v2 or suggest a v3 of this patch. I am asking as I would like
> > > to send patches for UC QP support in mana_ib and it uses
> > > mana_ib_create_queue(). Or should I send UC QP patches now with
> > > existing mana_ib_create_queue() signature and send a v3 for this patch
> > later, where I also fix mana_ib_create_queue() for UC QP handling?
> >
> > It depends on your readiness. If your UC QP support is complete, send it.
> >
> > Regarding .create_user_cq(), you are not required to implement it if you
> > prefer not to. Your driver did not support this callback before my series. The
> > purpose of .create_user_cq() is to enable handling of additional UMEM
> > types, such as dmabuf for GPU memory. Later in this cycle, memfd-based
> > UMEMs will be added to improve ib_umem_get() performance.
> >
> > If you want your driver to support these UMEM types, you will need to follow
> > the API contract: do not call ib_umem_release in the
> > .create_user_cq() or .destroy_cq() paths.
>
> To confirm that we are on the same page.
> It means that when mana gets cq->umem == NULL, I should assign it to ib_umem_get()
> and if there is an error later, the mana_ib_create_user_cq() should not
> call ib_umem_release() even though it created it.
> Correct?
Yes, we have a plan for removing that ib_umem_get() in the future.
Thanks
>
> Konstantin
>
> >
> > Thanks
> >
> > >
> > > Thanks
> > >
> > > >
> > > > Thanks
> > > > Konstantin
> > > >
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > > - Konstantin
> > > > > > >
> > > > > >
> > > > > >
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2026-03-17 16:27 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-03 12:48 [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface Konstantin Taranov
2026-03-04 11:05 ` Leon Romanovsky
2026-03-04 11:41 ` [EXTERNAL] " Konstantin Taranov
2026-03-04 13:23 ` Konstantin Taranov
2026-03-04 14:06 ` Konstantin Taranov
2026-03-04 15:59 ` Leon Romanovsky
2026-03-05 9:20 ` Konstantin Taranov
2026-03-11 13:29 ` Konstantin Taranov
2026-03-11 18:55 ` Leon Romanovsky
2026-03-17 14:05 ` Konstantin Taranov
2026-03-17 16:27 ` Leon Romanovsky
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox