public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
From: Leon Romanovsky <leon@kernel.org>
To: Konstantin Taranov <kotaranov@linux.microsoft.com>
Cc: kotaranov@microsoft.com, shirazsaleem@microsoft.com,
	longli@microsoft.com, jgg@ziepe.ca, linux-rdma@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface
Date: Wed, 4 Mar 2026 13:05:00 +0200	[thread overview]
Message-ID: <20260304110500.GZ12611@unreal> (raw)
In-Reply-To: <20260303124825.301452-1-kotaranov@linux.microsoft.com>

On Tue, Mar 03, 2026 at 04:48:25AM -0800, Konstantin Taranov wrote:
> From: Konstantin Taranov <kotaranov@microsoft.com>
> 
> The uverbs CQ creation UAPI allows users to supply their own umem for a CQ.
> Update mana to support this workflow while preserving support for creating
> umem through the legacy interface.
> 
> To support RDMA objects that own umem, extend mana_ib_create_queue() to return
> the umem to the caller and do not allocate umem if it was allocated
> by the caller.
> 
> Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
> ---
> v2: It is a rework of the patch proposed by Leon

I am curious to know what changes were introduced.

>  drivers/infiniband/hw/mana/cq.c      | 125 +++++++++++++++++----------
>  drivers/infiniband/hw/mana/device.c  |   1 +
>  drivers/infiniband/hw/mana/main.c    |  30 +++++--
>  drivers/infiniband/hw/mana/mana_ib.h |   5 +-
>  drivers/infiniband/hw/mana/qp.c      |   5 +-
>  drivers/infiniband/hw/mana/wq.c      |   3 +-
>  6 files changed, 111 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
> index b2749f971..fa951732a 100644
> --- a/drivers/infiniband/hw/mana/cq.c
> +++ b/drivers/infiniband/hw/mana/cq.c
> @@ -8,12 +8,8 @@
>  int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
>  		      struct uverbs_attr_bundle *attrs)
>  {
> -	struct ib_udata *udata = &attrs->driver_udata;
>  	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> -	struct mana_ib_create_cq_resp resp = {};
> -	struct mana_ib_ucontext *mana_ucontext;
>  	struct ib_device *ibdev = ibcq->device;
> -	struct mana_ib_create_cq ucmd = {};
>  	struct mana_ib_dev *mdev;
>  	bool is_rnic_cq;
>  	u32 doorbell;
> @@ -26,48 +22,91 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
>  	cq->cq_handle = INVALID_MANA_HANDLE;
>  	is_rnic_cq = mana_ib_is_rnic(mdev);
>  
> -	if (udata) {
> -		if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> -			return -EINVAL;
> -
> -		err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
> -		if (err) {
> -			ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
> -			return err;
> -		}
> +	if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> +		return -EINVAL;

We are talking about kernel verbs. ULPs are not designed to provide
attributes and recover from random driver limitations. 

>  
> -		if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
> -		    attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
> -			ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
> -			return -EINVAL;
> -		}
> +	buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
> +	cq->cqe = buf_size / COMP_ENTRY_SIZE;
> +	err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
> +	if (err) {
> +		ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
> +		return err;
> +	}
> +	doorbell = mdev->gdma_dev->doorbell;
>  
> -		cq->cqe = attr->cqe;
> -		err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
> -					   &cq->queue);
> +	if (is_rnic_cq) {
> +		err = mana_ib_gd_create_cq(mdev, cq, doorbell);
>  		if (err) {
> -			ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
> -			return err;
> +			ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n", err);
> +			goto err_destroy_queue;
>  		}
>  
> -		mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
> -							  ibucontext);
> -		doorbell = mana_ucontext->doorbell;
> -	} else {
> -		if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1) {
> -			ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
> -			return -EINVAL;
> -		}
> -		buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
> -		cq->cqe = buf_size / COMP_ENTRY_SIZE;
> -		err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
> +		err = mana_ib_install_cq_cb(mdev, cq);
>  		if (err) {
> -			ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
> -			return err;
> +			ibdev_dbg(ibdev, "Failed to install cq callback, %d\n", err);
> +			goto err_destroy_rnic_cq;
>  		}
> -		doorbell = mdev->gdma_dev->doorbell;
>  	}
>  
> +	spin_lock_init(&cq->cq_lock);
> +	INIT_LIST_HEAD(&cq->list_send_qp);
> +	INIT_LIST_HEAD(&cq->list_recv_qp);
> +
> +	return 0;
> +
> +err_destroy_rnic_cq:
> +	mana_ib_gd_destroy_cq(mdev, cq);
> +err_destroy_queue:
> +	mana_ib_destroy_queue(mdev, &cq->queue);
> +
> +	return err;
> +}
> +
> +int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> +			   struct uverbs_attr_bundle *attrs)
> +{
> +	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> +	struct ib_udata *udata = &attrs->driver_udata;
> +	struct mana_ib_create_cq_resp resp = {};
> +	struct mana_ib_ucontext *mana_ucontext;
> +	struct ib_device *ibdev = ibcq->device;
> +	struct mana_ib_create_cq ucmd = {};
> +	struct mana_ib_dev *mdev;
> +	bool is_rnic_cq;
> +	u32 doorbell;
> +	int err;
> +
> +	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> +
> +	cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
> +	cq->cq_handle = INVALID_MANA_HANDLE;
> +	is_rnic_cq = mana_ib_is_rnic(mdev);
> +
> +	if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> +		return -EINVAL;
> +
> +	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
> +	if (err) {
> +		ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
> +		return err;
> +	}
> +
> +	if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
> +	    attr->cqe > U32_MAX / COMP_ENTRY_SIZE)
> +		return -EINVAL;
> +
> +	cq->cqe = attr->cqe;
> +	err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
> +				   &cq->queue, &ibcq->umem);
> +	if (err) {
> +		ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
> +		return err;
> +	}
> +
> +	mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
> +						  ibucontext);
> +	doorbell = mana_ucontext->doorbell;
> +
>  	if (is_rnic_cq) {
>  		err = mana_ib_gd_create_cq(mdev, cq, doorbell);
>  		if (err) {
> @@ -82,13 +121,11 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
>  		}
>  	}
>  
> -	if (udata) {
> -		resp.cqid = cq->queue.id;
> -		err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
> -		if (err) {
> -			ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
> -			goto err_remove_cq_cb;
> -		}
> +	resp.cqid = cq->queue.id;
> +	err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
> +	if (err) {
> +		ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
> +		goto err_remove_cq_cb;
>  	}
>  
>  	spin_lock_init(&cq->cq_lock);
> diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
> index ccc2279ca..c5c5fe051 100644
> --- a/drivers/infiniband/hw/mana/device.c
> +++ b/drivers/infiniband/hw/mana/device.c
> @@ -21,6 +21,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
>  	.alloc_ucontext = mana_ib_alloc_ucontext,
>  	.create_ah = mana_ib_create_ah,
>  	.create_cq = mana_ib_create_cq,
> +	.create_user_cq = mana_ib_create_user_cq,
>  	.create_qp = mana_ib_create_qp,
>  	.create_rwq_ind_table = mana_ib_create_rwq_ind_table,
>  	.create_wq = mana_ib_create_wq,
> diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
> index 8d99cd00f..d1f1e217b 100644
> --- a/drivers/infiniband/hw/mana/main.c
> +++ b/drivers/infiniband/hw/mana/main.c
> @@ -262,33 +262,45 @@ int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_qu
>  }
>  
>  int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
> -			 struct mana_ib_queue *queue)
> +			 struct mana_ib_queue *queue, struct ib_umem **umem)
>  {
> -	struct ib_umem *umem;
>  	int err;
>  
>  	queue->umem = NULL;
>  	queue->id = INVALID_QUEUE_ID;
>  	queue->gdma_region = GDMA_INVALID_DMA_REGION;
>  
> -	umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
> -	if (IS_ERR(umem)) {
> -		ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n", umem);
> -		return PTR_ERR(umem);
> +	if (umem)
> +		queue->umem = *umem;
> +
> +	if (!queue->umem) {
> +		/* if umem is not provided, allocate it */
> +		queue->umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
> +		if (IS_ERR(queue->umem)) {
> +			ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n", queue->umem);
> +			return PTR_ERR(queue->umem);
> +		}

I moved this hunk to the callers on purpose. The idea is to call to
ib_umem_get() as early as possible.

>  	}
>  
> -	err = mana_ib_create_zero_offset_dma_region(mdev, umem, &queue->gdma_region);
> +	err = mana_ib_create_zero_offset_dma_region(mdev, queue->umem, &queue->gdma_region);
>  	if (err) {
>  		ibdev_dbg(&mdev->ib_dev, "Failed to create dma region, %d\n", err);
>  		goto free_umem;
>  	}
> -	queue->umem = umem;
>  
>  	ibdev_dbg(&mdev->ib_dev, "created dma region 0x%llx\n", queue->gdma_region);
>  
> +	if (umem) {
> +		/* Give ownership of umem to the caller */
> +		*umem = queue->umem;
> +		queue->umem = NULL;
> +	}
> +
>  	return 0;
>  free_umem:
> -	ib_umem_release(umem);
> +	if (!umem || !(*umem))
> +		/* deallocate mana's umem */
> +		ib_umem_release(queue->umem);

This is another reason why ib_umem_get() shouldn't be buried in the
driver's internals. IB/core is responsible to release it.

Thanks

  reply	other threads:[~2026-03-04 11:05 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-03 12:48 [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface Konstantin Taranov
2026-03-04 11:05 ` Leon Romanovsky [this message]
2026-03-04 11:41   ` [EXTERNAL] " Konstantin Taranov
2026-03-04 13:23     ` Konstantin Taranov
2026-03-04 14:06       ` Konstantin Taranov
2026-03-04 15:59         ` Leon Romanovsky
2026-03-05  9:20           ` Konstantin Taranov
2026-03-11 13:29             ` Konstantin Taranov
2026-03-11 18:55               ` Leon Romanovsky
2026-03-17 14:05                 ` Konstantin Taranov
2026-03-17 16:27                   ` Leon Romanovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260304110500.GZ12611@unreal \
    --to=leon@kernel.org \
    --cc=jgg@ziepe.ca \
    --cc=kotaranov@linux.microsoft.com \
    --cc=kotaranov@microsoft.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=longli@microsoft.com \
    --cc=shirazsaleem@microsoft.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox