Linux-HyperV List
 help / color / mirror / Atom feed
* [PATCH rdma-next 12/50] RDMA/mlx4: Inline mlx4_ib_get_cq_umem into callers
From: Leon Romanovsky @ 2026-02-13 10:57 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Inline the mlx4_ib_get_cq_umem helper function into its two call sites
(mlx4_ib_create_cq and mlx4_alloc_resize_umem) to prepare for the
transition to the modern CQ creation interface.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx4/cq.c | 108 ++++++++++++++++++++++------------------
 1 file changed, 60 insertions(+), 48 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index c592374f4a58..94e9ff45725a 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -135,45 +135,6 @@ static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 	mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
 }
 
-static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev,
-			       struct mlx4_ib_cq_buf *buf,
-			       struct ib_umem **umem, u64 buf_addr, int cqe)
-{
-	int err;
-	int cqe_size = dev->dev->caps.cqe_size;
-	int shift;
-	int n;
-
-	*umem = ib_umem_get(&dev->ib_dev, buf_addr, cqe * cqe_size,
-			    IB_ACCESS_LOCAL_WRITE);
-	if (IS_ERR(*umem))
-		return PTR_ERR(*umem);
-
-	shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
-	if (shift < 0) {
-		err = shift;
-		goto err_buf;
-	}
-
-	err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
-	if (err)
-		goto err_buf;
-
-	err = mlx4_ib_umem_write_mtt(dev, &buf->mtt, *umem);
-	if (err)
-		goto err_mtt;
-
-	return 0;
-
-err_mtt:
-	mlx4_mtt_cleanup(dev->dev, &buf->mtt);
-
-err_buf:
-	ib_umem_release(*umem);
-
-	return err;
-}
-
 #define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION
 int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		      struct uverbs_attr_bundle *attrs)
@@ -208,6 +169,9 @@ int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 
 	if (udata) {
 		struct mlx4_ib_create_cq ucmd;
+		int cqe_size = dev->dev->caps.cqe_size;
+		int shift;
+		int n;
 
 		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
 			err = -EFAULT;
@@ -215,10 +179,28 @@ int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		}
 
 		buf_addr = (void *)(unsigned long)ucmd.buf_addr;
-		err = mlx4_ib_get_cq_umem(dev, &cq->buf, &cq->umem,
-					  ucmd.buf_addr, entries);
-		if (err)
+
+		cq->umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
+				       entries * cqe_size,
+				       IB_ACCESS_LOCAL_WRITE);
+		if (IS_ERR(cq->umem)) {
+			err = PTR_ERR(cq->umem);
 			goto err_cq;
+		}
+
+		shift = mlx4_ib_umem_calc_optimal_mtt_size(cq->umem, 0, &n);
+		if (shift < 0) {
+			err = shift;
+			goto err_umem;
+		}
+
+		err = mlx4_mtt_init(dev->dev, n, shift, &cq->buf.mtt);
+		if (err)
+			goto err_umem;
+
+		err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);
+		if (err)
+			goto err_mtt;
 
 		err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &cq->db);
 		if (err)
@@ -281,6 +263,7 @@ int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 err_mtt:
 	mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
 
+err_umem:
 	ib_umem_release(cq->umem);
 	if (!udata)
 		mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
@@ -320,6 +303,9 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq
 				   int entries, struct ib_udata *udata)
 {
 	struct mlx4_ib_resize_cq ucmd;
+	int cqe_size = dev->dev->caps.cqe_size;
+	int shift;
+	int n;
 	int err;
 
 	if (cq->resize_umem)
@@ -332,17 +318,43 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq
 	if (!cq->resize_buf)
 		return -ENOMEM;
 
-	err = mlx4_ib_get_cq_umem(dev, &cq->resize_buf->buf, &cq->resize_umem,
-				  ucmd.buf_addr, entries);
-	if (err) {
-		kfree(cq->resize_buf);
-		cq->resize_buf = NULL;
-		return err;
+	cq->resize_umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
+				      entries * cqe_size,
+				      IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(cq->resize_umem)) {
+		err = PTR_ERR(cq->resize_umem);
+		goto err_buf;
+	}
+
+	shift = mlx4_ib_umem_calc_optimal_mtt_size(cq->resize_umem, 0, &n);
+	if (shift < 0) {
+		err = shift;
+		goto err_umem;
 	}
 
+	err = mlx4_mtt_init(dev->dev, n, shift, &cq->resize_buf->buf.mtt);
+	if (err)
+		goto err_umem;
+
+	err = mlx4_ib_umem_write_mtt(dev, &cq->resize_buf->buf.mtt,
+				     cq->resize_umem);
+	if (err)
+		goto err_mtt;
+
 	cq->resize_buf->cqe = entries - 1;
 
 	return 0;
+
+err_mtt:
+	mlx4_mtt_cleanup(dev->dev, &cq->resize_buf->buf.mtt);
+
+err_umem:
+	ib_umem_release(cq->resize_umem);
+
+err_buf:
+	kfree(cq->resize_buf);
+	cq->resize_buf = NULL;
+	return err;
 }
 
 static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 15/50] RDMA/bnxt_re: Convert to modern CQ interface
From: Leon Romanovsky @ 2026-02-13 10:57 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

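Split CQ creation into separate user and kernel paths and allow users to
supply their own umem. The CQ umem is now kept in struct ib_cq and is
released by ib_core instead of by the driver.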

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 172 ++++++++++++++++++++-----------
 drivers/infiniband/hw/bnxt_re/ib_verbs.h |   4 +-
 drivers/infiniband/hw/bnxt_re/main.c     |   1 +
 3 files changed, 113 insertions(+), 64 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index c146f43ae875..b8516d8b8426 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -3134,22 +3134,20 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 	nq = cq->qplib_cq.nq;
 	cctx = rdev->chip_ctx;
 
-	if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT) {
-		free_page((unsigned long)cq->uctx_cq_page);
+	free_page((unsigned long)cq->uctx_cq_page);
+	if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT)
 		hash_del(&cq->hash_entry);
-	}
-	bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
 
+	bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
 	bnxt_re_put_nq(rdev, nq);
-	ib_umem_release(cq->umem);
-
 	atomic_dec(&rdev->stats.res.cq_count);
 	kfree(cq->cql);
 	return 0;
 }
 
-int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
-		      struct uverbs_attr_bundle *attrs)
+int bnxt_re_create_user_cq(struct ib_cq *ibcq,
+			   const struct ib_cq_init_attr *attr,
+			   struct uverbs_attr_bundle *attrs)
 {
 	struct bnxt_re_cq *cq = container_of(ibcq, struct bnxt_re_cq, ib_cq);
 	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibcq->device, ibdev);
@@ -3158,6 +3156,8 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
 	struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
 	struct bnxt_qplib_chip_ctx *cctx;
+	struct bnxt_re_cq_resp resp = {};
+	struct bnxt_re_cq_req req;
 	int cqe = attr->cqe;
 	int rc, entries;
 	u32 active_cqs;
@@ -3166,7 +3166,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		return -EOPNOTSUPP;
 
 	/* Validate CQ fields */
-	if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
+	if (attr->cqe > dev_attr->max_cq_wqes) {
 		ibdev_err(&rdev->ibdev, "Failed to create CQ -max exceeded");
 		return -EINVAL;
 	}
@@ -3181,33 +3181,107 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 
 	cq->qplib_cq.sg_info.pgsize = PAGE_SIZE;
 	cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT;
-	if (udata) {
-		struct bnxt_re_cq_req req;
-		if (ib_copy_from_udata(&req, udata, sizeof(req))) {
-			rc = -EFAULT;
-			goto fail;
-		}
 
-		cq->umem = ib_umem_get(&rdev->ibdev, req.cq_va,
-				       entries * sizeof(struct cq_base),
-				       IB_ACCESS_LOCAL_WRITE);
-		if (IS_ERR(cq->umem)) {
-			rc = PTR_ERR(cq->umem);
-			goto fail;
-		}
-		cq->qplib_cq.sg_info.umem = cq->umem;
-		cq->qplib_cq.dpi = &uctx->dpi;
-	} else {
-		cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL);
-		cq->cql = kcalloc(cq->max_cql, sizeof(struct bnxt_qplib_cqe),
-				  GFP_KERNEL);
-		if (!cq->cql) {
+	if (ib_copy_from_udata(&req, udata, sizeof(req)))
+		return -EFAULT;
+
+	if (!ibcq->umem)
+		ibcq->umem = ib_umem_get(&rdev->ibdev, req.cq_va,
+					 entries * sizeof(struct cq_base),
+					 IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(ibcq->umem))
+		return PTR_ERR(ibcq->umem);
+
+	cq->qplib_cq.sg_info.umem = cq->ib_cq.umem;
+	cq->qplib_cq.dpi = &uctx->dpi;
+
+	cq->qplib_cq.max_wqe = entries;
+	cq->qplib_cq.coalescing = &rdev->cq_coalescing;
+	cq->qplib_cq.nq = bnxt_re_get_nq(rdev);
+	cq->qplib_cq.cnq_hw_ring_id = cq->qplib_cq.nq->ring_id;
+
+	rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq);
+	if (rc)
+		goto create_cq;
+
+	cq->ib_cq.cqe = entries;
+	cq->cq_period = cq->qplib_cq.period;
+
+	active_cqs = atomic_inc_return(&rdev->stats.res.cq_count);
+	if (active_cqs > rdev->stats.res.cq_watermark)
+		rdev->stats.res.cq_watermark = active_cqs;
+	spin_lock_init(&cq->cq_lock);
+
+	if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT) {
+		/* Allocate a page */
+		cq->uctx_cq_page = (void *)get_zeroed_page(GFP_KERNEL);
+		if (!cq->uctx_cq_page) {
 			rc = -ENOMEM;
-			goto fail;
+			goto c2fail;
 		}
+		hash_add(rdev->cq_hash, &cq->hash_entry, cq->qplib_cq.id);
+		resp.comp_mask |= BNXT_RE_CQ_TOGGLE_PAGE_SUPPORT;
+	}
+	resp.cqid = cq->qplib_cq.id;
+	resp.tail = cq->qplib_cq.hwq.cons;
+	resp.phase = cq->qplib_cq.period;
+	rc = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+	if (rc) {
+		ibdev_err(&rdev->ibdev, "Failed to copy CQ udata");
+		goto free_mem;
+	}
 
-		cq->qplib_cq.dpi = &rdev->dpi_privileged;
+	return 0;
+
+free_mem:
+	if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT)
+		hash_del(&cq->hash_entry);
+	free_page((unsigned long)cq->uctx_cq_page);
+c2fail:
+	atomic_dec(&rdev->stats.res.cq_count);
+	bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
+	/* UMEM is released by ib_core */
+create_cq:
+	bnxt_re_put_nq(rdev, cq->qplib_cq.nq);
+	return rc;
+}
+
+int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		      struct uverbs_attr_bundle *attrs)
+{
+	struct bnxt_re_cq *cq = container_of(ibcq, struct bnxt_re_cq, ib_cq);
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibcq->device, ibdev);
+	struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+	int cqe = attr->cqe;
+	int rc, entries;
+	u32 active_cqs;
+
+	if (attr->flags)
+		return -EOPNOTSUPP;
+
+	/* Validate CQ fields */
+	if (attr->cqe > dev_attr->max_cq_wqes) {
+		ibdev_err(&rdev->ibdev, "Failed to create CQ -max exceeded");
+		return -EINVAL;
 	}
+
+	cq->rdev = rdev;
+	cq->qplib_cq.cq_handle = (u64)(unsigned long)(&cq->qplib_cq);
+
+	entries = bnxt_re_init_depth(cqe + 1, NULL);
+	if (entries > dev_attr->max_cq_wqes + 1)
+		entries = dev_attr->max_cq_wqes + 1;
+
+	cq->qplib_cq.sg_info.pgsize = PAGE_SIZE;
+	cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT;
+
+	cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL);
+	cq->cql = kcalloc(cq->max_cql, sizeof(struct bnxt_qplib_cqe),
+			  GFP_KERNEL);
+	if (!cq->cql)
+		return -ENOMEM;
+
+	cq->qplib_cq.dpi = &rdev->dpi_privileged;
 	cq->qplib_cq.max_wqe = entries;
 	cq->qplib_cq.coalescing = &rdev->cq_coalescing;
 	cq->qplib_cq.nq = bnxt_re_get_nq(rdev);
@@ -3227,38 +3301,10 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		rdev->stats.res.cq_watermark = active_cqs;
 	spin_lock_init(&cq->cq_lock);
 
-	if (udata) {
-		struct bnxt_re_cq_resp resp = {};
-
-		if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT) {
-			hash_add(rdev->cq_hash, &cq->hash_entry, cq->qplib_cq.id);
-			/* Allocate a page */
-			cq->uctx_cq_page = (void *)get_zeroed_page(GFP_KERNEL);
-			if (!cq->uctx_cq_page) {
-				rc = -ENOMEM;
-				goto c2fail;
-			}
-			resp.comp_mask |= BNXT_RE_CQ_TOGGLE_PAGE_SUPPORT;
-		}
-		resp.cqid = cq->qplib_cq.id;
-		resp.tail = cq->qplib_cq.hwq.cons;
-		resp.phase = cq->qplib_cq.period;
-		resp.rsvd = 0;
-		rc = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
-		if (rc) {
-			ibdev_err(&rdev->ibdev, "Failed to copy CQ udata");
-			bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
-			goto free_mem;
-		}
-	}
-
 	return 0;
 
-free_mem:
-	free_page((unsigned long)cq->uctx_cq_page);
-c2fail:
-	ib_umem_release(cq->umem);
 fail:
+	bnxt_re_put_nq(rdev, cq->qplib_cq.nq);
 	kfree(cq->cql);
 	return rc;
 }
@@ -3271,8 +3317,8 @@ static void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq)
 
 	cq->qplib_cq.max_wqe = cq->resize_cqe;
 	if (cq->resize_umem) {
-		ib_umem_release(cq->umem);
-		cq->umem = cq->resize_umem;
+		ib_umem_release(cq->ib_cq.umem);
+		cq->ib_cq.umem = cq->resize_umem;
 		cq->resize_umem = NULL;
 		cq->resize_cqe = 0;
 	}
@@ -3872,7 +3918,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
 	/* User CQ; the only processing we do is to
 	 * complete any pending CQ resize operation.
 	 */
-	if (cq->umem) {
+	if (cq->ib_cq.umem) {
 		if (cq->resize_umem)
 			bnxt_re_resize_cq_complete(cq);
 		return 0;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 76ba9ab04d5c..cac3e10b73f6 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -108,7 +108,6 @@ struct bnxt_re_cq {
 	struct bnxt_qplib_cqe	*cql;
 #define MAX_CQL_PER_POLL	1024
 	u32			max_cql;
-	struct ib_umem		*umem;
 	struct ib_umem		*resize_umem;
 	int			resize_cqe;
 	void			*uctx_cq_page;
@@ -247,6 +246,9 @@ int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
 		      const struct ib_recv_wr **bad_recv_wr);
 int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		      struct uverbs_attr_bundle *attrs);
+int bnxt_re_create_user_cq(struct ib_cq *ibcq,
+			   const struct ib_cq_init_attr *attr,
+			   struct uverbs_attr_bundle *attrs);
 int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
 int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
 int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 73003ad25ee8..368c1fd8172e 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1334,6 +1334,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
 	.alloc_ucontext = bnxt_re_alloc_ucontext,
 	.create_ah = bnxt_re_create_ah,
 	.create_cq = bnxt_re_create_cq,
+	.create_user_cq = bnxt_re_create_user_cq,
 	.create_qp = bnxt_re_create_qp,
 	.create_srq = bnxt_re_create_srq,
 	.create_user_ah = bnxt_re_create_ah,

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 16/50] RDMA/cxgb4: Separate kernel and user CQ creation paths
From: Leon Romanovsky @ 2026-02-13 10:57 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Split the create CQ logic to clearly distinguish kernel and user flows.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/cxgb4/cq.c       | 218 ++++++++++++++++++++++-----------
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |   2 +
 drivers/infiniband/hw/cxgb4/provider.c |   1 +
 3 files changed, 152 insertions(+), 69 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 14ced7b667fa..d263cca47432 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -994,8 +994,8 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 	return 0;
 }
 
-int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
-		   struct uverbs_attr_bundle *attrs)
+int c4iw_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			struct uverbs_attr_bundle *attrs)
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct ib_device *ibdev = ibcq->device;
@@ -1012,25 +1012,21 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		udata, struct c4iw_ucontext, ibucontext);
 
 	pr_debug("ib_dev %p entries %d\n", ibdev, entries);
-	if (attr->flags)
+	if (attr->flags || ibcq->umem)
 		return -EOPNOTSUPP;
 
-	if (entries < 1 || entries > ibdev->attrs.max_cqe)
+	if (attr->cqe > ibdev->attrs.max_cqe)
 		return -EINVAL;
 
 	if (vector >= rhp->rdev.lldi.nciq)
 		return -EINVAL;
 
-	if (udata) {
-		if (udata->inlen < sizeof(ucmd))
-			ucontext->is_32b_cqe = 1;
-	}
+	if (udata->inlen < sizeof(ucmd))
+		ucontext->is_32b_cqe = 1;
 
 	chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
-	if (!chp->wr_waitp) {
-		ret = -ENOMEM;
-		goto err_free_chp;
-	}
+	if (!chp->wr_waitp)
+		return -ENOMEM;
 	c4iw_init_wr_wait(chp->wr_waitp);
 
 	wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
@@ -1063,22 +1059,19 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	if (hwentries < 64)
 		hwentries = 64;
 
-	memsize = hwentries * ((ucontext && ucontext->is_32b_cqe) ?
+	memsize = hwentries * (ucontext->is_32b_cqe ?
 			(sizeof(*chp->cq.queue) / 2) : sizeof(*chp->cq.queue));
 
 	/*
 	 * memsize must be a multiple of the page size if its a user cq.
 	 */
-	if (udata)
-		memsize = roundup(memsize, PAGE_SIZE);
+	memsize = roundup(memsize, PAGE_SIZE);
 
 	chp->cq.size = hwentries;
 	chp->cq.memsize = memsize;
 	chp->cq.vector = vector;
 
-	ret = create_cq(&rhp->rdev, &chp->cq,
-			ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
-			chp->wr_waitp);
+	ret = create_cq(&rhp->rdev, &chp->cq, &ucontext->uctx, chp->wr_waitp);
 	if (ret)
 		goto err_free_skb;
 
@@ -1093,54 +1086,52 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	if (ret)
 		goto err_destroy_cq;
 
-	if (ucontext) {
-		ret = -ENOMEM;
-		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
-		if (!mm)
-			goto err_remove_handle;
-		mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL);
-		if (!mm2)
-			goto err_free_mm;
-
-		memset(&uresp, 0, sizeof(uresp));
-		uresp.qid_mask = rhp->rdev.cqmask;
-		uresp.cqid = chp->cq.cqid;
-		uresp.size = chp->cq.size;
-		uresp.memsize = chp->cq.memsize;
-		spin_lock(&ucontext->mmap_lock);
-		uresp.key = ucontext->key;
-		ucontext->key += PAGE_SIZE;
-		uresp.gts_key = ucontext->key;
-		ucontext->key += PAGE_SIZE;
-		/* communicate to the userspace that
-		 * kernel driver supports 64B CQE
-		 */
-		uresp.flags |= C4IW_64B_CQE;
-
-		spin_unlock(&ucontext->mmap_lock);
-		ret = ib_copy_to_udata(udata, &uresp,
-				       ucontext->is_32b_cqe ?
-				       sizeof(uresp) - sizeof(uresp.flags) :
-				       sizeof(uresp));
-		if (ret)
-			goto err_free_mm2;
-
-		mm->key = uresp.key;
-		mm->addr = 0;
-		mm->vaddr = chp->cq.queue;
-		mm->dma_addr = chp->cq.dma_addr;
-		mm->len = chp->cq.memsize;
-		insert_flag_to_mmap(&rhp->rdev, mm, mm->addr);
-		insert_mmap(ucontext, mm);
-
-		mm2->key = uresp.gts_key;
-		mm2->addr = chp->cq.bar2_pa;
-		mm2->len = PAGE_SIZE;
-		mm2->vaddr = NULL;
-		mm2->dma_addr = 0;
-		insert_flag_to_mmap(&rhp->rdev, mm2, mm2->addr);
-		insert_mmap(ucontext, mm2);
-	}
+	ret = -ENOMEM;
+	mm = kmalloc(sizeof(*mm), GFP_KERNEL);
+	if (!mm)
+		goto err_remove_handle;
+	mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL);
+	if (!mm2)
+		goto err_free_mm;
+
+	memset(&uresp, 0, sizeof(uresp));
+	uresp.qid_mask = rhp->rdev.cqmask;
+	uresp.cqid = chp->cq.cqid;
+	uresp.size = chp->cq.size;
+	uresp.memsize = chp->cq.memsize;
+	spin_lock(&ucontext->mmap_lock);
+	uresp.key = ucontext->key;
+	ucontext->key += PAGE_SIZE;
+	uresp.gts_key = ucontext->key;
+	ucontext->key += PAGE_SIZE;
+	/* communicate to the userspace that
+	 * kernel driver supports 64B CQE
+	 */
+	uresp.flags |= C4IW_64B_CQE;
+
+	spin_unlock(&ucontext->mmap_lock);
+	ret = ib_copy_to_udata(udata, &uresp,
+			       ucontext->is_32b_cqe ?
+			       sizeof(uresp) - sizeof(uresp.flags) :
+			       sizeof(uresp));
+	if (ret)
+		goto err_free_mm2;
+
+	mm->key = uresp.key;
+	mm->addr = 0;
+	mm->vaddr = chp->cq.queue;
+	mm->dma_addr = chp->cq.dma_addr;
+	mm->len = chp->cq.memsize;
+	insert_flag_to_mmap(&rhp->rdev, mm, mm->addr);
+	insert_mmap(ucontext, mm);
+
+	mm2->key = uresp.gts_key;
+	mm2->addr = chp->cq.bar2_pa;
+	mm2->len = PAGE_SIZE;
+	mm2->vaddr = NULL;
+	mm2->dma_addr = 0;
+	insert_flag_to_mmap(&rhp->rdev, mm2, mm2->addr);
+	insert_mmap(ucontext, mm2);
 
 	pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr %pad\n",
 		 chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
@@ -1153,14 +1144,103 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 err_remove_handle:
 	xa_erase_irq(&rhp->cqs, chp->cq.cqid);
 err_destroy_cq:
-	destroy_cq(&chp->rhp->rdev, &chp->cq,
-		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+	destroy_cq(&chp->rhp->rdev, &chp->cq, &ucontext->uctx,
+		   chp->destroy_skb, chp->wr_waitp);
+err_free_skb:
+	kfree_skb(chp->destroy_skb);
+err_free_wr_wait:
+	c4iw_put_wr_wait(chp->wr_waitp);
+	return ret;
+}
+
+int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		   struct uverbs_attr_bundle *attrs)
+{
+	struct ib_device *ibdev = ibcq->device;
+	int entries = attr->cqe;
+	int vector = attr->comp_vector;
+	struct c4iw_dev *rhp = to_c4iw_dev(ibcq->device);
+	struct c4iw_cq *chp = to_c4iw_cq(ibcq);
+	int ret, wr_len;
+	size_t memsize, hwentries;
+
+	pr_debug("ib_dev %p entries %d\n", ibdev, entries);
+	if (attr->flags)
+		return -EOPNOTSUPP;
+
+	if (attr->cqe > ibdev->attrs.max_cqe)
+		return -EINVAL;
+
+	if (vector >= rhp->rdev.lldi.nciq)
+		return -EINVAL;
+
+	chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
+	if (!chp->wr_waitp)
+		return -ENOMEM;
+	c4iw_init_wr_wait(chp->wr_waitp);
+
+	wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
+	chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
+	if (!chp->destroy_skb) {
+		ret = -ENOMEM;
+		goto err_free_wr_wait;
+	}
+
+	/* account for the status page. */
+	entries++;
+
+	/* IQ needs one extra entry to differentiate full vs empty. */
+	entries++;
+
+	/*
+	 * entries must be multiple of 16 for HW.
+	 */
+	entries = roundup(entries, 16);
+
+	/*
+	 * Make actual HW queue 2x to avoid cdix_inc overflows.
+	 */
+	hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size);
+
+	/*
+	 * Make HW queue at least 64 entries so GTS updates aren't too
+	 * frequent.
+	 */
+	if (hwentries < 64)
+		hwentries = 64;
+
+	memsize = hwentries * sizeof(*chp->cq.queue);
+
+	chp->cq.size = hwentries;
+	chp->cq.memsize = memsize;
+	chp->cq.vector = vector;
+
+	ret = create_cq(&rhp->rdev, &chp->cq, &rhp->rdev.uctx, chp->wr_waitp);
+	if (ret)
+		goto err_free_skb;
+
+	chp->rhp = rhp;
+	chp->cq.size--;				/* status page */
+	chp->ibcq.cqe = entries - 2;
+	spin_lock_init(&chp->lock);
+	spin_lock_init(&chp->comp_handler_lock);
+	refcount_set(&chp->refcnt, 1);
+	init_completion(&chp->cq_rel_comp);
+	ret = xa_insert_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL);
+	if (ret)
+		goto err_destroy_cq;
+
+	pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr %pad\n",
+		 chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
+		 &chp->cq.dma_addr);
+	return 0;
+err_destroy_cq:
+	destroy_cq(&chp->rhp->rdev, &chp->cq, &rhp->rdev.uctx,
 		   chp->destroy_skb, chp->wr_waitp);
 err_free_skb:
 	kfree_skb(chp->destroy_skb);
 err_free_wr_wait:
 	c4iw_put_wr_wait(chp->wr_waitp);
-err_free_chp:
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index e17c1252536b..b8e3ee2a0c84 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -1014,6 +1014,8 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
 void c4iw_cq_rem_ref(struct c4iw_cq *chp);
 int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		   struct uverbs_attr_bundle *attrs);
+int c4iw_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			struct uverbs_attr_bundle *attrs);
 int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
 int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
 		    enum ib_srq_attr_mask srq_attr_mask,
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index e059f92d90fd..b9c183d1389d 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -461,6 +461,7 @@ static const struct ib_device_ops c4iw_dev_ops = {
 	.alloc_pd = c4iw_allocate_pd,
 	.alloc_ucontext = c4iw_alloc_ucontext,
 	.create_cq = c4iw_create_cq,
+	.create_user_cq = c4iw_create_user_cq,
 	.create_qp = c4iw_create_qp,
 	.create_srq = c4iw_create_srq,
 	.dealloc_pd = c4iw_deallocate_pd,

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 17/50] RDMA/mthca: Split user and kernel CQ creation paths
From: Leon Romanovsky @ 2026-02-13 10:57 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Separate the create-CQ logic into distinct user and kernel
code paths.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mthca/mthca_provider.c | 92 ++++++++++++++++++----------
 1 file changed, 58 insertions(+), 34 deletions(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index aa5ca5c4ff77..6bf825978846 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -572,9 +572,9 @@ static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
 	return 0;
 }
 
-static int mthca_create_cq(struct ib_cq *ibcq,
-			   const struct ib_cq_init_attr *attr,
-			   struct uverbs_attr_bundle *attrs)
+static int mthca_create_user_cq(struct ib_cq *ibcq,
+				const struct ib_cq_init_attr *attr,
+				struct uverbs_attr_bundle *attrs)
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct ib_device *ibdev = ibcq->device;
@@ -586,47 +586,41 @@ static int mthca_create_cq(struct ib_cq *ibcq,
 	struct mthca_ucontext *context = rdma_udata_to_drv_context(
 		udata, struct mthca_ucontext, ibucontext);
 
-	if (attr->flags)
+	if (attr->flags || ibcq->umem)
 		return -EOPNOTSUPP;
 
-	if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
+	if (attr->cqe > to_mdev(ibdev)->limits.max_cqes)
 		return -EINVAL;
 
-	if (udata) {
-		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
-			return -EFAULT;
+	if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+		return -EFAULT;
 
-		err = mthca_map_user_db(to_mdev(ibdev), &context->uar,
-					context->db_tab, ucmd.set_db_index,
-					ucmd.set_db_page);
-		if (err)
-			return err;
+	err = mthca_map_user_db(to_mdev(ibdev), &context->uar,
+				context->db_tab, ucmd.set_db_index,
+				ucmd.set_db_page);
+	if (err)
+		return err;
 
-		err = mthca_map_user_db(to_mdev(ibdev), &context->uar,
-					context->db_tab, ucmd.arm_db_index,
-					ucmd.arm_db_page);
-		if (err)
-			goto err_unmap_set;
-	}
+	err = mthca_map_user_db(to_mdev(ibdev), &context->uar,
+				context->db_tab, ucmd.arm_db_index,
+				ucmd.arm_db_page);
+	if (err)
+		goto err_unmap_set;
 
 	cq = to_mcq(ibcq);
 
-	if (udata) {
-		cq->buf.mr.ibmr.lkey = ucmd.lkey;
-		cq->set_ci_db_index  = ucmd.set_db_index;
-		cq->arm_db_index     = ucmd.arm_db_index;
-	}
+	cq->buf.mr.ibmr.lkey = ucmd.lkey;
+	cq->set_ci_db_index  = ucmd.set_db_index;
+	cq->arm_db_index     = ucmd.arm_db_index;
 
 	for (nent = 1; nent <= entries; nent <<= 1)
 		; /* nothing */
 
-	err = mthca_init_cq(to_mdev(ibdev), nent, context,
-			    udata ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
-			    cq);
+	err = mthca_init_cq(to_mdev(ibdev), nent, context, ucmd.pdn, cq);
 	if (err)
 		goto err_unmap_arm;
 
-	if (udata && ib_copy_to_udata(udata, &cq->cqn, sizeof(__u32))) {
+	if (ib_copy_to_udata(udata, &cq->cqn, sizeof(__u32))) {
 		mthca_free_cq(to_mdev(ibdev), cq);
 		err = -EFAULT;
 		goto err_unmap_arm;
@@ -637,18 +631,47 @@ static int mthca_create_cq(struct ib_cq *ibcq,
 	return 0;
 
 err_unmap_arm:
-	if (udata)
-		mthca_unmap_user_db(to_mdev(ibdev), &context->uar,
-				    context->db_tab, ucmd.arm_db_index);
+	mthca_unmap_user_db(to_mdev(ibdev), &context->uar,
+			    context->db_tab, ucmd.arm_db_index);
 
 err_unmap_set:
-	if (udata)
-		mthca_unmap_user_db(to_mdev(ibdev), &context->uar,
-				    context->db_tab, ucmd.set_db_index);
+	mthca_unmap_user_db(to_mdev(ibdev), &context->uar,
+			    context->db_tab, ucmd.set_db_index);
 
 	return err;
 }
 
+static int mthca_create_cq(struct ib_cq *ibcq,
+			   const struct ib_cq_init_attr *attr,
+			   struct uverbs_attr_bundle *attrs)
+{
+	struct ib_device *ibdev = ibcq->device;
+	int entries = attr->cqe;
+	struct mthca_cq *cq;
+	int nent;
+	int err;
+
+	if (attr->flags)
+		return -EOPNOTSUPP;
+
+	if (attr->cqe > to_mdev(ibdev)->limits.max_cqes)
+		return -EINVAL;
+
+	cq = to_mcq(ibcq);
+
+	for (nent = 1; nent <= entries; nent <<= 1)
+		; /* nothing */
+
+	err = mthca_init_cq(to_mdev(ibdev), nent, NULL,
+			    to_mdev(ibdev)->driver_pd.pd_num, cq);
+	if (err)
+		return err;
+
+	cq->resize_buf = NULL;
+
+	return 0;
+}
+
 static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
 				  int entries)
 {
@@ -1070,6 +1093,7 @@ static const struct ib_device_ops mthca_dev_ops = {
 	.attach_mcast = mthca_multicast_attach,
 	.create_ah = mthca_ah_create,
 	.create_cq = mthca_create_cq,
+	.create_user_cq = mthca_create_user_cq,
 	.create_qp = mthca_create_qp,
 	.dealloc_pd = mthca_dealloc_pd,
 	.dealloc_ucontext = mthca_dealloc_ucontext,

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 18/50] RDMA/erdma: Separate user and kernel CQ creation paths
From: Leon Romanovsky @ 2026-02-13 10:57 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Split CQ creation into distinct kernel and user flows. The erdma driver
uses a problematic pattern, inherited from mlx4, that shares and caches
umem in erdma_map_user_dbrecords(). This design blocks the driver from
supporting generic umem sources (VMA, dmabuf, memfd, and others).

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/erdma/erdma_main.c  |  1 +
 drivers/infiniband/hw/erdma/erdma_verbs.c | 97 ++++++++++++++++++++-----------
 drivers/infiniband/hw/erdma/erdma_verbs.h |  2 +
 3 files changed, 67 insertions(+), 33 deletions(-)

diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index f35b30235018..1b6426e89d80 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -505,6 +505,7 @@ static const struct ib_device_ops erdma_device_ops = {
 	.alloc_pd = erdma_alloc_pd,
 	.alloc_ucontext = erdma_alloc_ucontext,
 	.create_cq = erdma_create_cq,
+	.create_user_cq = erdma_create_user_cq,
 	.create_qp = erdma_create_qp,
 	.dealloc_pd = erdma_dealloc_pd,
 	.dealloc_ucontext = erdma_dealloc_ucontext,
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index 058edc42de58..6f809907fec5 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -1952,8 +1952,8 @@ static int erdma_init_kernel_cq(struct erdma_cq *cq)
 	return -ENOMEM;
 }
 
-int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
-		    struct uverbs_attr_bundle *attrs)
+int erdma_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			 struct uverbs_attr_bundle *attrs)
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct erdma_cq *cq = to_ecq(ibcq);
@@ -1962,6 +1962,11 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	int ret;
 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
 		udata, struct erdma_ucontext, ibucontext);
+	struct erdma_ureq_create_cq ureq;
+	struct erdma_uresp_create_cq uresp;
+
+	if (ibcq->umem)
+		return -EOPNOTSUPP;
 
 	if (depth > dev->attrs.max_cqe)
 		return -EINVAL;
@@ -1977,31 +1982,22 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	if (ret < 0)
 		return ret;
 
-	if (!rdma_is_kernel_res(&ibcq->res)) {
-		struct erdma_ureq_create_cq ureq;
-		struct erdma_uresp_create_cq uresp;
-
-		ret = ib_copy_from_udata(&ureq, udata,
-					 min(udata->inlen, sizeof(ureq)));
-		if (ret)
-			goto err_out_xa;
+	ret = ib_copy_from_udata(&ureq, udata,
+				 min(udata->inlen, sizeof(ureq)));
+	if (ret)
+		goto err_out_xa;
 
-		ret = erdma_init_user_cq(ctx, cq, &ureq);
-		if (ret)
-			goto err_out_xa;
+	ret = erdma_init_user_cq(ctx, cq, &ureq);
+	if (ret)
+		goto err_out_xa;
 
-		uresp.cq_id = cq->cqn;
-		uresp.num_cqe = depth;
+	uresp.cq_id = cq->cqn;
+	uresp.num_cqe = depth;
 
-		ret = ib_copy_to_udata(udata, &uresp,
-				       min(sizeof(uresp), udata->outlen));
-		if (ret)
-			goto err_free_res;
-	} else {
-		ret = erdma_init_kernel_cq(cq);
-		if (ret)
-			goto err_out_xa;
-	}
+	ret = ib_copy_to_udata(udata, &uresp,
+			       min(sizeof(uresp), udata->outlen));
+	if (ret)
+		goto err_free_res;
 
 	ret = create_cq_cmd(ctx, cq);
 	if (ret)
@@ -2010,19 +2006,54 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	return 0;
 
 err_free_res:
-	if (!rdma_is_kernel_res(&ibcq->res)) {
-		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
-		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
-	} else {
-		dma_free_coherent(&dev->pdev->dev, depth << CQE_SHIFT,
-				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
-		dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
-			      cq->kern_cq.dbrec_dma);
-	}
+	erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
+	put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
 
 err_out_xa:
 	xa_erase(&dev->cq_xa, cq->cqn);
+	return ret;
+}
+
+int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		    struct uverbs_attr_bundle *attrs)
+{
+	struct erdma_cq *cq = to_ecq(ibcq);
+	struct erdma_dev *dev = to_edev(ibcq->device);
+	unsigned int depth = attr->cqe;
+	int ret;
+
+	if (depth > dev->attrs.max_cqe)
+		return -EINVAL;
 
+	depth = roundup_pow_of_two(depth);
+	cq->ibcq.cqe = depth;
+	cq->depth = depth;
+	cq->assoc_eqn = attr->comp_vector + 1;
+
+	ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
+			      XA_LIMIT(1, dev->attrs.max_cq - 1),
+			      &dev->next_alloc_cqn, GFP_KERNEL);
+	if (ret < 0)
+		return ret;
+
+	ret = erdma_init_kernel_cq(cq);
+	if (ret)
+		goto err_out_xa;
+
+	ret = create_cq_cmd(NULL, cq);
+	if (ret)
+		goto err_free_res;
+
+	return 0;
+
+err_free_res:
+	dma_free_coherent(&dev->pdev->dev, depth << CQE_SHIFT,
+			  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
+	dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
+		      cq->kern_cq.dbrec_dma);
+
+err_out_xa:
+	xa_erase(&dev->cq_xa, cq->cqn);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index 7d8d3fe501d5..21a4fb404806 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -435,6 +435,8 @@ int erdma_get_port_immutable(struct ib_device *dev, u32 port,
 			     struct ib_port_immutable *ib_port_immutable);
 int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		    struct uverbs_attr_bundle *attrs);
+int erdma_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			 struct uverbs_attr_bundle *attrs);
 int erdma_query_port(struct ib_device *dev, u32 port,
 		     struct ib_port_attr *attr);
 int erdma_query_gid(struct ib_device *dev, u32 port, int idx,

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 20/50] RDMA/qedr: Convert to modern CQ interface
From: Leon Romanovsky @ 2026-02-13 10:57 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

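Split CQ creation into separate user and kernel paths and allow users to
supply their own umem for a user CQ. When no umem is supplied, the driver
still pins the buffer described by the create-CQ request, as before.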

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/qedr/main.c  |   1 +
 drivers/infiniband/hw/qedr/verbs.c | 323 +++++++++++++++++++++----------------
 drivers/infiniband/hw/qedr/verbs.h |   2 +
 3 files changed, 188 insertions(+), 138 deletions(-)

diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index ecdfeff3d44f..c6ca95983492 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -199,6 +199,7 @@ static const struct ib_device_ops qedr_dev_ops = {
 	.alloc_ucontext = qedr_alloc_ucontext,
 	.create_ah = qedr_create_ah,
 	.create_cq = qedr_create_cq,
+	.create_user_cq = qedr_create_user_cq,
 	.create_qp = qedr_create_qp,
 	.create_srq = qedr_create_srq,
 	.dealloc_pd = qedr_dealloc_pd,
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index cb06c5d894b8..10010ccf63b3 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -789,52 +789,33 @@ static int qedr_init_user_db_rec(struct ib_udata *udata,
 
 static inline int qedr_init_user_queue(struct ib_udata *udata,
 				       struct qedr_dev *dev,
-				       struct qedr_userq *q, u64 buf_addr,
-				       size_t buf_len, bool requires_db_rec,
-				       int access,
+				       struct qedr_userq *q,
+				       bool requires_db_rec,
 				       int alloc_and_init)
 {
 	u32 fw_pages;
 	int rc;
 
-	q->buf_addr = buf_addr;
-	q->buf_len = buf_len;
-	q->umem = ib_umem_get(&dev->ibdev, q->buf_addr, q->buf_len, access);
-	if (IS_ERR(q->umem)) {
-		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
-		       PTR_ERR(q->umem));
-		return PTR_ERR(q->umem);
-	}
-
 	fw_pages = ib_umem_num_dma_blocks(q->umem, 1 << FW_PAGE_SHIFT);
 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
 	if (rc)
-		goto err0;
+		return rc;
 
 	if (alloc_and_init) {
 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
-		if (IS_ERR(q->pbl_tbl)) {
-			rc = PTR_ERR(q->pbl_tbl);
-			goto err0;
-		}
+		if (IS_ERR(q->pbl_tbl))
+			return PTR_ERR(q->pbl_tbl);
+
 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
 				   FW_PAGE_SHIFT);
 	} else {
 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
-		if (!q->pbl_tbl) {
-			rc = -ENOMEM;
-			goto err0;
-		}
+		if (!q->pbl_tbl)
+			return -ENOMEM;
 	}
 
 	/* mmap the user address used to store doorbell data for recovery */
 	return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
-
-err0:
-	ib_umem_release(q->umem);
-	q->umem = NULL;
-
-	return rc;
 }
 
 static inline void qedr_init_cq_params(struct qedr_cq *cq,
@@ -899,8 +880,8 @@ int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 	return 0;
 }
 
-int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
-		   struct uverbs_attr_bundle *attrs)
+int qedr_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			struct uverbs_attr_bundle *attrs)
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct ib_device *ibdev = ibcq->device;
@@ -908,6 +889,104 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		udata, struct qedr_ucontext, ibucontext);
 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
+	struct qedr_dev *dev = get_qedr_dev(ibdev);
+	struct qed_rdma_create_cq_in_params params;
+	struct qedr_create_cq_ureq ureq = {};
+	int vector = attr->comp_vector;
+	int entries = attr->cqe;
+	struct qedr_cq *cq = get_qedr_cq(ibcq);
+	int chain_entries;
+	u32 db_offset;
+	int page_cnt;
+	u64 pbl_ptr;
+	u16 icid;
+	int rc;
+
+	DP_DEBUG(dev, QEDR_MSG_INIT,
+		 "create_cq: called from User Lib. entries=%d, vector=%d\n",
+		 entries, vector);
+
+	if (attr->flags)
+		return -EOPNOTSUPP;
+
+	if (attr->cqe > QEDR_MAX_CQES)
+		return -EINVAL;
+
+	chain_entries = qedr_align_cq_entries(entries);
+	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
+
+	/* calc db offset. user will add DPI base, kernel will add db addr */
+	db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+
+	if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen)))
+		return -EINVAL;
+
+	cq->cq_type = QEDR_CQ_TYPE_USER;
+
+	cq->q.buf_addr = ureq.addr;
+	cq->q.buf_len = ureq.len;
+	if (!ibcq->umem)
+		ibcq->umem = ib_umem_get(&dev->ibdev, ureq.addr, ureq.len,
+					 IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(ibcq->umem))
+		return PTR_ERR(ibcq->umem);
+	cq->q.umem = ibcq->umem;
+
+	rc = qedr_init_user_queue(udata, dev, &cq->q, true, 1);
+	if (rc)
+		return rc;
+
+	pbl_ptr = cq->q.pbl_tbl->pa;
+	page_cnt = cq->q.pbl_info.num_pbes;
+
+	cq->ibcq.cqe = chain_entries;
+	cq->q.db_addr = ctx->dpi_addr + db_offset;
+
+	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
+			    pbl_ptr, &params);
+
+	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
+	if (rc)
+		goto err1;
+
+	cq->icid = icid;
+	cq->sig = QEDR_CQ_MAGIC_NUMBER;
+	spin_lock_init(&cq->cq_lock);
+
+	rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
+	if (rc)
+		goto err2;
+
+	rc = qedr_db_recovery_add(dev, cq->q.db_addr,
+				  &cq->q.db_rec_data->db_data,
+				  DB_REC_WIDTH_64B,
+				  DB_REC_USER);
+	if (rc)
+		goto err2;
+
+	DP_DEBUG(dev, QEDR_MSG_CQ,
+		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
+		 cq->icid, cq, params.cq_size);
+
+	return 0;
+
+err2:
+	destroy_iparams.icid = cq->icid;
+	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
+				  &destroy_oparams);
+err1:
+	qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
+	if (cq->q.db_mmap_entry)
+		rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
+	return rc;
+}
+
+int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		   struct uverbs_attr_bundle *attrs)
+{
+	struct ib_device *ibdev = ibcq->device;
+	struct qed_rdma_destroy_cq_out_params destroy_oparams;
+	struct qed_rdma_destroy_cq_in_params destroy_iparams;
 	struct qed_chain_init_params chain_params = {
 		.mode		= QED_CHAIN_MODE_PBL,
 		.intended_use	= QED_CHAIN_USE_TO_CONSUME,
@@ -916,7 +995,6 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	};
 	struct qedr_dev *dev = get_qedr_dev(ibdev);
 	struct qed_rdma_create_cq_in_params params;
-	struct qedr_create_cq_ureq ureq = {};
 	int vector = attr->comp_vector;
 	int entries = attr->cqe;
 	struct qedr_cq *cq = get_qedr_cq(ibcq);
@@ -928,18 +1006,14 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	int rc;
 
 	DP_DEBUG(dev, QEDR_MSG_INIT,
-		 "create_cq: called from %s. entries=%d, vector=%d\n",
-		 udata ? "User Lib" : "Kernel", entries, vector);
+		 "create_cq: called from Kernel. entries=%d, vector=%d\n",
+		 entries, vector);
 
 	if (attr->flags)
 		return -EOPNOTSUPP;
 
-	if (entries > QEDR_MAX_CQES) {
-		DP_ERR(dev,
-		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
-		       entries, QEDR_MAX_CQES);
+	if (attr->cqe > QEDR_MAX_CQES)
 		return -EINVAL;
-	}
 
 	chain_entries = qedr_align_cq_entries(entries);
 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
@@ -948,47 +1022,18 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	/* calc db offset. user will add DPI base, kernel will add db addr */
 	db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
 
-	if (udata) {
-		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
-							 udata->inlen))) {
-			DP_ERR(dev,
-			       "create cq: problem copying data from user space\n");
-			goto err0;
-		}
+	cq->cq_type = QEDR_CQ_TYPE_KERNEL;
 
-		if (!ureq.len) {
-			DP_ERR(dev,
-			       "create cq: cannot create a cq with 0 entries\n");
-			goto err0;
-		}
-
-		cq->cq_type = QEDR_CQ_TYPE_USER;
-
-		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
-					  ureq.len, true, IB_ACCESS_LOCAL_WRITE,
-					  1);
-		if (rc)
-			goto err0;
-
-		pbl_ptr = cq->q.pbl_tbl->pa;
-		page_cnt = cq->q.pbl_info.num_pbes;
-
-		cq->ibcq.cqe = chain_entries;
-		cq->q.db_addr = ctx->dpi_addr + db_offset;
-	} else {
-		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
+	rc = dev->ops->common->chain_alloc(dev->cdev, &cq->pbl,
+					   &chain_params);
+	if (rc)
+		return rc;
 
-		rc = dev->ops->common->chain_alloc(dev->cdev, &cq->pbl,
-						   &chain_params);
-		if (rc)
-			goto err0;
+	page_cnt = qed_chain_get_page_cnt(&cq->pbl);
+	pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
+	cq->ibcq.cqe = cq->pbl.capacity;
 
-		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
-		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
-		cq->ibcq.cqe = cq->pbl.capacity;
-	}
-
-	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
+	qedr_init_cq_params(cq, NULL, dev, vector, chain_entries, page_cnt,
 			    pbl_ptr, &params);
 
 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
@@ -999,37 +1044,23 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
 	spin_lock_init(&cq->cq_lock);
 
-	if (udata) {
-		rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
-		if (rc)
-			goto err2;
-
-		rc = qedr_db_recovery_add(dev, cq->q.db_addr,
-					  &cq->q.db_rec_data->db_data,
-					  DB_REC_WIDTH_64B,
-					  DB_REC_USER);
-		if (rc)
-			goto err2;
+	/* Generate doorbell address. */
+	cq->db.data.icid = cq->icid;
+	cq->db_addr = dev->db_addr + db_offset;
+	cq->db.data.params = DB_AGG_CMD_MAX <<
+	    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
 
-	} else {
-		/* Generate doorbell address. */
-		cq->db.data.icid = cq->icid;
-		cq->db_addr = dev->db_addr + db_offset;
-		cq->db.data.params = DB_AGG_CMD_MAX <<
-		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
-
-		/* point to the very last element, passing it we will toggle */
-		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
-		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
-		cq->latest_cqe = NULL;
-		consume_cqe(cq);
-		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
+	/* point to the very last element, passing it we will toggle */
+	cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
+	cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
+	cq->latest_cqe = NULL;
+	consume_cqe(cq);
+	cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
 
-		rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
-					  DB_REC_WIDTH_64B, DB_REC_KERNEL);
-		if (rc)
-			goto err2;
-	}
+	rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
+				  DB_REC_WIDTH_64B, DB_REC_KERNEL);
+	if (rc)
+		goto err2;
 
 	DP_DEBUG(dev, QEDR_MSG_CQ,
 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
@@ -1042,16 +1073,8 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
 				  &destroy_oparams);
 err1:
-	if (udata) {
-		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
-		ib_umem_release(cq->q.umem);
-		if (cq->q.db_mmap_entry)
-			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
-	} else {
-		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
-	}
-err0:
-	return -EINVAL;
+	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
+	return rc;
 }
 
 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
@@ -1081,7 +1104,6 @@ int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 
 	if (udata) {
 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
-		ib_umem_release(cq->q.umem);
 
 		if (cq->q.db_rec_data) {
 			qedr_db_recovery_del(dev, cq->q.db_addr,
@@ -1472,26 +1494,33 @@ static int qedr_init_srq_user_params(struct ib_udata *udata,
 	struct scatterlist *sg;
 	int rc;
 
-	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
-				  ureq->srq_len, false, access, 1);
+	srq->usrq.buf_addr = ureq->srq_addr;
+	srq->usrq.buf_len = ureq->srq_len;
+	srq->usrq.umem = ib_umem_get(&srq->dev->ibdev, ureq->srq_addr,
+				     ureq->srq_len, access);
+	if (IS_ERR(srq->usrq.umem))
+		return PTR_ERR(srq->usrq.umem);
+
+	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, false, 1);
 	if (rc)
-		return rc;
+		goto err_umem;
 
 	srq->prod_umem = ib_umem_get(srq->ibsrq.device, ureq->prod_pair_addr,
 				     sizeof(struct rdma_srq_producers), access);
 	if (IS_ERR(srq->prod_umem)) {
+		rc = PTR_ERR(srq->prod_umem);
 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
-		ib_umem_release(srq->usrq.umem);
-		DP_ERR(srq->dev,
-		       "create srq: failed ib_umem_get for producer, got %ld\n",
-		       PTR_ERR(srq->prod_umem));
-		return PTR_ERR(srq->prod_umem);
+		goto err_umem;
 	}
 
 	sg = srq->prod_umem->sgt_append.sgt.sgl;
 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
 
 	return 0;
+
+err_umem:
+	ib_umem_release(srq->usrq.umem);
+	return rc;
 }
 
 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
@@ -1870,27 +1899,34 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
 
 	if (qedr_qp_has_sq(qp)) {
 		/* SQ - read access only (0) */
-		rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
-					  ureq.sq_len, true, 0, alloc_and_init);
+		qp->usq.buf_addr = ureq.sq_addr;
+		qp->usq.buf_len = ureq.sq_len;
+		qp->usq.umem = ib_umem_get(&dev->ibdev, ureq.sq_addr,
+					   ureq.sq_len, 0);
+		if (IS_ERR(qp->usq.umem))
+			return PTR_ERR(qp->usq.umem);
+
+		rc = qedr_init_user_queue(udata, dev, &qp->usq, true,
+					  alloc_and_init);
 		if (rc)
-			return rc;
+			goto err_sq_umem;
 	}
 
 	if (qedr_qp_has_rq(qp)) {
 		/* RQ - read access only (0) */
-		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
-					  ureq.rq_len, true, 0, alloc_and_init);
-		if (rc) {
-			ib_umem_release(qp->usq.umem);
-			qp->usq.umem = NULL;
-			if (rdma_protocol_roce(&dev->ibdev, 1)) {
-				qedr_free_pbl(dev, &qp->usq.pbl_info,
-					      qp->usq.pbl_tbl);
-			} else {
-				kfree(qp->usq.pbl_tbl);
-			}
-			return rc;
+		qp->urq.buf_addr = ureq.rq_addr;
+		qp->urq.buf_len = ureq.rq_len;
+		qp->urq.umem = ib_umem_get(&dev->ibdev, ureq.rq_addr,
+					   ureq.rq_len, 0);
+		if (IS_ERR(qp->urq.umem)) {
+			rc = PTR_ERR(qp->urq.umem);
+			goto err_rq_umem;
 		}
+
+		rc = qedr_init_user_queue(udata, dev, &qp->urq, true,
+					  alloc_and_init);
+		if (rc)
+			goto err_rq_umem2;
 	}
 
 	memset(&in_params, 0, sizeof(in_params));
@@ -1989,6 +2025,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
 err1:
 	qedr_cleanup_user(dev, ctx, qp);
 	return rc;
+
+err_rq_umem2:
+	ib_umem_release(qp->urq.umem);
+err_rq_umem:
+	if (rdma_protocol_roce(&dev->ibdev, 1))
+		qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
+	else
+		kfree(qp->usq.pbl_tbl);
+err_sq_umem:
+	ib_umem_release(qp->usq.umem);
+	return rc;
 }
 
 static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 62420a15101b..292d77df562d 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -53,6 +53,8 @@ int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata);
 int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata);
 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		   struct uverbs_attr_bundle *attrs);
+int qedr_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			struct uverbs_attr_bundle *attrs);
 int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
 int qedr_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 21/50] RDMA/vmw_pvrdma: Provide a modern CQ creation interface
From: Leon Romanovsky @ 2026-02-13 10:57 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

The uverbs CQ creation UAPI allows users to supply their own umem for a CQ.
Update vmw_pvrdma to support this workflow while preserving support for creating
umem through the legacy interface.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c    | 171 ++++++++++++++++--------
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c  |   1 +
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h |   3 +
 3 files changed, 121 insertions(+), 54 deletions(-)

diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index b3df6eb9b8ef..c43c363565c1 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -90,16 +90,9 @@ int pvrdma_req_notify_cq(struct ib_cq *ibcq,
 	return has_data;
 }
 
-/**
- * pvrdma_create_cq - create completion queue
- * @ibcq: Allocated CQ
- * @attr: completion queue attributes
- * @attrs: bundle
- *
- * @return: 0 on success
- */
-int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
-		     struct uverbs_attr_bundle *attrs)
+int pvrdma_create_user_cq(struct ib_cq *ibcq,
+			  const struct ib_cq_init_attr *attr,
+			  struct uverbs_attr_bundle *attrs)
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct ib_device *ibdev = ibcq->device;
@@ -123,58 +116,48 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	if (attr->flags)
 		return -EOPNOTSUPP;
 
-	entries = roundup_pow_of_two(entries);
-	if (entries < 1 || entries > dev->dsr->caps.max_cqe)
+	if (attr->cqe > dev->dsr->caps.max_cqe)
 		return -EINVAL;
 
+	entries = roundup_pow_of_two(entries);
+
 	if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq))
 		return -ENOMEM;
 
 	cq->ibcq.cqe = entries;
-	cq->is_kernel = !udata;
-
-	if (!cq->is_kernel) {
-		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
-			ret = -EFAULT;
-			goto err_cq;
-		}
-
-		cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, ucmd.buf_size,
-				       IB_ACCESS_LOCAL_WRITE);
-		if (IS_ERR(cq->umem)) {
-			ret = PTR_ERR(cq->umem);
-			goto err_cq;
-		}
+	cq->is_kernel = false;
 
-		npages = ib_umem_num_dma_blocks(cq->umem, PAGE_SIZE);
-	} else {
-		/* One extra page for shared ring state */
-		npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
-			      PAGE_SIZE - 1) / PAGE_SIZE;
+	if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+		ret = -EFAULT;
+		goto err_cq;
+	}
 
-		/* Skip header page. */
-		cq->offset = PAGE_SIZE;
+	if (!ibcq->umem)
+		ibcq->umem = ib_umem_get(ibdev, ucmd.buf_addr, ucmd.buf_size,
+					 IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(ibcq->umem)) {
+		ret = PTR_ERR(ibcq->umem);
+		goto err_cq;
 	}
 
+	npages = ib_umem_num_dma_blocks(cq->umem, PAGE_SIZE);
+
 	if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
 		dev_warn(&dev->pdev->dev,
 			 "overflow pages in completion queue\n");
 		ret = -EINVAL;
-		goto err_umem;
+		goto err_cq;
 	}
 
-	ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, cq->is_kernel);
+	ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, false);
 	if (ret) {
 		dev_warn(&dev->pdev->dev,
 			 "could not allocate page directory\n");
-		goto err_umem;
+		goto err_cq;
 	}
 
 	/* Ring state is always the first page. Set in library for user cq. */
-	if (cq->is_kernel)
-		cq->ring_state = cq->pdir.pages[0];
-	else
-		pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);
+	pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);
 
 	refcount_set(&cq->refcnt, 1);
 	init_completion(&cq->free);
@@ -183,7 +166,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	memset(cmd, 0, sizeof(*cmd));
 	cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ;
 	cmd->nchunks = npages;
-	cmd->ctx_handle = context ? context->ctx_handle : 0;
+	cmd->ctx_handle = context->ctx_handle;
 	cmd->cqe = entries;
 	cmd->pdir_dma = cq->pdir.dir_dma;
 	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP);
@@ -200,24 +183,106 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
 	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
 
-	if (!cq->is_kernel) {
-		cq->uar = &context->uar;
+	cq->uar = &context->uar;
 
-		/* Copy udata back. */
-		if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) {
-			dev_warn(&dev->pdev->dev,
-				 "failed to copy back udata\n");
-			pvrdma_destroy_cq(&cq->ibcq, udata);
-			return -EINVAL;
-		}
+	/* Copy udata back. */
+	if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) {
+		dev_warn(&dev->pdev->dev,
+			 "failed to copy back udata\n");
+		pvrdma_destroy_cq(&cq->ibcq, udata);
+		return -EINVAL;
 	}
 
 	return 0;
 
 err_page_dir:
 	pvrdma_page_dir_cleanup(dev, &cq->pdir);
-err_umem:
-	ib_umem_release(cq->umem);
+err_cq:
+	atomic_dec(&dev->num_cqs);
+	return ret;
+}
+
+int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		     struct uverbs_attr_bundle *attrs)
+{
+	struct ib_device *ibdev = ibcq->device;
+	int entries = attr->cqe;
+	struct pvrdma_dev *dev = to_vdev(ibdev);
+	struct pvrdma_cq *cq = to_vcq(ibcq);
+	int ret;
+	int npages;
+	unsigned long flags;
+	union pvrdma_cmd_req req;
+	union pvrdma_cmd_resp rsp;
+	struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
+	struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
+
+	BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);
+
+	if (attr->flags)
+		return -EOPNOTSUPP;
+
+	if (attr->cqe > dev->dsr->caps.max_cqe)
+		return -EINVAL;
+	entries = roundup_pow_of_two(entries);
+
+	if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq))
+		return -ENOMEM;
+
+	cq->ibcq.cqe = entries;
+	cq->is_kernel = true;
+
+	/* One extra page for shared ring state */
+	npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
+		      PAGE_SIZE - 1) / PAGE_SIZE;
+
+	/* Skip header page. */
+	cq->offset = PAGE_SIZE;
+
+	if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
+		dev_warn(&dev->pdev->dev,
+			 "overflow pages in completion queue\n");
+		ret = -EINVAL;
+		goto err_cq;
+	}
+
+	ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, true);
+	if (ret) {
+		dev_warn(&dev->pdev->dev,
+			 "could not allocate page directory\n");
+		goto err_cq;
+	}
+
+	/* Ring state is always the first page. Set in library for user cq. */
+	cq->ring_state = cq->pdir.pages[0];
+
+	refcount_set(&cq->refcnt, 1);
+	init_completion(&cq->free);
+	spin_lock_init(&cq->cq_lock);
+
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ;
+	cmd->nchunks = npages;
+	cmd->ctx_handle = 0;
+	cmd->cqe = entries;
+	cmd->pdir_dma = cq->pdir.dir_dma;
+	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP);
+	if (ret < 0) {
+		dev_warn(&dev->pdev->dev,
+			 "could not create completion queue, error: %d\n", ret);
+		goto err_page_dir;
+	}
+
+	cq->ibcq.cqe = resp->cqe;
+	cq->cq_handle = resp->cq_handle;
+	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
+	dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
+	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
+
+	return 0;
+
+err_page_dir:
+	pvrdma_page_dir_cleanup(dev, &cq->pdir);
 err_cq:
 	atomic_dec(&dev->num_cqs);
 	return ret;
@@ -229,8 +294,6 @@ static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
 		complete(&cq->free);
 	wait_for_completion(&cq->free);
 
-	ib_umem_release(cq->umem);
-
 	pvrdma_page_dir_cleanup(dev, &cq->pdir);
 }
 
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 1664d1d7d969..3f5b94a1e517 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -194,6 +194,7 @@ static const struct ib_device_ops pvrdma_dev_ops = {
 	.alloc_ucontext = pvrdma_alloc_ucontext,
 	.create_ah = pvrdma_create_ah,
 	.create_cq = pvrdma_create_cq,
+	.create_user_cq = pvrdma_create_user_cq,
 	.create_qp = pvrdma_create_qp,
 	.dealloc_pd = pvrdma_dealloc_pd,
 	.dealloc_ucontext = pvrdma_dealloc_ucontext,
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index 603e5a9311eb..18910d336744 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -375,6 +375,9 @@ int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
 		     int sg_nents, unsigned int *sg_offset);
 int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		     struct uverbs_attr_bundle *attrs);
+int pvrdma_create_user_cq(struct ib_cq *ibcq,
+			  const struct ib_cq_init_attr *attr,
+			  struct uverbs_attr_bundle *attrs);
 int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
 int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 19/50] RDMA/ionic: Split user and kernel CQ creation paths
From: Leon Romanovsky @ 2026-02-13 10:57 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Separate the CQ creation logic into distinct kernel and user flows. The ionic
driver may allocate two umems per CQ, and the current layout prevents it from
supporting generic umem sources (VMA, dmabuf, memfd, and others).

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/ionic/ionic_controlpath.c | 88 +++++++++++++++++--------
 drivers/infiniband/hw/ionic/ionic_ibdev.c       |  1 +
 drivers/infiniband/hw/ionic/ionic_ibdev.h       |  2 +
 3 files changed, 64 insertions(+), 27 deletions(-)

diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c
index ea12d9b8e125..5b8b6baaf5d4 100644
--- a/drivers/infiniband/hw/ionic/ionic_controlpath.c
+++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c
@@ -89,7 +89,7 @@ int ionic_create_cq_common(struct ionic_vcq *vcq,
 
 	cq->vcq = vcq;
 
-	if (attr->cqe < 1 || attr->cqe + IONIC_CQ_GRACE > 0xffff) {
+	if (attr->cqe > 0xffff - IONIC_CQ_GRACE) {
 		rc = -EINVAL;
 		goto err_args;
 	}
@@ -1209,8 +1209,8 @@ static int ionic_destroy_cq_cmd(struct ionic_ibdev *dev, u32 cqid)
 	return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN);
 }
 
-int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
-		    struct uverbs_attr_bundle *attrs)
+int ionic_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			 struct uverbs_attr_bundle *attrs)
 {
 	struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
 	struct ib_udata *udata = &attrs->driver_udata;
@@ -1222,21 +1222,18 @@ int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	struct ionic_cq_req req;
 	int udma_idx = 0, rc;
 
-	if (udata) {
-		rc = ib_copy_from_udata(&req, udata, sizeof(req));
-		if (rc)
-			return rc;
-	}
+	if (ibcq->umem)
+		return -EOPNOTSUPP;
 
-	vcq->udma_mask = BIT(dev->lif_cfg.udma_count) - 1;
+	rc = ib_copy_from_udata(&req, udata, sizeof(req));
+	if (rc)
+		return rc;
 
-	if (udata)
-		vcq->udma_mask &= req.udma_mask;
+	vcq->udma_mask = BIT(dev->lif_cfg.udma_count) - 1;
+	vcq->udma_mask &= req.udma_mask;
 
-	if (!vcq->udma_mask) {
-		rc = -EINVAL;
-		goto err_init;
-	}
+	if (!vcq->udma_mask)
+		return -EINVAL;
 
 	for (; udma_idx < dev->lif_cfg.udma_count; ++udma_idx) {
 		if (!(vcq->udma_mask & BIT(udma_idx)))
@@ -1247,24 +1244,25 @@ int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 					    &resp.cqid[udma_idx],
 					    udma_idx);
 		if (rc)
-			goto err_init;
+			goto err_resp;
 
 		rc = ionic_create_cq_cmd(dev, ctx, &vcq->cq[udma_idx], &buf);
-		if (rc)
-			goto err_cmd;
+		if (rc) {
+			ionic_pgtbl_unbuf(dev, &buf);
+			ionic_destroy_cq_common(dev, &vcq->cq[udma_idx]);
+			goto err_resp;
+		}
 
 		ionic_pgtbl_unbuf(dev, &buf);
 	}
 
 	vcq->ibcq.cqe = attr->cqe;
 
-	if (udata) {
-		resp.udma_mask = vcq->udma_mask;
+	resp.udma_mask = vcq->udma_mask;
 
-		rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
-		if (rc)
-			goto err_resp;
-	}
+	rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+	if (rc)
+		goto err_resp;
 
 	return 0;
 
@@ -1274,11 +1272,47 @@ int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		if (!(vcq->udma_mask & BIT(udma_idx)))
 			continue;
 		ionic_destroy_cq_cmd(dev, vcq->cq[udma_idx].cqid);
-err_cmd:
 		ionic_pgtbl_unbuf(dev, &buf);
 		ionic_destroy_cq_common(dev, &vcq->cq[udma_idx]);
-err_init:
-		;
+	}
+
+	return rc;
+}
+
+int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		    struct uverbs_attr_bundle *attrs)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
+	struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+	struct ionic_tbl_buf buf = {};
+	int udma_idx = 0, rc;
+
+	vcq->udma_mask = BIT(dev->lif_cfg.udma_count) - 1;
+	for (; udma_idx < dev->lif_cfg.udma_count; ++udma_idx) {
+		rc = ionic_create_cq_common(vcq, &buf, attr, NULL, NULL, NULL,
+					    NULL, udma_idx);
+		if (rc)
+			goto err_resp;
+
+		rc = ionic_create_cq_cmd(dev, NULL, &vcq->cq[udma_idx], &buf);
+		if (rc) {
+			ionic_pgtbl_unbuf(dev, &buf);
+			ionic_destroy_cq_common(dev, &vcq->cq[udma_idx]);
+			goto err_resp;
+		}
+
+		ionic_pgtbl_unbuf(dev, &buf);
+	}
+
+	vcq->ibcq.cqe = attr->cqe;
+
+	return 0;
+
+err_resp:
+	while (udma_idx--) {
+		ionic_destroy_cq_cmd(dev, vcq->cq[udma_idx].cqid);
+		ionic_pgtbl_unbuf(dev, &buf);
+		ionic_destroy_cq_common(dev, &vcq->cq[udma_idx]);
 	}
 
 	return rc;
diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c
index 164046d00e5d..32321a8996d6 100644
--- a/drivers/infiniband/hw/ionic/ionic_ibdev.c
+++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c
@@ -229,6 +229,7 @@ static const struct ib_device_ops ionic_dev_ops = {
 	.alloc_mw = ionic_alloc_mw,
 	.dealloc_mw = ionic_dealloc_mw,
 	.create_cq = ionic_create_cq,
+	.create_user_cq = ionic_create_user_cq,
 	.destroy_cq = ionic_destroy_cq,
 	.create_qp = ionic_create_qp,
 	.modify_qp = ionic_modify_qp,
diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.h b/drivers/infiniband/hw/ionic/ionic_ibdev.h
index 63828240d659..0bcb8be6fb62 100644
--- a/drivers/infiniband/hw/ionic/ionic_ibdev.h
+++ b/drivers/infiniband/hw/ionic/ionic_ibdev.h
@@ -482,6 +482,8 @@ int ionic_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
 int ionic_dealloc_mw(struct ib_mw *ibmw);
 int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		    struct uverbs_attr_bundle *attrs);
+int ionic_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			 struct uverbs_attr_bundle *attrs);
 int ionic_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
 int ionic_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
 		    struct ib_udata *udata);

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 23/50] RDMA/irdma: Split user and kernel CQ creation paths
From: Leon Romanovsky @ 2026-02-13 10:57 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Separate the CQ creation logic into distinct kernel and user flows.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/irdma/verbs.c | 310 +++++++++++++++++++++++-------------
 1 file changed, 195 insertions(+), 115 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index cf8d19150574..f2b3cfe125af 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -2461,15 +2461,9 @@ static inline int cq_validate_flags(u32 flags, u8 hw_rev)
 	return flags & ~IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION ? -EOPNOTSUPP : 0;
 }
 
-/**
- * irdma_create_cq - create cq
- * @ibcq: CQ allocated
- * @attr: attributes for cq
- * @attrs: uverbs attribute bundle
- */
-static int irdma_create_cq(struct ib_cq *ibcq,
-			   const struct ib_cq_init_attr *attr,
-			   struct uverbs_attr_bundle *attrs)
+static int irdma_create_user_cq(struct ib_cq *ibcq,
+				const struct ib_cq_init_attr *attr,
+				struct uverbs_attr_bundle *attrs)
 {
 #define IRDMA_CREATE_CQ_MIN_REQ_LEN offsetofend(struct irdma_create_cq_req, user_cq_buf)
 #define IRDMA_CREATE_CQ_MIN_RESP_LEN offsetofend(struct irdma_create_cq_resp, cq_size)
@@ -2489,14 +2483,22 @@ static int irdma_create_cq(struct ib_cq *ibcq,
 	int err_code;
 	int entries = attr->cqe;
 	bool cqe_64byte_ena;
-	u8 cqe_size;
+	struct irdma_ucontext *ucontext;
+	struct irdma_create_cq_req req = {};
+	struct irdma_cq_mr *cqmr;
+	struct irdma_pbl *iwpbl;
+	struct irdma_pbl *iwpbl_shadow;
+	struct irdma_cq_mr *cqmr_shadow;
+
+	if (ibcq->umem)
+		return -EOPNOTSUPP;
 
 	err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev);
 	if (err_code)
 		return err_code;
 
-	if (udata && (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN ||
-		      udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN))
+	if (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN ||
+	    udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN)
 		return -EINVAL;
 
 	err_code = irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq, &cq_num,
@@ -2516,7 +2518,6 @@ static int irdma_create_cq(struct ib_cq *ibcq,
 	ukinfo->cq_id = cq_num;
 	cqe_64byte_ena = dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_64_BYTE_CQE ?
 			 true : false;
-	cqe_size = cqe_64byte_ena ? 64 : 32;
 	ukinfo->avoid_mem_cflct = cqe_64byte_ena;
 	iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
 	if (attr->comp_vector < rf->ceqs_count)
@@ -2526,110 +2527,203 @@ static int irdma_create_cq(struct ib_cq *ibcq,
 	info.type = IRDMA_CQ_TYPE_IWARP;
 	info.vsi = &iwdev->vsi;
 
-	if (udata) {
-		struct irdma_ucontext *ucontext;
-		struct irdma_create_cq_req req = {};
-		struct irdma_cq_mr *cqmr;
-		struct irdma_pbl *iwpbl;
-		struct irdma_pbl *iwpbl_shadow;
-		struct irdma_cq_mr *cqmr_shadow;
-
-		iwcq->user_mode = true;
-		ucontext =
-			rdma_udata_to_drv_context(udata, struct irdma_ucontext,
-						  ibucontext);
-		if (ib_copy_from_udata(&req, udata,
-				       min(sizeof(req), udata->inlen))) {
-			err_code = -EFAULT;
-			goto cq_free_rsrc;
-		}
+	iwcq->user_mode = true;
+	ucontext =
+		rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+					  ibucontext);
+	if (ib_copy_from_udata(&req, udata,
+			       min(sizeof(req), udata->inlen))) {
+		err_code = -EFAULT;
+		goto cq_free_rsrc;
+	}
 
+	spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+	iwpbl = irdma_get_pbl((unsigned long)req.user_cq_buf,
+			      &ucontext->cq_reg_mem_list);
+	spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+	if (!iwpbl) {
+		err_code = -EPROTO;
+		goto cq_free_rsrc;
+	}
+
+	cqmr = &iwpbl->cq_mr;
+
+	if (rf->sc_dev.hw_attrs.uk_attrs.feature_flags &
+	    IRDMA_FEATURE_CQ_RESIZE && !ucontext->legacy_mode) {
 		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
-		iwpbl = irdma_get_pbl((unsigned long)req.user_cq_buf,
-				      &ucontext->cq_reg_mem_list);
+		iwpbl_shadow = irdma_get_pbl(
+				(unsigned long)req.user_shadow_area,
+				&ucontext->cq_reg_mem_list);
 		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
-		if (!iwpbl) {
+
+		if (!iwpbl_shadow) {
 			err_code = -EPROTO;
 			goto cq_free_rsrc;
 		}
+		cqmr_shadow = &iwpbl_shadow->cq_mr;
+		info.shadow_area_pa = cqmr_shadow->cq_pbl.addr;
+		cqmr->split = true;
+	} else {
+		info.shadow_area_pa = cqmr->shadow;
+	}
+	if (iwpbl->pbl_allocated) {
+		info.virtual_map = true;
+		info.pbl_chunk_size = 1;
+		info.first_pm_pbl_idx = cqmr->cq_pbl.idx;
+	} else {
+		info.cq_base_pa = cqmr->cq_pbl.addr;
+	}
 
-		cqmr = &iwpbl->cq_mr;
+	info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2,
+					 (u32)IRDMA_MAX_CQ_READ_THRESH);
 
-		if (rf->sc_dev.hw_attrs.uk_attrs.feature_flags &
-		    IRDMA_FEATURE_CQ_RESIZE && !ucontext->legacy_mode) {
-			spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
-			iwpbl_shadow = irdma_get_pbl(
-					(unsigned long)req.user_shadow_area,
-					&ucontext->cq_reg_mem_list);
-			spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+	if (irdma_sc_cq_init(cq, &info)) {
+		ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n");
+		err_code = -EPROTO;
+		goto cq_free_rsrc;
+	}
 
-			if (!iwpbl_shadow) {
-				err_code = -EPROTO;
-				goto cq_free_rsrc;
-			}
-			cqmr_shadow = &iwpbl_shadow->cq_mr;
-			info.shadow_area_pa = cqmr_shadow->cq_pbl.addr;
-			cqmr->split = true;
-		} else {
-			info.shadow_area_pa = cqmr->shadow;
-		}
-		if (iwpbl->pbl_allocated) {
-			info.virtual_map = true;
-			info.pbl_chunk_size = 1;
-			info.first_pm_pbl_idx = cqmr->cq_pbl.idx;
-		} else {
-			info.cq_base_pa = cqmr->cq_pbl.addr;
-		}
-	} else {
-		/* Kmode allocations */
-		int rsize;
+	cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+	if (!cqp_request) {
+		err_code = -ENOMEM;
+		goto cq_free_rsrc;
+	}
 
-		if (entries < 1 || entries > rf->max_cqe) {
-			err_code = -EINVAL;
-			goto cq_free_rsrc;
-		}
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.cq_create.cq = cq;
+	cqp_info->in.u.cq_create.check_overflow = true;
+	cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
+	err_code = irdma_handle_cqp_op(rf, cqp_request);
+	irdma_put_cqp_request(&rf->cqp, cqp_request);
+	if (err_code)
+		goto cq_free_rsrc;
 
-		entries += 2;
-		if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
-			entries *= 2;
+	struct irdma_create_cq_resp resp = {};
 
-		if (entries & 1)
-			entries += 1; /* cq size must be an even number */
+	resp.cq_id = info.cq_uk_init_info.cq_id;
+	resp.cq_size = info.cq_uk_init_info.cq_size;
+	if (ib_copy_to_udata(udata, &resp,
+			     min(sizeof(resp), udata->outlen))) {
+		ibdev_dbg(&iwdev->ibdev,
+			  "VERBS: copy to user data\n");
+		err_code = -EPROTO;
+		goto cq_destroy;
+	}
 
-		if (entries * cqe_size == IRDMA_HW_PAGE_SIZE)
-			entries += 2;
+	init_completion(&iwcq->free_cq);
 
-		ukinfo->cq_size = entries;
+	/* Populate table entry after CQ is fully created. */
+	smp_store_release(&rf->cq_table[cq_num], iwcq);
 
-		if (cqe_64byte_ena)
-			rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_extended_cqe);
-		else
-			rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe);
-		iwcq->kmem.size = ALIGN(round_up(rsize, 256), 256);
-		iwcq->kmem.va = dma_alloc_coherent(dev->hw->device,
-						   iwcq->kmem.size,
-						   &iwcq->kmem.pa, GFP_KERNEL);
-		if (!iwcq->kmem.va) {
-			err_code = -ENOMEM;
-			goto cq_free_rsrc;
-		}
+	return 0;
+cq_destroy:
+	irdma_cq_wq_destroy(rf, cq);
+cq_free_rsrc:
+	irdma_cq_free_rsrc(rf, iwcq);
 
-		iwcq->kmem_shadow.size = ALIGN(IRDMA_SHADOW_AREA_SIZE << 3,
-					       64);
-		iwcq->kmem_shadow.va = dma_alloc_coherent(dev->hw->device,
-							  iwcq->kmem_shadow.size,
-							  &iwcq->kmem_shadow.pa,
-							  GFP_KERNEL);
-		if (!iwcq->kmem_shadow.va) {
-			err_code = -ENOMEM;
-			goto cq_free_rsrc;
-		}
-		info.shadow_area_pa = iwcq->kmem_shadow.pa;
-		ukinfo->shadow_area = iwcq->kmem_shadow.va;
-		ukinfo->cq_base = iwcq->kmem.va;
-		info.cq_base_pa = iwcq->kmem.pa;
+	return err_code;
+}
+
+static int irdma_create_cq(struct ib_cq *ibcq,
+			   const struct ib_cq_init_attr *attr,
+			   struct uverbs_attr_bundle *attrs)
+{
+	struct ib_device *ibdev = ibcq->device;
+	struct irdma_device *iwdev = to_iwdev(ibdev);
+	struct irdma_pci_f *rf = iwdev->rf;
+	struct irdma_cq *iwcq = to_iwcq(ibcq);
+	u32 cq_num = 0;
+	struct irdma_sc_cq *cq;
+	struct irdma_sc_dev *dev = &rf->sc_dev;
+	struct irdma_cq_init_info info = {};
+	struct irdma_cqp_request *cqp_request;
+	struct cqp_cmds_info *cqp_info;
+	struct irdma_cq_uk_init_info *ukinfo = &info.cq_uk_init_info;
+	int err_code;
+	int entries = attr->cqe;
+	bool cqe_64byte_ena;
+	u8 cqe_size;
+	int rsize;
+
+	err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev);
+	if (err_code)
+		return err_code;
+
+	err_code = irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq, &cq_num,
+				    &rf->next_cq);
+	if (err_code)
+		return err_code;
+
+	cq = &iwcq->sc_cq;
+	cq->back_cq = iwcq;
+	refcount_set(&iwcq->refcnt, 1);
+	spin_lock_init(&iwcq->lock);
+	INIT_LIST_HEAD(&iwcq->resize_list);
+	INIT_LIST_HEAD(&iwcq->cmpl_generated);
+	iwcq->cq_num = cq_num;
+	info.dev = dev;
+	ukinfo->cq_size = max(entries, 4);
+	ukinfo->cq_id = cq_num;
+	cqe_64byte_ena = dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_64_BYTE_CQE ?
+			 true : false;
+	cqe_size = cqe_64byte_ena ? 64 : 32;
+	ukinfo->avoid_mem_cflct = cqe_64byte_ena;
+	iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
+	if (attr->comp_vector < rf->ceqs_count)
+		info.ceq_id = attr->comp_vector;
+	info.ceq_id_valid = true;
+	info.ceqe_mask = 1;
+	info.type = IRDMA_CQ_TYPE_IWARP;
+	info.vsi = &iwdev->vsi;
+
+	/* Kmode allocations */
+	if (entries < 1 || entries > rf->max_cqe) {
+		err_code = -EINVAL;
+		goto cq_free_rsrc;
 	}
 
+	entries += 2;
+	if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+		entries *= 2;
+
+	if (entries & 1)
+		entries += 1; /* cq size must be an even number */
+
+	if (entries * cqe_size == IRDMA_HW_PAGE_SIZE)
+		entries += 2;
+
+	ukinfo->cq_size = entries;
+
+	if (cqe_64byte_ena)
+		rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_extended_cqe);
+	else
+		rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe);
+	iwcq->kmem.size = ALIGN(round_up(rsize, 256), 256);
+	iwcq->kmem.va = dma_alloc_coherent(dev->hw->device,
+					   iwcq->kmem.size,
+					   &iwcq->kmem.pa, GFP_KERNEL);
+	if (!iwcq->kmem.va) {
+		err_code = -ENOMEM;
+		goto cq_free_rsrc;
+	}
+
+	iwcq->kmem_shadow.size = ALIGN(IRDMA_SHADOW_AREA_SIZE << 3,
+				       64);
+	iwcq->kmem_shadow.va = dma_alloc_coherent(dev->hw->device,
+						  iwcq->kmem_shadow.size,
+						  &iwcq->kmem_shadow.pa,
+						  GFP_KERNEL);
+	if (!iwcq->kmem_shadow.va) {
+		err_code = -ENOMEM;
+		goto cq_free_rsrc;
+	}
+	info.shadow_area_pa = iwcq->kmem_shadow.pa;
+	ukinfo->shadow_area = iwcq->kmem_shadow.va;
+	ukinfo->cq_base = iwcq->kmem.va;
+	info.cq_base_pa = iwcq->kmem.pa;
+
 	info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2,
 					 (u32)IRDMA_MAX_CQ_READ_THRESH);
 
@@ -2656,28 +2750,13 @@ static int irdma_create_cq(struct ib_cq *ibcq,
 	if (err_code)
 		goto cq_free_rsrc;
 
-	if (udata) {
-		struct irdma_create_cq_resp resp = {};
-
-		resp.cq_id = info.cq_uk_init_info.cq_id;
-		resp.cq_size = info.cq_uk_init_info.cq_size;
-		if (ib_copy_to_udata(udata, &resp,
-				     min(sizeof(resp), udata->outlen))) {
-			ibdev_dbg(&iwdev->ibdev,
-				  "VERBS: copy to user data\n");
-			err_code = -EPROTO;
-			goto cq_destroy;
-		}
-	}
-
 	init_completion(&iwcq->free_cq);
 
 	/* Populate table entry after CQ is fully created. */
 	smp_store_release(&rf->cq_table[cq_num], iwcq);
 
 	return 0;
-cq_destroy:
-	irdma_cq_wq_destroy(rf, cq);
+
 cq_free_rsrc:
 	irdma_cq_free_rsrc(rf, iwcq);
 
@@ -5355,6 +5434,7 @@ static const struct ib_device_ops irdma_dev_ops = {
 	.alloc_pd = irdma_alloc_pd,
 	.alloc_ucontext = irdma_alloc_ucontext,
 	.create_cq = irdma_create_cq,
+	.create_user_cq = irdma_create_user_cq,
 	.create_qp = irdma_create_qp,
 	.dealloc_driver = irdma_ib_dealloc_device,
 	.dealloc_mw = irdma_dealloc_mw,

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 24/50] RDMA/usnic: Provide a modern CQ creation interface
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

usnic doesn't support kernel verbs, so it should provide only the
.create_user_cq() callback.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/usnic/usnic_ib_main.c  | 2 +-
 drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 6 +++---
 drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 11eca39b73a9..8a3b641d6059 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -356,7 +356,7 @@ static const struct ib_device_ops usnic_dev_ops = {
 
 	.alloc_pd = usnic_ib_alloc_pd,
 	.alloc_ucontext = usnic_ib_alloc_ucontext,
-	.create_cq = usnic_ib_create_cq,
+	.create_user_cq = usnic_ib_create_user_cq,
 	.create_qp = usnic_ib_create_qp,
 	.dealloc_pd = usnic_ib_dealloc_pd,
 	.dealloc_ucontext = usnic_ib_dealloc_ucontext,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index ae5df96589d9..2b41ded14a65 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -576,10 +576,10 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	return status;
 }
 
-int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
-		       struct uverbs_attr_bundle *attrs)
+int usnic_ib_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			    struct uverbs_attr_bundle *attrs)
 {
-	if (attr->flags)
+	if (attr->flags || ibcq->umem)
 		return -EOPNOTSUPP;
 
 	return 0;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index e3031ac32488..15882110a5d5 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -55,8 +55,8 @@ int usnic_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
 int usnic_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
 int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 				int attr_mask, struct ib_udata *udata);
-int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
-		       struct uverbs_attr_bundle *attrs);
+int usnic_ib_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			    struct uverbs_attr_bundle *attrs);
 int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
 struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
 				u64 virt_addr, int access_flags,

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 25/50] RDMA/mana: Provide a modern CQ creation interface
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

The uverbs CQ creation UAPI allows users to supply their own umem for a CQ.
Update mana to support this workflow while preserving support for creating
the umem through the legacy interface.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mana/cq.c      | 128 +++++++++++++++++++++++------------
 drivers/infiniband/hw/mana/device.c  |   1 +
 drivers/infiniband/hw/mana/main.c    |  25 +++----
 drivers/infiniband/hw/mana/mana_ib.h |   4 +-
 drivers/infiniband/hw/mana/qp.c      |  42 ++++++++++--
 drivers/infiniband/hw/mana/wq.c      |  14 +++-
 6 files changed, 147 insertions(+), 67 deletions(-)

diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index 2dce1b677115..605122ecf9f9 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -5,8 +5,8 @@
 
 #include "mana_ib.h"
 
-int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
-		      struct uverbs_attr_bundle *attrs)
+int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			   struct uverbs_attr_bundle *attrs)
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
@@ -17,7 +17,6 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	struct mana_ib_dev *mdev;
 	bool is_rnic_cq;
 	u32 doorbell;
-	u32 buf_size;
 	int err;
 
 	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
@@ -26,44 +25,100 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	cq->cq_handle = INVALID_MANA_HANDLE;
 	is_rnic_cq = mana_ib_is_rnic(mdev);
 
-	if (udata) {
-		if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
-			return -EINVAL;
+	if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
+		return -EINVAL;
 
-		err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
-		if (err) {
-			ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
-			return err;
-		}
+	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+	if (err) {
+		ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
+		return err;
+	}
 
-		if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
-		    attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
-			ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
-			return -EINVAL;
-		}
+	if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
+	    attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
+		ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+		return -EINVAL;
+	}
+
+	cq->cqe = attr->cqe;
+	if (!ibcq->umem)
+		ibcq->umem = ib_umem_get(ibdev, ucmd.buf_addr,
+				     cq->cqe * COMP_ENTRY_SIZE,
+				     IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(ibcq->umem))
+		return PTR_ERR(ibcq->umem);
+	cq->queue.umem = ibcq->umem;
+
+	err = mana_ib_create_queue(mdev, &cq->queue);
+	if (err)
+		return err;
 
-		cq->cqe = attr->cqe;
-		err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
-					   &cq->queue);
+	mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+						  ibucontext);
+	doorbell = mana_ucontext->doorbell;
+
+	if (is_rnic_cq) {
+		err = mana_ib_gd_create_cq(mdev, cq, doorbell);
 		if (err) {
-			ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
-			return err;
+			ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n", err);
+			goto err_destroy_queue;
 		}
 
-		mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
-							  ibucontext);
-		doorbell = mana_ucontext->doorbell;
-	} else {
-		buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
-		cq->cqe = buf_size / COMP_ENTRY_SIZE;
-		err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
+		err = mana_ib_install_cq_cb(mdev, cq);
 		if (err) {
-			ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
-			return err;
+			ibdev_dbg(ibdev, "Failed to install cq callback, %d\n", err);
+			goto err_destroy_rnic_cq;
 		}
-		doorbell = mdev->gdma_dev->doorbell;
 	}
 
+	resp.cqid = cq->queue.id;
+	err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+	if (err) {
+		ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+		goto err_remove_cq_cb;
+	}
+
+	spin_lock_init(&cq->cq_lock);
+	INIT_LIST_HEAD(&cq->list_send_qp);
+	INIT_LIST_HEAD(&cq->list_recv_qp);
+
+	return 0;
+
+err_remove_cq_cb:
+	mana_ib_remove_cq_cb(mdev, cq);
+err_destroy_rnic_cq:
+	mana_ib_gd_destroy_cq(mdev, cq);
+err_destroy_queue:
+	mana_ib_destroy_queue(mdev, &cq->queue);
+	return err;
+}
+
+int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		      struct uverbs_attr_bundle *attrs)
+{
+	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+	struct ib_device *ibdev = ibcq->device;
+	struct mana_ib_dev *mdev;
+	bool is_rnic_cq;
+	u32 doorbell;
+	u32 buf_size;
+	int err;
+
+	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+	cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
+	cq->cq_handle = INVALID_MANA_HANDLE;
+	is_rnic_cq = mana_ib_is_rnic(mdev);
+
+	buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
+	cq->cqe = buf_size / COMP_ENTRY_SIZE;
+	err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
+	if (err) {
+		ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
+		return err;
+	}
+	doorbell = mdev->gdma_dev->doorbell;
+
 	if (is_rnic_cq) {
 		err = mana_ib_gd_create_cq(mdev, cq, doorbell);
 		if (err) {
@@ -78,23 +133,12 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		}
 	}
 
-	if (udata) {
-		resp.cqid = cq->queue.id;
-		err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
-		if (err) {
-			ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
-			goto err_remove_cq_cb;
-		}
-	}
-
 	spin_lock_init(&cq->cq_lock);
 	INIT_LIST_HEAD(&cq->list_send_qp);
 	INIT_LIST_HEAD(&cq->list_recv_qp);
 
 	return 0;
 
-err_remove_cq_cb:
-	mana_ib_remove_cq_cb(mdev, cq);
 err_destroy_rnic_cq:
 	mana_ib_gd_destroy_cq(mdev, cq);
 err_destroy_queue:
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index ccc2279ca63c..c5c5fe051424 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -21,6 +21,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
 	.alloc_ucontext = mana_ib_alloc_ucontext,
 	.create_ah = mana_ib_create_ah,
 	.create_cq = mana_ib_create_cq,
+	.create_user_cq = mana_ib_create_user_cq,
 	.create_qp = mana_ib_create_qp,
 	.create_rwq_ind_table = mana_ib_create_rwq_ind_table,
 	.create_wq = mana_ib_create_wq,
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index fac159f7128d..a871b8287dc9 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -261,35 +261,26 @@ int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_qu
 	return 0;
 }
 
-int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
+int mana_ib_create_queue(struct mana_ib_dev *mdev,
 			 struct mana_ib_queue *queue)
 {
-	struct ib_umem *umem;
 	int err;
 
-	queue->umem = NULL;
 	queue->id = INVALID_QUEUE_ID;
 	queue->gdma_region = GDMA_INVALID_DMA_REGION;
 
-	umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
-	if (IS_ERR(umem)) {
-		ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n", umem);
-		return PTR_ERR(umem);
-	}
-
-	err = mana_ib_create_zero_offset_dma_region(mdev, umem, &queue->gdma_region);
+	err = mana_ib_create_zero_offset_dma_region(mdev, queue->umem,
+						    &queue->gdma_region);
 	if (err) {
-		ibdev_dbg(&mdev->ib_dev, "Failed to create dma region, %d\n", err);
-		goto free_umem;
+		ibdev_dbg(&mdev->ib_dev, "Failed to create dma region, %d\n",
+			  err);
+		return err;
 	}
-	queue->umem = umem;
 
-	ibdev_dbg(&mdev->ib_dev, "created dma region 0x%llx\n", queue->gdma_region);
+	ibdev_dbg(&mdev->ib_dev, "created dma region 0x%llx\n",
+		  queue->gdma_region);
 
 	return 0;
-free_umem:
-	ib_umem_release(umem);
-	return err;
 }
 
 void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue)
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index a7c8c0fd7019..3bc7c88dc136 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -624,7 +624,7 @@ int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
 
 int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
 				struct mana_ib_queue *queue);
-int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
+int mana_ib_create_queue(struct mana_ib_dev *mdev,
 			 struct mana_ib_queue *queue);
 void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue);
 
@@ -667,6 +667,8 @@ void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
 
 int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		      struct uverbs_attr_bundle *attrs);
+int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			   struct uverbs_attr_bundle *attrs);
 
 int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
 
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 48c1f4977f21..b08dbc675741 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -326,11 +326,20 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 	ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
 		  ucmd.sq_buf_addr, ucmd.port);
 
-	err = mana_ib_create_queue(mdev, ucmd.sq_buf_addr, ucmd.sq_buf_size, &qp->raw_sq);
+	qp->raw_sq.umem = ib_umem_get(&mdev->ib_dev, ucmd.sq_buf_addr,
+				      ucmd.sq_buf_size, IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(qp->raw_sq.umem)) {
+		err = PTR_ERR(qp->raw_sq.umem);
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to get umem for qp-raw, err %d\n", err);
+		goto err_free_vport;
+	}
+
+	err = mana_ib_create_queue(mdev, &qp->raw_sq);
 	if (err) {
 		ibdev_dbg(&mdev->ib_dev,
 			  "Failed to create queue for create qp-raw, err %d\n", err);
-		goto err_free_vport;
+		goto err_release_umem;
 	}
 
 	/* Create a WQ on the same port handle used by the Ethernet */
@@ -391,6 +400,10 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 
 err_destroy_queue:
 	mana_ib_destroy_queue(mdev, &qp->raw_sq);
+	return err;
+
+err_release_umem:
+	ib_umem_release(qp->raw_sq.umem);
 
 err_free_vport:
 	mana_ib_uncfg_vport(mdev, pd, port);
@@ -553,13 +566,25 @@ static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
 		if (i == MANA_RC_SEND_QUEUE_FMR) {
 			qp->rc_qp.queues[i].id = INVALID_QUEUE_ID;
 			qp->rc_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+			qp->rc_qp.queues[i].umem = NULL;
 			continue;
 		}
-		err = mana_ib_create_queue(mdev, ucmd.queue_buf[j], ucmd.queue_size[j],
-					   &qp->rc_qp.queues[i]);
+		qp->rc_qp.queues[i].umem = ib_umem_get(&mdev->ib_dev,
+						       ucmd.queue_buf[j],
+						       ucmd.queue_size[j],
+						       IB_ACCESS_LOCAL_WRITE);
+		if (IS_ERR(qp->rc_qp.queues[i].umem)) {
+			err = PTR_ERR(qp->rc_qp.queues[i].umem);
+			ibdev_err(&mdev->ib_dev, "Failed to get umem for queue %d, err %d\n",
+				  i, err);
+			goto release_umems;
+		}
+
+		err = mana_ib_create_queue(mdev, &qp->rc_qp.queues[i]);
 		if (err) {
 			ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n", i, err);
-			goto destroy_queues;
+			ib_umem_release(qp->rc_qp.queues[i].umem);
+			goto release_umems;
 		}
 		j++;
 	}
@@ -598,6 +623,13 @@ static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
 	while (i-- > 0)
 		mana_ib_destroy_queue(mdev, &qp->rc_qp.queues[i]);
 	return err;
+
+release_umems:
+	while (i-- > 0) {
+		if (i != MANA_RC_SEND_QUEUE_FMR)
+			ib_umem_release(qp->rc_qp.queues[i].umem);
+	}
+	return err;
 }
 
 static void mana_add_qp_to_cqs(struct mana_ib_qp *qp)
diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c
index f959f4b9244f..be474aa8bdfc 100644
--- a/drivers/infiniband/hw/mana/wq.c
+++ b/drivers/infiniband/hw/mana/wq.c
@@ -31,11 +31,19 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
 
 	ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr);
 
-	err = mana_ib_create_queue(mdev, ucmd.wq_buf_addr, ucmd.wq_buf_size, &wq->queue);
+	wq->queue.umem = ib_umem_get(&mdev->ib_dev, ucmd.wq_buf_addr,
+				     ucmd.wq_buf_size, IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(wq->queue.umem)) {
+		err = PTR_ERR(wq->queue.umem);
+		ibdev_dbg(&mdev->ib_dev, "Failed to get umem for create wq, %d\n", err);
+		goto err_free_wq;
+	}
+
+	err = mana_ib_create_queue(mdev, &wq->queue);
 	if (err) {
 		ibdev_dbg(&mdev->ib_dev,
 			  "Failed to create queue for create wq, %d\n", err);
-		goto err_free_wq;
+		goto err_release_umem;
 	}
 
 	wq->wqe = init_attr->max_wr;
@@ -43,6 +51,8 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
 	wq->rx_object = INVALID_MANA_HANDLE;
 	return &wq->ibwq;
 
+err_release_umem:
+	ib_umem_release(wq->queue.umem);
 err_free_wq:
 	kfree(wq);
 

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 22/50] RDMA/ocrdma: Split user and kernel CQ creation paths
From: Leon Romanovsky @ 2026-02-13 10:57 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Separate the CQ creation logic into distinct kernel and user flows.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/ocrdma/ocrdma_main.c  |  1 +
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 56 +++++++++++++++++++----------
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.h |  3 ++
 3 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 5d4b3bc16493..0d89c5ec9a7a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -141,6 +141,7 @@ static const struct ib_device_ops ocrdma_dev_ops = {
 	.create_cq = ocrdma_create_cq,
 	.create_qp = ocrdma_create_qp,
 	.create_user_ah = ocrdma_create_ah,
+	.create_user_cq = ocrdma_create_user_cq,
 	.dealloc_pd = ocrdma_dealloc_pd,
 	.dealloc_ucontext = ocrdma_dealloc_ucontext,
 	.dereg_mr = ocrdma_dereg_mr,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index bf9211d8d130..034d8b937a77 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -966,8 +966,9 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
 	return status;
 }
 
-int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
-		     struct uverbs_attr_bundle *attrs)
+int ocrdma_create_user_cq(struct ib_cq *ibcq,
+			  const struct ib_cq_init_attr *attr,
+			  struct uverbs_attr_bundle *attrs)
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct ib_device *ibdev = ibcq->device;
@@ -976,36 +977,29 @@ int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
 	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
 		udata, struct ocrdma_ucontext, ibucontext);
-	u16 pd_id = 0;
 	int status;
 	struct ocrdma_create_cq_ureq ureq;
 
-	if (attr->flags)
+	if (attr->flags || ibcq->umem)
 		return -EOPNOTSUPP;
 
-	if (udata) {
-		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
-			return -EFAULT;
-	} else
-		ureq.dpp_cq = 0;
+	if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
+		return -EFAULT;
 
 	spin_lock_init(&cq->cq_lock);
 	spin_lock_init(&cq->comp_handler_lock);
 	INIT_LIST_HEAD(&cq->sq_head);
 	INIT_LIST_HEAD(&cq->rq_head);
 
-	if (udata)
-		pd_id = uctx->cntxt_pd->id;
-
-	status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
+	status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq,
+				      uctx->cntxt_pd->id);
 	if (status)
 		return status;
 
-	if (udata) {
-		status = ocrdma_copy_cq_uresp(dev, cq, udata);
-		if (status)
-			goto ctx_err;
-	}
+	status = ocrdma_copy_cq_uresp(dev, cq, udata);
+	if (status)
+		goto ctx_err;
+
 	cq->phase = OCRDMA_CQE_VALID;
 	dev->cq_tbl[cq->id] = cq;
 	return 0;
@@ -1015,6 +1009,32 @@ int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	return status;
 }
 
+int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		     struct uverbs_attr_bundle *attrs)
+{
+	struct ib_device *ibdev = ibcq->device;
+	int entries = attr->cqe;
+	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
+	int status;
+
+	if (attr->flags)
+		return -EOPNOTSUPP;
+
+	spin_lock_init(&cq->cq_lock);
+	spin_lock_init(&cq->comp_handler_lock);
+	INIT_LIST_HEAD(&cq->sq_head);
+	INIT_LIST_HEAD(&cq->rq_head);
+
+	status = ocrdma_mbx_create_cq(dev, cq, entries, 0, 0);
+	if (status)
+		return status;
+
+	cq->phase = OCRDMA_CQE_VALID;
+	dev->cq_tbl[cq->id] = cq;
+	return 0;
+}
+
 int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
 		     struct ib_udata *udata)
 {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 6c5c3755b8a9..4a572608fd9f 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -71,6 +71,9 @@ int ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
 
 int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		     struct uverbs_attr_bundle *attrs);
+int ocrdma_create_user_cq(struct ib_cq *ibcq,
+			  const struct ib_cq_init_attr *attr,
+			  struct uverbs_attr_bundle *attrs);
 int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
 int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
 

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 27/50] RDMA/rdmavt: Split user and kernel CQ creation paths
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Separate the CQ creation logic into distinct kernel and user flows.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/sw/rdmavt/cq.c | 144 +++++++++++++++++++++++++++-----------
 drivers/infiniband/sw/rdmavt/cq.h |   2 +
 drivers/infiniband/sw/rdmavt/vt.c |   1 +
 3 files changed, 106 insertions(+), 41 deletions(-)

diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index e7835ca70e2b..db86eb026bb3 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -147,33 +147,32 @@ static void send_complete(struct work_struct *work)
 }
 
 /**
- * rvt_create_cq - create a completion queue
+ * rvt_create_user_cq - create a completion queue for userspace
  * @ibcq: Allocated CQ
  * @attr: creation attributes
  * @attrs: uverbs bundle
  *
- * Called by ib_create_cq() in the generic verbs code.
+ * Called by ib_create_cq() in the generic verbs code for userspace CQs.
  *
  * Return: 0 on success
  */
-int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
-		  struct uverbs_attr_bundle *attrs)
+int rvt_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		       struct uverbs_attr_bundle *attrs)
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct ib_device *ibdev = ibcq->device;
 	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
 	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
-	struct rvt_cq_wc *u_wc = NULL;
-	struct rvt_k_cq_wc *k_wc = NULL;
+	struct rvt_cq_wc *u_wc;
 	u32 sz;
 	unsigned int entries = attr->cqe;
 	int comp_vector = attr->comp_vector;
 	int err;
 
-	if (attr->flags)
+	if (attr->flags || ibcq->umem)
 		return -EOPNOTSUPP;
 
-	if (entries < 1 || entries > rdi->dparms.props.max_cqe)
+	if (entries > rdi->dparms.props.max_cqe)
 		return -EINVAL;
 
 	if (comp_vector < 0)
@@ -188,37 +187,27 @@ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	 * We need to use vmalloc() in order to support mmap and large
 	 * numbers of entries.
 	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		sz = sizeof(struct ib_uverbs_wc) * (entries + 1);
-		sz += sizeof(*u_wc);
-		u_wc = vmalloc_user(sz);
-		if (!u_wc)
-			return -ENOMEM;
-	} else {
-		sz = sizeof(struct ib_wc) * (entries + 1);
-		sz += sizeof(*k_wc);
-		k_wc = vzalloc_node(sz, rdi->dparms.node);
-		if (!k_wc)
-			return -ENOMEM;
-	}
+	sz = sizeof(struct ib_uverbs_wc) * (entries + 1);
+	sz += sizeof(*u_wc);
+	u_wc = vmalloc_user(sz);
+	if (!u_wc)
+		return -ENOMEM;
 
 	/*
 	 * Return the address of the WC as the offset to mmap.
 	 * See rvt_mmap() for details.
 	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		cq->ip = rvt_create_mmap_info(rdi, sz, udata, u_wc);
-		if (IS_ERR(cq->ip)) {
-			err = PTR_ERR(cq->ip);
-			goto bail_wc;
-		}
-
-		err = ib_copy_to_udata(udata, &cq->ip->offset,
-				       sizeof(cq->ip->offset));
-		if (err)
-			goto bail_ip;
+	cq->ip = rvt_create_mmap_info(rdi, sz, udata, u_wc);
+	if (IS_ERR(cq->ip)) {
+		err = PTR_ERR(cq->ip);
+		goto bail_wc;
 	}
 
+	err = ib_copy_to_udata(udata, &cq->ip->offset,
+			       sizeof(cq->ip->offset));
+	if (err)
+		goto bail_ip;
+
 	spin_lock_irq(&rdi->n_cqs_lock);
 	if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
 		spin_unlock_irq(&rdi->n_cqs_lock);
@@ -229,11 +218,9 @@ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	rdi->n_cqs_allocated++;
 	spin_unlock_irq(&rdi->n_cqs_lock);
 
-	if (cq->ip) {
-		spin_lock_irq(&rdi->pending_lock);
-		list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
-		spin_unlock_irq(&rdi->pending_lock);
-	}
+	spin_lock_irq(&rdi->pending_lock);
+	list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
+	spin_unlock_irq(&rdi->pending_lock);
 
 	/*
 	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
@@ -252,10 +239,7 @@ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	cq->notify = RVT_CQ_NONE;
 	spin_lock_init(&cq->lock);
 	INIT_WORK(&cq->comptask, send_complete);
-	if (u_wc)
-		cq->queue = u_wc;
-	else
-		cq->kqueue = k_wc;
+	cq->queue = u_wc;
 
 	trace_rvt_create_cq(cq, attr);
 	return 0;
@@ -264,6 +248,84 @@ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	kfree(cq->ip);
 bail_wc:
 	vfree(u_wc);
+	return err;
+}
+
+/**
+ * rvt_create_cq - create a completion queue for kernel
+ * @ibcq: Allocated CQ
+ * @attr: creation attributes
+ * @attrs: uverbs bundle
+ *
+ * Called by ib_create_cq() in the generic verbs code for kernel CQs.
+ *
+ * Return: 0 on success
+ */
+int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		  struct uverbs_attr_bundle *attrs)
+{
+	struct ib_device *ibdev = ibcq->device;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+	struct rvt_k_cq_wc *k_wc;
+	u32 sz;
+	unsigned int entries = attr->cqe;
+	int comp_vector = attr->comp_vector;
+	int err;
+
+	if (attr->flags)
+		return -EOPNOTSUPP;
+
+	if (entries > rdi->dparms.props.max_cqe)
+		return -EINVAL;
+
+	if (comp_vector < 0)
+		comp_vector = 0;
+
+	comp_vector = comp_vector % rdi->ibdev.num_comp_vectors;
+
+	/*
+	 * Allocate the completion queue entries and head/tail pointers.
+	 */
+	sz = sizeof(struct ib_wc) * (entries + 1);
+	sz += sizeof(*k_wc);
+	k_wc = vzalloc_node(sz, rdi->dparms.node);
+	if (!k_wc)
+		return -ENOMEM;
+
+	spin_lock_irq(&rdi->n_cqs_lock);
+	if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
+		spin_unlock_irq(&rdi->n_cqs_lock);
+		err = -ENOMEM;
+		goto bail_wc;
+	}
+
+	rdi->n_cqs_allocated++;
+	spin_unlock_irq(&rdi->n_cqs_lock);
+
+	/*
+	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
+	 * The number of entries should be >= the number requested or return
+	 * an error.
+	 */
+	cq->rdi = rdi;
+	if (rdi->driver_f.comp_vect_cpu_lookup)
+		cq->comp_vector_cpu =
+			rdi->driver_f.comp_vect_cpu_lookup(rdi, comp_vector);
+	else
+		cq->comp_vector_cpu =
+			cpumask_first(cpumask_of_node(rdi->dparms.node));
+
+	cq->ibcq.cqe = entries;
+	cq->notify = RVT_CQ_NONE;
+	spin_lock_init(&cq->lock);
+	INIT_WORK(&cq->comptask, send_complete);
+	cq->kqueue = k_wc;
+
+	trace_rvt_create_cq(cq, attr);
+	return 0;
+
+bail_wc:
 	vfree(k_wc);
 	return err;
 }
diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h
index 4028702a7b2f..14ee2705c443 100644
--- a/drivers/infiniband/sw/rdmavt/cq.h
+++ b/drivers/infiniband/sw/rdmavt/cq.h
@@ -11,6 +11,8 @@
 
 int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		  struct uverbs_attr_bundle *attrs);
+int rvt_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		       struct uverbs_attr_bundle *attrs);
 int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
 int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
 int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index d22d610c2696..15964400b8d3 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -333,6 +333,7 @@ static const struct ib_device_ops rvt_dev_ops = {
 	.attach_mcast = rvt_attach_mcast,
 	.create_ah = rvt_create_ah,
 	.create_cq = rvt_create_cq,
+	.create_user_cq = rvt_create_user_cq,
 	.create_qp = rvt_create_qp,
 	.create_srq = rvt_create_srq,
 	.create_user_ah = rvt_create_ah,

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 28/50] RDMA/siw: Split user and kernel CQ creation paths
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Separate the CQ creation logic into distinct kernel and user flows.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/sw/siw/siw_main.c  |   1 +
 drivers/infiniband/sw/siw/siw_verbs.c | 111 +++++++++++++++++++++++-----------
 drivers/infiniband/sw/siw/siw_verbs.h |   2 +
 3 files changed, 80 insertions(+), 34 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 5168307229a9..75dcf3578eac 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -232,6 +232,7 @@ static const struct ib_device_ops siw_device_ops = {
 	.alloc_pd = siw_alloc_pd,
 	.alloc_ucontext = siw_alloc_ucontext,
 	.create_cq = siw_create_cq,
+	.create_user_cq = siw_create_user_cq,
 	.create_qp = siw_create_qp,
 	.create_srq = siw_create_srq,
 	.dealloc_driver = siw_device_cleanup,
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index efa2f097b582..92b25b389b69 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1139,15 +1139,15 @@ int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
  * @attrs: uverbs bundle
  */
 
-int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
-		  struct uverbs_attr_bundle *attrs)
+int siw_create_user_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
+		       struct uverbs_attr_bundle *attrs)
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct siw_device *sdev = to_siw_dev(base_cq->device);
 	struct siw_cq *cq = to_siw_cq(base_cq);
 	int rv, size = attr->cqe;
 
-	if (attr->flags)
+	if (attr->flags || base_cq->umem)
 		return -EOPNOTSUPP;
 
 	if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) {
@@ -1155,7 +1155,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 		rv = -ENOMEM;
 		goto err_out;
 	}
-	if (size < 1 || size > sdev->attrs.max_cqe) {
+	if (attr->cqe > sdev->attrs.max_cqe) {
 		siw_dbg(base_cq->device, "CQ size error: %d\n", size);
 		rv = -EINVAL;
 		goto err_out;
@@ -1164,13 +1164,8 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 	cq->base_cq.cqe = size;
 	cq->num_cqe = size;
 
-	if (udata)
-		cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) +
-					 sizeof(struct siw_cq_ctrl));
-	else
-		cq->queue = vzalloc(size * sizeof(struct siw_cqe) +
-				    sizeof(struct siw_cq_ctrl));
-
+	cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) +
+				 sizeof(struct siw_cq_ctrl));
 	if (cq->queue == NULL) {
 		rv = -ENOMEM;
 		goto err_out;
@@ -1182,33 +1177,32 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 
 	cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];
 
-	if (udata) {
-		struct siw_uresp_create_cq uresp = {};
-		struct siw_ucontext *ctx =
-			rdma_udata_to_drv_context(udata, struct siw_ucontext,
-						  base_ucontext);
-		size_t length = size * sizeof(struct siw_cqe) +
-			sizeof(struct siw_cq_ctrl);
+	struct siw_uresp_create_cq uresp = {};
+	struct siw_ucontext *ctx =
+		rdma_udata_to_drv_context(udata, struct siw_ucontext,
+					  base_ucontext);
+	size_t length = size * sizeof(struct siw_cqe) +
+		sizeof(struct siw_cq_ctrl);
 
-		cq->cq_entry =
-			siw_mmap_entry_insert(ctx, cq->queue,
-					      length, &uresp.cq_key);
-		if (!cq->cq_entry) {
-			rv = -ENOMEM;
-			goto err_out;
-		}
+	cq->cq_entry =
+		siw_mmap_entry_insert(ctx, cq->queue,
+				      length, &uresp.cq_key);
+	if (!cq->cq_entry) {
+		rv = -ENOMEM;
+		goto err_out;
+	}
 
-		uresp.cq_id = cq->id;
-		uresp.num_cqe = size;
+	uresp.cq_id = cq->id;
+	uresp.num_cqe = size;
 
-		if (udata->outlen < sizeof(uresp)) {
-			rv = -EINVAL;
-			goto err_out;
-		}
-		rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
-		if (rv)
-			goto err_out;
+	if (udata->outlen < sizeof(uresp)) {
+		rv = -EINVAL;
+		goto err_out;
 	}
+	rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	if (rv)
+		goto err_out;
+
 	return 0;
 
 err_out:
@@ -1227,6 +1221,55 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 	return rv;
 }
 
+int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
+		  struct uverbs_attr_bundle *attrs)
+{
+	struct siw_device *sdev = to_siw_dev(base_cq->device);
+	struct siw_cq *cq = to_siw_cq(base_cq);
+	int rv, size = attr->cqe;
+
+	if (attr->flags)
+		return -EOPNOTSUPP;
+
+	if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) {
+		siw_dbg(base_cq->device, "too many CQ's\n");
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	if (size < 1 || size > sdev->attrs.max_cqe) {
+		siw_dbg(base_cq->device, "CQ size error: %d\n", size);
+		rv = -EINVAL;
+		goto err_out;
+	}
+	size = roundup_pow_of_two(size);
+	cq->base_cq.cqe = size;
+	cq->num_cqe = size;
+
+	cq->queue = vzalloc(size * sizeof(struct siw_cqe) +
+			    sizeof(struct siw_cq_ctrl));
+	if (cq->queue == NULL) {
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	get_random_bytes(&cq->id, 4);
+	siw_dbg(base_cq->device, "new CQ [%u]\n", cq->id);
+
+	spin_lock_init(&cq->lock);
+
+	cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];
+
+	return 0;
+
+err_out:
+	siw_dbg(base_cq->device, "CQ creation failed: %d", rv);
+
+	if (cq->queue)
+		vfree(cq->queue);
+	atomic_dec(&sdev->num_cq);
+
+	return rv;
+}
+
 /*
  * siw_poll_cq()
  *
diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h
index e9f4463aecdc..527c356b55af 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.h
+++ b/drivers/infiniband/sw/siw/siw_verbs.h
@@ -44,6 +44,8 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
 		     struct ib_udata *udata);
 int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 		  struct uverbs_attr_bundle *attrs);
+int siw_create_user_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
+		       struct uverbs_attr_bundle *attrs);
 int siw_query_port(struct ib_device *base_dev, u32 port,
 		   struct ib_port_attr *attr);
 int siw_query_gid(struct ib_device *base_dev, u32 port, int idx,

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 26/50] RDMA/hns: Separate user and kernel CQ creation paths
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Split CQ creation into distinct kernel and user flows. The hns driver,
inherited from mlx4, uses a problematic pattern that shares and caches
umem in hns_roce_db_map_user(). This design blocks the driver from
supporting generic umem sources (VMA, dmabuf, memfd, and others).

In addition, delete the counter that counts CQ creation errors. There
are multiple ways to debug a modern kernel without needing to rely
on that debugfs counter.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/hns/hns_roce_cq.c      | 103 ++++++++++++++++++++-------
 drivers/infiniband/hw/hns/hns_roce_debugfs.c |   1 -
 drivers/infiniband/hw/hns/hns_roce_device.h  |   3 +-
 drivers/infiniband/hw/hns/hns_roce_main.c    |   1 +
 4 files changed, 82 insertions(+), 26 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 857a913326cd..0f24a916466b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -335,7 +335,10 @@ static int verify_cq_create_attr(struct hns_roce_dev *hr_dev,
 {
 	struct ib_device *ibdev = &hr_dev->ib_dev;
 
-	if (!attr->cqe || attr->cqe > hr_dev->caps.max_cqes) {
+	if (attr->flags)
+		return -EOPNOTSUPP;
+
+	if (attr->cqe > hr_dev->caps.max_cqes) {
 		ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n",
 			  attr->cqe, hr_dev->caps.max_cqes);
 		return -EINVAL;
@@ -407,8 +410,8 @@ static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
 	return 0;
 }
 
-int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
-		       struct uverbs_attr_bundle *attrs)
+int hns_roce_create_user_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+			    struct uverbs_attr_bundle *attrs)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
 	struct ib_udata *udata = &attrs->driver_udata;
@@ -418,31 +421,27 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 	struct hns_roce_ib_create_cq ucmd = {};
 	int ret;
 
-	if (attr->flags) {
-		ret = -EOPNOTSUPP;
-		goto err_out;
-	}
+	if (ib_cq->umem)
+		return -EOPNOTSUPP;
 
 	ret = verify_cq_create_attr(hr_dev, attr);
 	if (ret)
-		goto err_out;
+		return ret;
 
-	if (udata) {
-		ret = get_cq_ucmd(hr_cq, udata, &ucmd);
-		if (ret)
-			goto err_out;
-	}
+	ret = get_cq_ucmd(hr_cq, udata, &ucmd);
+	if (ret)
+		return ret;
 
 	set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
 
 	ret = set_cqe_size(hr_cq, udata, &ucmd);
 	if (ret)
-		goto err_out;
+		return ret;
 
 	ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
 	if (ret) {
 		ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
-		goto err_out;
+		return ret;
 	}
 
 	ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp);
@@ -464,13 +463,11 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 		goto err_cqn;
 	}
 
-	if (udata) {
-		resp.cqn = hr_cq->cqn;
-		ret = ib_copy_to_udata(udata, &resp,
-				       min(udata->outlen, sizeof(resp)));
-		if (ret)
-			goto err_cqc;
-	}
+	resp.cqn = hr_cq->cqn;
+	ret = ib_copy_to_udata(udata, &resp,
+			       min(udata->outlen, sizeof(resp)));
+	if (ret)
+		goto err_cqc;
 
 	hr_cq->cons_index = 0;
 	hr_cq->arm_sn = 1;
@@ -487,9 +484,67 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 	free_cq_db(hr_dev, hr_cq, udata);
 err_cq_buf:
 	free_cq_buf(hr_dev, hr_cq);
-err_out:
-	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT]);
+	return ret;
+}
+
+int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+		       struct uverbs_attr_bundle *attrs)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
+	struct hns_roce_ib_create_cq_resp resp = {};
+	struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct hns_roce_ib_create_cq ucmd = {};
+	int ret;
+
+	ret = verify_cq_create_attr(hr_dev, attr);
+	if (ret)
+		return ret;
+
+	set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
+
+	ret = set_cqe_size(hr_cq, NULL, &ucmd);
+	if (ret)
+		return ret;
 
+	ret = alloc_cq_buf(hr_dev, hr_cq, NULL, 0);
+	if (ret) {
+		ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = alloc_cq_db(hr_dev, hr_cq, NULL, 0, &resp);
+	if (ret) {
+		ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret);
+		goto err_cq_buf;
+	}
+
+	ret = alloc_cqn(hr_dev, hr_cq, NULL);
+	if (ret) {
+		ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret);
+		goto err_cq_db;
+	}
+
+	ret = alloc_cqc(hr_dev, hr_cq);
+	if (ret) {
+		ibdev_err(ibdev,
+			  "failed to alloc CQ context, ret = %d.\n", ret);
+		goto err_cqn;
+	}
+
+	hr_cq->cons_index = 0;
+	hr_cq->arm_sn = 1;
+	refcount_set(&hr_cq->refcount, 1);
+	init_completion(&hr_cq->free);
+
+	return 0;
+
+err_cqn:
+	free_cqn(hr_dev, hr_cq->cqn);
+err_cq_db:
+	free_cq_db(hr_dev, hr_cq, NULL);
+err_cq_buf:
+	free_cq_buf(hr_dev, hr_cq);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
index b869cdc54118..481b30f2f5b5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c
+++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
@@ -47,7 +47,6 @@ static const char * const sw_stat_info[] = {
 	[HNS_ROCE_DFX_MBX_EVENT_CNT] = "mbx_event",
 	[HNS_ROCE_DFX_QP_CREATE_ERR_CNT] = "qp_create_err",
 	[HNS_ROCE_DFX_QP_MODIFY_ERR_CNT] = "qp_modify_err",
-	[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT] = "cq_create_err",
 	[HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT] = "cq_modify_err",
 	[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT] = "srq_create_err",
 	[HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT] = "srq_modify_err",
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 3f032b8038af..fdc5f487d7a3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -902,7 +902,6 @@ enum hns_roce_sw_dfx_stat_index {
 	HNS_ROCE_DFX_MBX_EVENT_CNT,
 	HNS_ROCE_DFX_QP_CREATE_ERR_CNT,
 	HNS_ROCE_DFX_QP_MODIFY_ERR_CNT,
-	HNS_ROCE_DFX_CQ_CREATE_ERR_CNT,
 	HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT,
 	HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT,
 	HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT,
@@ -1295,6 +1294,8 @@ int to_hr_qp_type(int qp_type);
 
 int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 		       struct uverbs_attr_bundle *attrs);
+int hns_roce_create_user_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+			    struct uverbs_attr_bundle *attrs);
 
 int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
 int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index a3490bab297a..64de49bf8df7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -727,6 +727,7 @@ static const struct ib_device_ops hns_roce_dev_ops = {
 	.create_ah = hns_roce_create_ah,
 	.create_user_ah = hns_roce_create_ah,
 	.create_cq = hns_roce_create_cq,
+	.create_user_cq = hns_roce_create_user_cq,
 	.create_qp = hns_roce_create_qp,
 	.dealloc_pd = hns_roce_dealloc_pd,
 	.dealloc_ucontext = hns_roce_dealloc_ucontext,

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 30/50] RDMA/core: Remove legacy CQ creation fallback path
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

All drivers now support the modern CQ creation interface via the
create_user_cq callback. Remove the legacy fallback to create_cq
for userspace CQ creation.

This simplifies the core code by eliminating conditional logic and
ensures all userspace CQ creation goes through the modern interface
that properly supports user-supplied umem.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/uverbs_cmd.c          | 9 +++------
 drivers/infiniband/core/uverbs_std_types_cq.c | 8 ++------
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 041bed7a43b4..cdfee86fb800 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1071,10 +1071,7 @@ static int create_cq(struct uverbs_attr_bundle *attrs,
 	rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
 	rdma_restrack_set_name(&cq->res, NULL);
 
-	if (ib_dev->ops.create_user_cq)
-		ret = ib_dev->ops.create_user_cq(cq, &attr, attrs);
-	else
-		ret = ib_dev->ops.create_cq(cq, &attr, attrs);
+	ret = ib_dev->ops.create_user_cq(cq, &attr, attrs);
 	if (ret)
 		goto err_free;
 	rdma_restrack_add(&cq->res);
@@ -3791,7 +3788,7 @@ const struct uapi_definition uverbs_def_write_intf[] = {
 				     UAPI_DEF_WRITE_UDATA_IO(
 					     struct ib_uverbs_create_cq,
 					     struct ib_uverbs_create_cq_resp),
-				     UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+				     UAPI_DEF_METHOD_NEEDS_FN(create_user_cq)),
 		DECLARE_UVERBS_WRITE(
 			IB_USER_VERBS_CMD_DESTROY_CQ,
 			ib_uverbs_destroy_cq,
@@ -3822,7 +3819,7 @@ const struct uapi_definition uverbs_def_write_intf[] = {
 					     reserved,
 					     struct ib_uverbs_ex_create_cq_resp,
 					     response_length),
-			UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+			UAPI_DEF_METHOD_NEEDS_FN(create_user_cq)),
 		DECLARE_UVERBS_WRITE_EX(
 			IB_USER_VERBS_EX_CMD_MODIFY_CQ,
 			ib_uverbs_ex_modify_cq,
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index d2c8f71f934c..a12e3184dd5c 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -78,8 +78,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
 	int buffer_fd;
 	int ret;
 
-	if ((!ib_dev->ops.create_cq && !ib_dev->ops.create_user_cq) ||
-	    !ib_dev->ops.destroy_cq)
+	if (!ib_dev->ops.create_user_cq || !ib_dev->ops.destroy_cq)
 		return -EOPNOTSUPP;
 
 	ret = uverbs_copy_from(&attr.comp_vector, attrs,
@@ -200,10 +199,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
 	rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
 	rdma_restrack_set_name(&cq->res, NULL);
 
-	if (ib_dev->ops.create_user_cq)
-		ret = ib_dev->ops.create_user_cq(cq, &attr, attrs);
-	else
-		ret = ib_dev->ops.create_cq(cq, &attr, attrs);
+	ret = ib_dev->ops.create_user_cq(cq, &attr, attrs);
 	if (ret)
 		goto err_free;
 

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 31/50] RDMA/core: Remove unused ib_resize_cq() implementation
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

There are no in-kernel users of the CQ resize functionality, so drop it.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/verbs.c | 10 ----------
 include/rdma/ib_verbs.h         |  9 ---------
 2 files changed, 19 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 9d075eeda463..5f59487fc9d4 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -2264,16 +2264,6 @@ int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
 }
 EXPORT_SYMBOL(ib_destroy_cq_user);
 
-int ib_resize_cq(struct ib_cq *cq, int cqe)
-{
-	if (cq->shared)
-		return -EOPNOTSUPP;
-
-	return cq->device->ops.resize_cq ?
-		cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
-}
-EXPORT_SYMBOL(ib_resize_cq);
-
 /* Memory regions */
 
 struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 67aa5fc2c0b7..b8adc2f17e73 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4001,15 +4001,6 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
 #define ib_create_cq(device, cmp_hndlr, evt_hndlr, cq_ctxt, cq_attr) \
 	__ib_create_cq((device), (cmp_hndlr), (evt_hndlr), (cq_ctxt), (cq_attr), KBUILD_MODNAME)
 
-/**
- * ib_resize_cq - Modifies the capacity of the CQ.
- * @cq: The CQ to resize.
- * @cqe: The minimum size of the CQ.
- *
- * Users can examine the cq structure to determine the actual CQ size.
- */
-int ib_resize_cq(struct ib_cq *cq, int cqe);
-
 /**
  * rdma_set_cq_moderation - Modifies moderation params of the CQ
  * @cq: The CQ to modify.

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 29/50] RDMA/rxe: Split user and kernel CQ creation paths
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Separate the CQ creation logic into distinct kernel and user flows.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_verbs.c | 81 ++++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 34 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 38d8c408320f..1e651bdd8622 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1072,58 +1072,70 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 }
 
 /* cq */
-static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
-			 struct uverbs_attr_bundle *attrs)
+static int rxe_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			      struct uverbs_attr_bundle *attrs)
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct ib_device *dev = ibcq->device;
 	struct rxe_dev *rxe = to_rdev(dev);
 	struct rxe_cq *cq = to_rcq(ibcq);
-	struct rxe_create_cq_resp __user *uresp = NULL;
-	int err, cleanup_err;
+	struct rxe_create_cq_resp __user *uresp;
+	int err;
 
-	if (udata) {
-		if (udata->outlen < sizeof(*uresp)) {
-			err = -EINVAL;
-			rxe_dbg_dev(rxe, "malformed udata, err = %d\n", err);
-			goto err_out;
-		}
-		uresp = udata->outbuf;
-	}
+	if (udata->outlen < sizeof(*uresp))
+		return -EINVAL;
 
-	if (attr->flags) {
-		err = -EOPNOTSUPP;
-		rxe_dbg_dev(rxe, "bad attr->flags, err = %d\n", err);
-		goto err_out;
-	}
+	uresp = udata->outbuf;
 
-	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
-	if (err) {
-		rxe_dbg_dev(rxe, "bad init attributes, err = %d\n", err);
-		goto err_out;
-	}
+	if (attr->flags || ibcq->umem)
+		return -EOPNOTSUPP;
+
+	if (attr->cqe > rxe->attr.max_cqe)
+		return -EINVAL;
 
 	err = rxe_add_to_pool(&rxe->cq_pool, cq);
-	if (err) {
-		rxe_dbg_dev(rxe, "unable to create cq, err = %d\n", err);
-		goto err_out;
-	}
+	if (err)
+		return err;
 
 	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata,
 			       uresp);
-	if (err) {
-		rxe_dbg_cq(cq, "create cq failed, err = %d\n", err);
+	if (err)
 		goto err_cleanup;
-	}
 
 	return 0;
 
 err_cleanup:
-	cleanup_err = rxe_cleanup(cq);
-	if (cleanup_err)
-		rxe_err_cq(cq, "cleanup failed, err = %d\n", cleanup_err);
-err_out:
-	rxe_err_dev(rxe, "returned err = %d\n", err);
+	rxe_cleanup(cq);
+	return err;
+}
+
+static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			 struct uverbs_attr_bundle *attrs)
+{
+	struct ib_device *dev = ibcq->device;
+	struct rxe_dev *rxe = to_rdev(dev);
+	struct rxe_cq *cq = to_rcq(ibcq);
+	int err;
+
+	if (attr->flags)
+		return -EOPNOTSUPP;
+
+	if (attr->cqe > rxe->attr.max_cqe)
+		return -EINVAL;
+
+	err = rxe_add_to_pool(&rxe->cq_pool, cq);
+	if (err)
+		return err;
+
+	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, NULL,
+			       NULL);
+	if (err)
+		goto err_cleanup;
+
+	return 0;
+
+err_cleanup:
+	rxe_cleanup(cq);
 	return err;
 }
 
@@ -1478,6 +1490,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.attach_mcast = rxe_attach_mcast,
 	.create_ah = rxe_create_ah,
 	.create_cq = rxe_create_cq,
+	.create_user_cq = rxe_create_user_cq,
 	.create_qp = rxe_create_qp,
 	.create_srq = rxe_create_srq,
 	.create_user_ah = rxe_create_ah,

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 33/50] RDMA/bnxt_re: Drop support for resizing kernel CQs
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

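There are no ULP callers of the CQ resize functionality, so the resize
callback is only ever invoked for user CQs. Remove the kernel CQ check and
simplify the error path accordingly.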

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index b8516d8b8426..16bb586d68c7 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -3338,10 +3338,6 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 	cq =  container_of(ibcq, struct bnxt_re_cq, ib_cq);
 	rdev = cq->rdev;
 	dev_attr = rdev->dev_attr;
-	if (!ibcq->uobject) {
-		ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported");
-		return -EOPNOTSUPP;
-	}
 
 	if (cq->resize_umem) {
 		ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - Busy",
@@ -3375,7 +3371,7 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 		ibdev_err(&rdev->ibdev, "%s: ib_umem_get failed! rc = %pe\n",
 			  __func__, cq->resize_umem);
 		cq->resize_umem = NULL;
-		goto fail;
+		return rc;
 	}
 	cq->resize_cqe = entries;
 	memcpy(&sg_info, &cq->qplib_cq.sg_info, sizeof(sg_info));
@@ -3399,13 +3395,11 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 	return 0;
 
 fail:
-	if (cq->resize_umem) {
-		ib_umem_release(cq->resize_umem);
-		cq->resize_umem = NULL;
-		cq->resize_cqe = 0;
-		memcpy(&cq->qplib_cq.sg_info, &sg_info, sizeof(sg_info));
-		cq->qplib_cq.dpi = orig_dpi;
-	}
+	ib_umem_release(cq->resize_umem);
+	cq->resize_umem = NULL;
+	cq->resize_cqe = 0;
+	memcpy(&cq->qplib_cq.sg_info, &sg_info, sizeof(sg_info));
+	cq->qplib_cq.dpi = orig_dpi;
 	return rc;
 }
 

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 34/50] RDMA/irdma: Remove resize support for kernel CQs
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

The CQ resize operation is a uverbs-only interface and is not required for
kernel-created CQs. Drop this unused functionality.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/irdma/verbs.c | 88 +++++++++----------------------------
 1 file changed, 21 insertions(+), 67 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index f727d1922a84..d5442aebf1ac 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -2015,6 +2015,9 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
 			   struct ib_udata *udata)
 {
+	struct irdma_resize_cq_req req = {};
+	struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct irdma_ucontext, ibucontext);
 #define IRDMA_RESIZE_CQ_MIN_REQ_LEN offsetofend(struct irdma_resize_cq_req, user_cq_buffer)
 	struct irdma_cq *iwcq = to_iwcq(ibcq);
 	struct irdma_sc_dev *dev = iwcq->sc_cq.dev;
@@ -2029,7 +2032,6 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
 	struct irdma_pci_f *rf;
 	struct irdma_cq_buf *cq_buf = NULL;
 	unsigned long flags;
-	u8 cqe_size;
 	int ret;
 
 	iwdev = to_iwdev(ibcq->device);
@@ -2039,81 +2041,39 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
 	    IRDMA_FEATURE_CQ_RESIZE))
 		return -EOPNOTSUPP;
 
-	if (udata && udata->inlen < IRDMA_RESIZE_CQ_MIN_REQ_LEN)
+	if (udata->inlen < IRDMA_RESIZE_CQ_MIN_REQ_LEN)
 		return -EINVAL;
 
 	if (entries > rf->max_cqe)
 		return -EINVAL;
 
-	if (!iwcq->user_mode) {
-		entries += 2;
-
-		if (!iwcq->sc_cq.cq_uk.avoid_mem_cflct &&
-		    dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
-			entries *= 2;
-
-		if (entries & 1)
-			entries += 1; /* cq size must be an even number */
-
-		cqe_size = iwcq->sc_cq.cq_uk.avoid_mem_cflct ? 64 : 32;
-		if (entries * cqe_size == IRDMA_HW_PAGE_SIZE)
-			entries += 2;
-	}
-
 	info.cq_size = max(entries, 4);
 
 	if (info.cq_size == iwcq->sc_cq.cq_uk.cq_size - 1)
 		return 0;
 
-	if (udata) {
-		struct irdma_resize_cq_req req = {};
-		struct irdma_ucontext *ucontext =
-			rdma_udata_to_drv_context(udata, struct irdma_ucontext,
-						  ibucontext);
-
-		/* CQ resize not supported with legacy GEN_1 libi40iw */
-		if (ucontext->legacy_mode)
-			return -EOPNOTSUPP;
+	/* CQ resize not supported with legacy GEN_1 libi40iw */
+	if (ucontext->legacy_mode)
+		return -EOPNOTSUPP;
 
-		if (ib_copy_from_udata(&req, udata,
-				       min(sizeof(req), udata->inlen)))
-			return -EINVAL;
+	if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen)))
+		return -EINVAL;
 
-		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
-		iwpbl_buf = irdma_get_pbl((unsigned long)req.user_cq_buffer,
-					  &ucontext->cq_reg_mem_list);
-		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+	spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+	iwpbl_buf = irdma_get_pbl((unsigned long)req.user_cq_buffer,
+				  &ucontext->cq_reg_mem_list);
+	spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
 
-		if (!iwpbl_buf)
-			return -ENOMEM;
+	if (!iwpbl_buf)
+		return -ENOMEM;
 
-		cqmr_buf = &iwpbl_buf->cq_mr;
-		if (iwpbl_buf->pbl_allocated) {
-			info.virtual_map = true;
-			info.pbl_chunk_size = 1;
-			info.first_pm_pbl_idx = cqmr_buf->cq_pbl.idx;
-		} else {
-			info.cq_pa = cqmr_buf->cq_pbl.addr;
-		}
+	cqmr_buf = &iwpbl_buf->cq_mr;
+	if (iwpbl_buf->pbl_allocated) {
+		info.virtual_map = true;
+		info.pbl_chunk_size = 1;
+		info.first_pm_pbl_idx = cqmr_buf->cq_pbl.idx;
 	} else {
-		/* Kmode CQ resize */
-		int rsize;
-
-		rsize = info.cq_size * sizeof(struct irdma_cqe);
-		kmem_buf.size = ALIGN(round_up(rsize, 256), 256);
-		kmem_buf.va = dma_alloc_coherent(dev->hw->device,
-						 kmem_buf.size, &kmem_buf.pa,
-						 GFP_KERNEL);
-		if (!kmem_buf.va)
-			return -ENOMEM;
-
-		info.cq_base = kmem_buf.va;
-		info.cq_pa = kmem_buf.pa;
-		cq_buf = kzalloc(sizeof(*cq_buf), GFP_KERNEL);
-		if (!cq_buf) {
-			ret = -ENOMEM;
-			goto error;
-		}
+		info.cq_pa = cqmr_buf->cq_pbl.addr;
 	}
 
 	cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
@@ -2154,13 +2114,7 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
 
 	return 0;
 error:
-	if (!udata) {
-		dma_free_coherent(dev->hw->device, kmem_buf.size, kmem_buf.va,
-				  kmem_buf.pa);
-		kmem_buf.va = NULL;
-	}
 	kfree(cq_buf);
-
 	return ret;
 }
 

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 35/50] RDMA/mlx4: Remove support for kernel CQ resize
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

No upper-layer protocol (ULP) currently resizes a kernel CQ, so the
kernel-side resize path has no active callers. Drop that unused
functionality and keep only the user-space (umem-based) resize flow.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx4/cq.c | 167 +++++-----------------------------------
 1 file changed, 21 insertions(+), 146 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 83169060d120..05fad06b89c2 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -296,30 +296,6 @@ int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	return err;
 }
 
-static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
-				  int entries)
-{
-	int err;
-
-	if (cq->resize_buf)
-		return -EBUSY;
-
-	cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL);
-	if (!cq->resize_buf)
-		return -ENOMEM;
-
-	err = mlx4_ib_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
-	if (err) {
-		kfree(cq->resize_buf);
-		cq->resize_buf = NULL;
-		return err;
-	}
-
-	cq->resize_buf->cqe = entries - 1;
-
-	return 0;
-}
-
 static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
 				   int entries, struct ib_udata *udata)
 {
@@ -329,9 +305,6 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq
 	int n;
 	int err;
 
-	if (cq->resize_umem)
-		return -EBUSY;
-
 	if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
 		return -EFAULT;
 
@@ -371,91 +344,36 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq
 
 err_umem:
 	ib_umem_release(cq->resize_umem);
-
+	cq->resize_umem = NULL;
 err_buf:
 	kfree(cq->resize_buf);
 	cq->resize_buf = NULL;
 	return err;
 }
 
-static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
-{
-	u32 i;
-
-	i = cq->mcq.cons_index;
-	while (get_sw_cqe(cq, i))
-		++i;
-
-	return i - cq->mcq.cons_index;
-}
-
-static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
-{
-	struct mlx4_cqe *cqe, *new_cqe;
-	int i;
-	int cqe_size = cq->buf.entry_size;
-	int cqe_inc = cqe_size == 64 ? 1 : 0;
-
-	i = cq->mcq.cons_index;
-	cqe = get_cqe(cq, i & cq->ibcq.cqe);
-	cqe += cqe_inc;
-
-	while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
-		new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
-					   (i + 1) & cq->resize_buf->cqe);
-		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
-		new_cqe += cqe_inc;
-
-		new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
-			(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
-		cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
-		cqe += cqe_inc;
-	}
-	++cq->mcq.cons_index;
-}
-
 int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 {
 	struct mlx4_ib_dev *dev = to_mdev(ibcq->device);
 	struct mlx4_ib_cq *cq = to_mcq(ibcq);
 	struct mlx4_mtt mtt;
-	int outst_cqe;
 	int err;
 
-	mutex_lock(&cq->resize_mutex);
-	if (entries < 1 || entries > dev->dev->caps.max_cqes) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (entries < 1 || entries > dev->dev->caps.max_cqes)
+		return -EINVAL;
 
 	entries = roundup_pow_of_two(entries + 1);
-	if (entries == ibcq->cqe + 1) {
-		err = 0;
-		goto out;
-	}
-
-	if (entries > dev->dev->caps.max_cqes + 1) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (entries == ibcq->cqe + 1)
+		return 0;
 
-	if (ibcq->uobject) {
-		err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
-		if (err)
-			goto out;
-	} else {
-		/* Can't be smaller than the number of outstanding CQEs */
-		outst_cqe = mlx4_ib_get_outstanding_cqes(cq);
-		if (entries < outst_cqe + 1) {
-			err = -EINVAL;
-			goto out;
-		}
+	if (entries > dev->dev->caps.max_cqes + 1)
+		return -EINVAL;
 
-		err = mlx4_alloc_resize_buf(dev, cq, entries);
-		if (err)
-			goto out;
+	mutex_lock(&cq->resize_mutex);
+	err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
+	if (err) {
+		mutex_unlock(&cq->resize_mutex);
+		return err;
 	}
-
 	mtt = cq->buf.mtt;
 
 	err = mlx4_cq_resize(dev->dev, &cq->mcq, entries, &cq->resize_buf->buf.mtt);
@@ -463,52 +381,26 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 		goto err_buf;
 
 	mlx4_mtt_cleanup(dev->dev, &mtt);
-	if (ibcq->uobject) {
-		cq->buf      = cq->resize_buf->buf;
-		cq->ibcq.cqe = cq->resize_buf->cqe;
-		ib_umem_release(cq->ibcq.umem);
-		cq->ibcq.umem     = cq->resize_umem;
-
-		kfree(cq->resize_buf);
-		cq->resize_buf = NULL;
-		cq->resize_umem = NULL;
-	} else {
-		struct mlx4_ib_cq_buf tmp_buf;
-		int tmp_cqe = 0;
-
-		spin_lock_irq(&cq->lock);
-		if (cq->resize_buf) {
-			mlx4_ib_cq_resize_copy_cqes(cq);
-			tmp_buf = cq->buf;
-			tmp_cqe = cq->ibcq.cqe;
-			cq->buf      = cq->resize_buf->buf;
-			cq->ibcq.cqe = cq->resize_buf->cqe;
-
-			kfree(cq->resize_buf);
-			cq->resize_buf = NULL;
-		}
-		spin_unlock_irq(&cq->lock);
+	cq->buf = cq->resize_buf->buf;
+	cq->ibcq.cqe = cq->resize_buf->cqe;
+	ib_umem_release(cq->ibcq.umem);
+	cq->ibcq.umem = cq->resize_umem;
 
-		if (tmp_cqe)
-			mlx4_ib_free_cq_buf(dev, &tmp_buf, tmp_cqe);
-	}
+	kfree(cq->resize_buf);
+	cq->resize_buf = NULL;
+	cq->resize_umem = NULL;
+	mutex_unlock(&cq->resize_mutex);
+	return 0;
 
-	goto out;
 
 err_buf:
 	mlx4_mtt_cleanup(dev->dev, &cq->resize_buf->buf.mtt);
-	if (!ibcq->uobject)
-		mlx4_ib_free_cq_buf(dev, &cq->resize_buf->buf,
-				    cq->resize_buf->cqe);
-
 	kfree(cq->resize_buf);
 	cq->resize_buf = NULL;
 
 	ib_umem_release(cq->resize_umem);
 	cq->resize_umem = NULL;
-out:
 	mutex_unlock(&cq->resize_mutex);
-
 	return err;
 }
 
@@ -707,7 +599,6 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 	u16 wqe_ctr;
 	unsigned tail = 0;
 
-repoll:
 	cqe = next_cqe_sw(cq);
 	if (!cqe)
 		return -EAGAIN;
@@ -727,22 +618,6 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
 		MLX4_CQE_OPCODE_ERROR;
 
-	/* Resize CQ in progress */
-	if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
-		if (cq->resize_buf) {
-			struct mlx4_ib_dev *dev = to_mdev(cq->ibcq.device);
-
-			mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
-			cq->buf      = cq->resize_buf->buf;
-			cq->ibcq.cqe = cq->resize_buf->cqe;
-
-			kfree(cq->resize_buf);
-			cq->resize_buf = NULL;
-		}
-
-		goto repoll;
-	}
-
 	if (!*cur_qp ||
 	    (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) {
 		/*

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 32/50] RDMA: Clarify that CQ resize is a user‑space verb
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

The CQ resize operation is used only by uverbs. Make this explicit.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/device.c             | 2 +-
 drivers/infiniband/core/uverbs_cmd.c         | 4 ++--
 drivers/infiniband/hw/bnxt_re/main.c         | 2 +-
 drivers/infiniband/hw/irdma/verbs.c          | 2 +-
 drivers/infiniband/hw/mlx4/main.c            | 2 +-
 drivers/infiniband/hw/mlx5/main.c            | 2 +-
 drivers/infiniband/hw/mthca/mthca_provider.c | 2 +-
 drivers/infiniband/hw/ocrdma/ocrdma_main.c   | 2 +-
 drivers/infiniband/sw/rdmavt/vt.c            | 2 +-
 drivers/infiniband/sw/rxe/rxe_verbs.c        | 2 +-
 include/rdma/ib_verbs.h                      | 3 ++-
 11 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 9209b8c664ef..9411f7805eed 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2799,7 +2799,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
 	SET_DEVICE_OP(dev_ops, req_notify_cq);
 	SET_DEVICE_OP(dev_ops, rereg_user_mr);
-	SET_DEVICE_OP(dev_ops, resize_cq);
+	SET_DEVICE_OP(dev_ops, resize_user_cq);
 	SET_DEVICE_OP(dev_ops, set_vf_guid);
 	SET_DEVICE_OP(dev_ops, set_vf_link_state);
 	SET_DEVICE_OP(dev_ops, ufile_hw_cleanup);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index cdfee86fb800..57697738fd25 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1151,7 +1151,7 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
 	if (IS_ERR(cq))
 		return PTR_ERR(cq);
 
-	ret = cq->device->ops.resize_cq(cq, cmd.cqe, &attrs->driver_udata);
+	ret = cq->device->ops.resize_user_cq(cq, cmd.cqe, &attrs->driver_udata);
 	if (ret)
 		goto out;
 
@@ -3811,7 +3811,7 @@ const struct uapi_definition uverbs_def_write_intf[] = {
 				     UAPI_DEF_WRITE_UDATA_IO(
 					     struct ib_uverbs_resize_cq,
 					     struct ib_uverbs_resize_cq_resp),
-				     UAPI_DEF_METHOD_NEEDS_FN(resize_cq)),
+				     UAPI_DEF_METHOD_NEEDS_FN(resize_user_cq)),
 		DECLARE_UVERBS_WRITE_EX(
 			IB_USER_VERBS_EX_CMD_CREATE_CQ,
 			ib_uverbs_ex_create_cq,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 368c1fd8172e..ccc01fc222ca 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1373,7 +1373,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
 	.reg_user_mr = bnxt_re_reg_user_mr,
 	.reg_user_mr_dmabuf = bnxt_re_reg_user_mr_dmabuf,
 	.req_notify_cq = bnxt_re_req_notify_cq,
-	.resize_cq = bnxt_re_resize_cq,
+	.resize_user_cq = bnxt_re_resize_cq,
 	.create_flow = bnxt_re_create_flow,
 	.destroy_flow = bnxt_re_destroy_flow,
 	INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index f2b3cfe125af..f727d1922a84 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -5460,7 +5460,7 @@ static const struct ib_device_ops irdma_dev_ops = {
 	.reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf,
 	.rereg_user_mr = irdma_rereg_user_mr,
 	.req_notify_cq = irdma_req_notify_cq,
-	.resize_cq = irdma_resize_cq,
+	.resize_user_cq = irdma_resize_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, irdma_ucontext, ibucontext),
 	INIT_RDMA_OBJ_SIZE(ib_ah, irdma_ah, ibah),
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index fc05e7a1a870..daf95f94ec6f 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2570,7 +2570,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = {
 	.reg_user_mr = mlx4_ib_reg_user_mr,
 	.req_notify_cq = mlx4_ib_arm_cq,
 	.rereg_user_mr = mlx4_ib_rereg_user_mr,
-	.resize_cq = mlx4_ib_resize_cq,
+	.resize_user_cq = mlx4_ib_resize_cq,
 	.report_port_event = mlx4_ib_port_event,
 
 	INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah),
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4f49f65e2c16..0471155eb739 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -4496,7 +4496,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
 	.reg_user_mr_dmabuf = mlx5_ib_reg_user_mr_dmabuf,
 	.req_notify_cq = mlx5_ib_arm_cq,
 	.rereg_user_mr = mlx5_ib_rereg_user_mr,
-	.resize_cq = mlx5_ib_resize_cq,
+	.resize_user_cq = mlx5_ib_resize_cq,
 	.ufile_hw_cleanup = mlx5_ib_ufile_hw_cleanup,
 
 	INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 6bf825978846..8920deceea73 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1119,7 +1119,7 @@ static const struct ib_device_ops mthca_dev_ops = {
 	.query_port = mthca_query_port,
 	.query_qp = mthca_query_qp,
 	.reg_user_mr = mthca_reg_user_mr,
-	.resize_cq = mthca_resize_cq,
+	.resize_user_cq = mthca_resize_cq,
 
 	INIT_RDMA_OBJ_SIZE(ib_ah, mthca_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, mthca_cq, ibcq),
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 0d89c5ec9a7a..7dafebc7f57e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -167,7 +167,7 @@ static const struct ib_device_ops ocrdma_dev_ops = {
 	.query_qp = ocrdma_query_qp,
 	.reg_user_mr = ocrdma_reg_user_mr,
 	.req_notify_cq = ocrdma_arm_cq,
-	.resize_cq = ocrdma_resize_cq,
+	.resize_user_cq = ocrdma_resize_cq,
 
 	INIT_RDMA_OBJ_SIZE(ib_ah, ocrdma_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, ocrdma_cq, ibcq),
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 15964400b8d3..5aff65b3916b 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -368,7 +368,7 @@ static const struct ib_device_ops rvt_dev_ops = {
 	.query_srq = rvt_query_srq,
 	.reg_user_mr = rvt_reg_user_mr,
 	.req_notify_cq = rvt_req_notify_cq,
-	.resize_cq = rvt_resize_cq,
+	.resize_user_cq = rvt_resize_cq,
 
 	INIT_RDMA_OBJ_SIZE(ib_ah, rvt_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, rvt_cq, ibcq),
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 1e651bdd8622..72e3019ed1cb 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1532,7 +1532,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.reg_user_mr = rxe_reg_user_mr,
 	.req_notify_cq = rxe_req_notify_cq,
 	.rereg_user_mr = rxe_rereg_user_mr,
-	.resize_cq = rxe_resize_cq,
+	.resize_user_cq = rxe_resize_cq,
 
 	INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index b8adc2f17e73..94bb3cc4c67a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2534,7 +2534,8 @@ struct ib_device_ops {
 			      struct uverbs_attr_bundle *attrs);
 	int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
 	int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
-	int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+	int (*resize_user_cq)(struct ib_cq *cq, int cqe,
+			      struct ib_udata *udata);
 	/*
 	 * pre_destroy_cq - Prevent a cq from generating any new work
 	 * completions, but not free any kernel resources

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 37/50] RDMA/mthca: Remove resize support for kernel CQs
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

The CQ resize operation is a uverbs-only interface and is not required for
kernel-created CQs. Drop this unused functionality.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mthca/mthca_provider.c | 102 ++-------------------------
 1 file changed, 6 insertions(+), 96 deletions(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 8920deceea73..fd306a229318 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -626,8 +626,6 @@ static int mthca_create_user_cq(struct ib_cq *ibcq,
 		goto err_unmap_arm;
 	}
 
-	cq->resize_buf = NULL;
-
 	return 0;
 
 err_unmap_arm:
@@ -667,53 +665,6 @@ static int mthca_create_cq(struct ib_cq *ibcq,
 	if (err)
 		return err;
 
-	cq->resize_buf = NULL;
-
-	return 0;
-}
-
-static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
-				  int entries)
-{
-	int ret;
-
-	spin_lock_irq(&cq->lock);
-	if (cq->resize_buf) {
-		ret = -EBUSY;
-		goto unlock;
-	}
-
-	cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
-	if (!cq->resize_buf) {
-		ret = -ENOMEM;
-		goto unlock;
-	}
-
-	cq->resize_buf->state = CQ_RESIZE_ALLOC;
-
-	ret = 0;
-
-unlock:
-	spin_unlock_irq(&cq->lock);
-
-	if (ret)
-		return ret;
-
-	ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
-	if (ret) {
-		spin_lock_irq(&cq->lock);
-		kfree(cq->resize_buf);
-		cq->resize_buf = NULL;
-		spin_unlock_irq(&cq->lock);
-		return ret;
-	}
-
-	cq->resize_buf->cqe = entries - 1;
-
-	spin_lock_irq(&cq->lock);
-	cq->resize_buf->state = CQ_RESIZE_READY;
-	spin_unlock_irq(&cq->lock);
-
 	return 0;
 }
 
@@ -736,60 +687,19 @@ static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *uda
 		goto out;
 	}
 
-	if (cq->is_kernel) {
-		ret = mthca_alloc_resize_buf(dev, cq, entries);
-		if (ret)
-			goto out;
-		lkey = cq->resize_buf->buf.mr.ibmr.lkey;
-	} else {
-		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		lkey = ucmd.lkey;
+	if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+		ret = -EFAULT;
+		goto out;
 	}
+	lkey = ucmd.lkey;
 
 	ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries));
-
-	if (ret) {
-		if (cq->resize_buf) {
-			mthca_free_cq_buf(dev, &cq->resize_buf->buf,
-					  cq->resize_buf->cqe);
-			kfree(cq->resize_buf);
-			spin_lock_irq(&cq->lock);
-			cq->resize_buf = NULL;
-			spin_unlock_irq(&cq->lock);
-		}
+	if (ret)
 		goto out;
-	}
-
-	if (cq->is_kernel) {
-		struct mthca_cq_buf tbuf;
-		int tcqe;
-
-		spin_lock_irq(&cq->lock);
-		if (cq->resize_buf->state == CQ_RESIZE_READY) {
-			mthca_cq_resize_copy_cqes(cq);
-			tbuf         = cq->buf;
-			tcqe         = cq->ibcq.cqe;
-			cq->buf      = cq->resize_buf->buf;
-			cq->ibcq.cqe = cq->resize_buf->cqe;
-		} else {
-			tbuf = cq->resize_buf->buf;
-			tcqe = cq->resize_buf->cqe;
-		}
-
-		kfree(cq->resize_buf);
-		cq->resize_buf = NULL;
-		spin_unlock_irq(&cq->lock);
-
-		mthca_free_cq_buf(dev, &tbuf, tcqe);
-	} else
-		ibcq->cqe = entries - 1;
 
+	ibcq->cqe = entries - 1;
 out:
 	mutex_unlock(&cq->mutex);
-
 	return ret;
 }
 

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 38/50] RDMA/rdmavt: Remove resize support for kernel CQs
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

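The CQ resize operation is a uverbs-only interface and is not needed for
CQs created by the kernel. Remove this unused functionality.

As a result, a resize request whose udata response buffer is smaller than
a __u64 is now rejected with -EINVAL instead of falling through to the
kernel-queue path.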

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/sw/rdmavt/cq.c | 70 ++++++++++++---------------------------
 1 file changed, 21 insertions(+), 49 deletions(-)

diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index db86eb026bb3..1ae5d8c86acb 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -408,51 +408,36 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 	struct rvt_dev_info *rdi = cq->rdi;
 	struct rvt_cq_wc *u_wc = NULL;
 	struct rvt_cq_wc *old_u_wc = NULL;
-	struct rvt_k_cq_wc *k_wc = NULL;
-	struct rvt_k_cq_wc *old_k_wc = NULL;
+	__u64 offset = 0;
 
 	if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
 		return -EINVAL;
 
+	if (udata->outlen < sizeof(__u64))
+		return -EINVAL;
+
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
 	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		sz = sizeof(struct ib_uverbs_wc) * (cqe + 1);
-		sz += sizeof(*u_wc);
-		u_wc = vmalloc_user(sz);
-		if (!u_wc)
-			return -ENOMEM;
-	} else {
-		sz = sizeof(struct ib_wc) * (cqe + 1);
-		sz += sizeof(*k_wc);
-		k_wc = vzalloc_node(sz, rdi->dparms.node);
-		if (!k_wc)
-			return -ENOMEM;
-	}
-	/* Check that we can write the offset to mmap. */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		__u64 offset = 0;
+	sz = sizeof(struct ib_uverbs_wc) * (cqe + 1);
+	sz += sizeof(*u_wc);
+	u_wc = vmalloc_user(sz);
+	if (!u_wc)
+		return -ENOMEM;
 
-		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
-		if (ret)
-			goto bail_free;
-	}
+	/* Check that we can write the offset to mmap. */
+	ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
+	if (ret)
+		goto bail_free;
 
 	spin_lock_irq(&cq->lock);
 	/*
 	 * Make sure head and tail are sane since they
 	 * might be user writable.
 	 */
-	if (u_wc) {
-		old_u_wc = cq->queue;
-		head = RDMA_READ_UAPI_ATOMIC(old_u_wc->head);
-		tail = RDMA_READ_UAPI_ATOMIC(old_u_wc->tail);
-	} else {
-		old_k_wc = cq->kqueue;
-		head = old_k_wc->head;
-		tail = old_k_wc->tail;
-	}
+	old_u_wc = cq->queue;
+	head = RDMA_READ_UAPI_ATOMIC(old_u_wc->head);
+	tail = RDMA_READ_UAPI_ATOMIC(old_u_wc->tail);
 
 	if (head > (u32)cq->ibcq.cqe)
 		head = (u32)cq->ibcq.cqe;
@@ -467,31 +452,19 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 		goto bail_unlock;
 	}
 	for (n = 0; tail != head; n++) {
-		if (u_wc)
-			u_wc->uqueue[n] = old_u_wc->uqueue[tail];
-		else
-			k_wc->kqueue[n] = old_k_wc->kqueue[tail];
+		u_wc->uqueue[n] = old_u_wc->uqueue[tail];
 		if (tail == (u32)cq->ibcq.cqe)
 			tail = 0;
 		else
 			tail++;
 	}
 	cq->ibcq.cqe = cqe;
-	if (u_wc) {
-		RDMA_WRITE_UAPI_ATOMIC(u_wc->head, n);
-		RDMA_WRITE_UAPI_ATOMIC(u_wc->tail, 0);
-		cq->queue = u_wc;
-	} else {
-		k_wc->head = n;
-		k_wc->tail = 0;
-		cq->kqueue = k_wc;
-	}
+	RDMA_WRITE_UAPI_ATOMIC(u_wc->head, n);
+	RDMA_WRITE_UAPI_ATOMIC(u_wc->tail, 0);
+	cq->queue = u_wc;
 	spin_unlock_irq(&cq->lock);
 
-	if (u_wc)
-		vfree(old_u_wc);
-	else
-		vfree(old_k_wc);
+	vfree(old_u_wc);
 
 	if (cq->ip) {
 		struct rvt_mmap_info *ip = cq->ip;
@@ -521,7 +494,6 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 	spin_unlock_irq(&cq->lock);
 bail_free:
 	vfree(u_wc);
-	vfree(k_wc);
 
 	return ret;
 }

-- 
2.52.0


^ permalink raw reply related

* [PATCH rdma-next 39/50] RDMA/rxe: Remove unused kernel-side CQ resize support
From: Leon Romanovsky @ 2026-02-13 10:58 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Selvin Xavier, Kalesh AP,
	Potnuri Bharat Teja, Michael Margolin, Gal Pressman,
	Yossi Leybovich, Cheng Xu, Kai Shen, Chengchang Tang,
	Junxian Huang, Abhijit Gangurde, Allen Hubbe, Krzysztof Czurylo,
	Tatyana Nikolova, Long Li, Konstantin Taranov, Yishai Hadas,
	Michal Kalderon, Bryan Tan, Vishnu Dasa,
	Broadcom internal kernel review list, Christian Benvenuti,
	Nelson Escobar, Dennis Dalessandro, Bernard Metzler, Zhu Yanjun
  Cc: linux-kernel, linux-rdma, linux-hyperv
In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

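CQ resizing is only used by uverbs; the kernel-side CQ resize path has
no users and can be removed.

While at it, drop the debug and error prints on the failure paths and
return the error code directly instead of jumping to a common label.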

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_verbs.c | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 72e3019ed1cb..bc7c77ff3d90 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1146,32 +1146,19 @@ static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 	struct rxe_resize_cq_resp __user *uresp = NULL;
 	int err;
 
-	if (udata) {
-		if (udata->outlen < sizeof(*uresp)) {
-			err = -EINVAL;
-			rxe_dbg_cq(cq, "malformed udata\n");
-			goto err_out;
-		}
-		uresp = udata->outbuf;
-	}
+	if (udata->outlen < sizeof(*uresp))
+		return -EINVAL;
+	uresp = udata->outbuf;
 
 	err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
-	if (err) {
-		rxe_dbg_cq(cq, "bad attr, err = %d\n", err);
-		goto err_out;
-	}
+	if (err)
+		return err;
 
 	err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
-	if (err) {
-		rxe_dbg_cq(cq, "resize cq failed, err = %d\n", err);
-		goto err_out;
-	}
+	if (err)
+		return err;
 
 	return 0;
-
-err_out:
-	rxe_err_cq(cq, "returned err = %d\n", err);
-	return err;
 }
 
 static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)

-- 
2.52.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox