Linux RDMA and InfiniBand development
 help / color / mirror / Atom feed
From: Jiri Pirko <jiri@resnulli.us>
To: linux-rdma@vger.kernel.org
Cc: jgg@ziepe.ca, leon@kernel.org, mrgolin@amazon.com,
	gal.pressman@linux.dev, sleybo@amazon.com, parav@nvidia.com,
	mbloch@nvidia.com, yanjun.zhu@linux.dev,
	marco.crivellari@suse.com, roman.gushchin@linux.dev,
	phaddad@nvidia.com, lirongqing@baidu.com, ynachum@amazon.com,
	huangjunxian6@hisilicon.com, kalesh-anakkur.purayil@broadcom.com,
	ohartoov@nvidia.com, michaelgur@nvidia.com, shayd@nvidia.com,
	edwards@nvidia.com, sriharsha.basavapatna@broadcom.com,
	andrew.gospodarek@broadcom.com, selvin.xavier@broadcom.com
Subject: [PATCH rdma-next v3 15/17] RDMA/mlx5: Use UMEM attribute for CQ doorbell record
Date: Mon,  4 May 2026 15:57:29 +0200	[thread overview]
Message-ID: <20260504135731.2345383-16-jiri@resnulli.us> (raw)
In-Reply-To: <20260504135731.2345383-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@nvidia.com>

Add an optional mlx5 driver-namespace UMEM attribute on CQ
create so userspace can supply the doorbell record umem
explicitly. Resolve it inside mlx5_ib_db_map_user() and use it
as a private DBR page when present; otherwise take the existing
UHW share-or-pin path that preserves per-page DBR sharing
across CQ/QP/SRQ in the same process.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
---
v2->v3:
- moved CQ DBR attr to mlx5 driver namespace
- changed to use ib_umem_get_attr() to get umem
- added page-crossing check
---
 drivers/infiniband/hw/mlx5/cq.c          |  8 +++-
 drivers/infiniband/hw/mlx5/doorbell.c    | 51 ++++++++++++++++++++----
 drivers/infiniband/hw/mlx5/mlx5_ib.h     |  5 ++-
 drivers/infiniband/hw/mlx5/qp.c          |  4 +-
 drivers/infiniband/hw/mlx5/srq.c         |  2 +-
 include/uapi/rdma/mlx5_user_ioctl_cmds.h |  1 +
 6 files changed, 57 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index fb6172a9be57..b69ab49b1b3e 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -760,7 +760,9 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 		goto err_umem;
 	}
 
-	err = mlx5_ib_db_map_user(context, ucmd.db_addr, &cq->db);
+	err = mlx5_ib_db_map_user(context, udata,
+				  MLX5_IB_ATTR_CREATE_CQ_DBR_BUF_UMEM,
+				  ucmd.db_addr, &cq->db);
 	if (err)
 		goto err_umem;
 
@@ -1519,7 +1521,9 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
 	UVERBS_ATTR_PTR_IN(
 		MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX,
 		UVERBS_ATTR_TYPE(u32),
-		UA_OPTIONAL));
+		UA_OPTIONAL),
+	UVERBS_ATTR_UMEM(MLX5_IB_ATTR_CREATE_CQ_DBR_BUF_UMEM,
+			 UA_OPTIONAL));
 
 const struct uapi_definition mlx5_ib_create_cq_defs[] = {
 	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_CQ, &mlx5_ib_cq_create),
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index 020c70328663..c9f9b9179e88 100644
--- a/drivers/infiniband/hw/mlx5/doorbell.c
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -45,20 +45,56 @@ struct mlx5_ib_user_db_page {
 	struct mm_struct	*mm;
 };
 
-int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
-			struct mlx5_db *db)
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
+			struct ib_udata *udata, u16 attr_id,
+			unsigned long virt, struct mlx5_db *db)
 {
-	struct mlx5_ib_user_db_page *page;
+	struct mlx5_ib_user_db_page *page = NULL;
+	unsigned long dma_offset;
 	int err = 0;
 
+	if (udata) {
+		struct ib_umem *umem;
+
+		umem = ib_umem_get_attr(context->ibucontext.device, udata,
+					attr_id, sizeof(__be32) * 2, 0);
+		if (IS_ERR(umem))
+			return PTR_ERR(umem);
+		if (umem) {
+			/*
+			 * The 8-byte DBR is programmed to the device as one
+			 * DMA address, so it must stay within a single page.
+			 * An 8-byte range that crosses a page boundary may
+			 * be split across two non-contiguous DMA mappings.
+			 */
+			if (ib_umem_offset(umem) >
+			    PAGE_SIZE - sizeof(__be32) * 2) {
+				ib_umem_release(umem);
+				return -EINVAL;
+			}
+			page = kzalloc_obj(*page);
+			if (!page) {
+				ib_umem_release(umem);
+				return -ENOMEM;
+			}
+			page->umem = umem;
+			dma_offset = ib_umem_offset(umem);
+		}
+	}
+
 	mutex_lock(&context->db_page_mutex);
 
+	if (page)
+		goto add_page;
+
+	dma_offset = virt & ~PAGE_MASK;
+
 	list_for_each_entry(page, &context->db_page_list, list)
 		if ((current->mm == page->mm) &&
 		    (page->user_virt == (virt & PAGE_MASK)))
 			goto found;
 
-	page = kmalloc_obj(*page);
+	page = kzalloc_obj(*page);
 	if (!page) {
 		err = -ENOMEM;
 		goto out;
@@ -76,11 +112,11 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
 	mmgrab(current->mm);
 	page->mm = current->mm;
 
+add_page:
 	list_add(&page->list, &context->db_page_list);
 
 found:
-	db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
-		  (virt & ~PAGE_MASK);
+	db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) + dma_offset;
 	db->u.user_page = page;
 	++page->refcnt;
 
@@ -96,7 +132,8 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
 
 	if (!--db->u.user_page->refcnt) {
 		list_del(&db->u.user_page->list);
-		mmdrop(db->u.user_page->mm);
+		if (db->u.user_page->mm)
+			mmdrop(db->u.user_page->mm);
 		ib_umem_release(db->u.user_page->umem);
 		kfree(db->u.user_page);
 	}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index e156dc4d7529..45bc8928523a 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1259,8 +1259,9 @@ to_mmmap(struct rdma_user_mmap_entry *rdma_entry)
 
 int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev);
 int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev);
-int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
-			struct mlx5_db *db);
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
+			struct ib_udata *udata, u16 attr_id,
+			unsigned long virt, struct mlx5_db *db);
 void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
 void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
 void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 1b764a573dd7..997ea9bcfc55 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -918,7 +918,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas,
 		offset);
 
-	err = mlx5_ib_db_map_user(ucontext, ucmd->db_addr, &rwq->db);
+	err = mlx5_ib_db_map_user(ucontext, NULL, 0, ucmd->db_addr, &rwq->db);
 	if (err) {
 		mlx5_ib_dbg(dev, "map failed\n");
 		goto err_umem;
@@ -1056,7 +1056,7 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		resp->bfreg_index = MLX5_IB_INVALID_BFREG;
 	qp->bfregn = bfregn;
 
-	err = mlx5_ib_db_map_user(context, ucmd->db_addr, &qp->db);
+	err = mlx5_ib_db_map_user(context, NULL, 0, ucmd->db_addr, &qp->db);
 	if (err) {
 		mlx5_ib_dbg(dev, "map failed\n");
 		goto err_free;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index bc22036d7e80..88db0143bc3f 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -74,7 +74,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 	}
 	in->umem = srq->umem;
 
-	err = mlx5_ib_db_map_user(ucontext, ucmd.db_addr, &srq->db);
+	err = mlx5_ib_db_map_user(ucontext, NULL, 0, ucmd.db_addr, &srq->db);
 	if (err) {
 		mlx5_ib_dbg(dev, "map doorbell failed\n");
 		goto err_umem;
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 01a2a050e468..b63e75034cda 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -274,6 +274,7 @@ enum mlx5_ib_device_query_context_attrs {
 
 enum mlx5_ib_create_cq_attrs {
 	MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX = UVERBS_ID_DRIVER_NS_WITH_UHW,
+	MLX5_IB_ATTR_CREATE_CQ_DBR_BUF_UMEM,
 };
 
 enum mlx5_ib_reg_dmabuf_mr_attrs {
-- 
2.53.0


  parent reply	other threads:[~2026-05-04 13:58 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-04 13:57 [PATCH rdma-next v3 00/17] RDMA: Introduce generic buffer descriptor infrastructure for umem Jiri Pirko
2026-05-04 13:57 ` [PATCH rdma-next v3 01/17] RDMA/umem: Rename ib_umem_get() to ib_umem_get_va() Jiri Pirko
2026-05-04 13:57 ` [PATCH rdma-next v3 02/17] RDMA/umem: Split ib_umem_get_va() into a thin wrapper around __ib_umem_get_va() Jiri Pirko
2026-05-04 13:57 ` [PATCH rdma-next v3 03/17] RDMA/core: Introduce generic buffer descriptor infrastructure for umem Jiri Pirko
2026-05-06 13:37   ` Jason Gunthorpe
2026-05-06 14:14     ` Jiri Pirko
2026-05-12 18:12       ` Jason Gunthorpe
2026-05-04 13:57 ` [PATCH rdma-next v3 04/17] RDMA/umem: Route ib_umem_get_va() through ib_umem_get() Jiri Pirko
2026-05-04 13:57 ` [PATCH rdma-next v3 05/17] RDMA/uverbs: Inline _uverbs_get_const_{signed,unsigned}() Jiri Pirko
2026-05-04 13:57 ` [PATCH rdma-next v3 06/17] RDMA/uverbs: Push out CQ buffer umem processing into a helper Jiri Pirko
2026-05-04 13:57 ` [PATCH rdma-next v3 07/17] RDMA/uverbs: Add CQ buffer UMEM attribute and driver helpers Jiri Pirko
2026-05-06 13:46   ` Jason Gunthorpe
2026-05-06 14:27     ` Jiri Pirko
2026-05-04 13:57 ` [PATCH rdma-next v3 08/17] RDMA/efa: Use ib_umem_get_cq_buf() for user CQ buffer Jiri Pirko
2026-05-06 13:51   ` Jason Gunthorpe
2026-05-06 14:32     ` Jiri Pirko
2026-05-12 18:18       ` Jason Gunthorpe
2026-05-04 13:57 ` [PATCH rdma-next v3 09/17] RDMA/mlx5: Use ib_umem_get_cq_buf_or_va() " Jiri Pirko
2026-05-04 13:57 ` [PATCH rdma-next v3 10/17] RDMA/bnxt_re: " Jiri Pirko
2026-05-04 13:57 ` [PATCH rdma-next v3 11/17] RDMA/mlx4: Use ib_umem_get_cq_buf() " Jiri Pirko
2026-05-04 13:57 ` [PATCH rdma-next v3 12/17] RDMA/uverbs: Remove legacy umem field from struct ib_cq Jiri Pirko
2026-05-04 13:57 ` [PATCH rdma-next v3 13/17] RDMA/uverbs: Use UMEM attributes for QP creation Jiri Pirko
2026-05-06 13:43   ` Jason Gunthorpe
2026-05-06 14:17     ` Jiri Pirko
2026-05-04 13:57 ` [PATCH rdma-next v3 14/17] RDMA/mlx5: Use UMEM attributes for QP buffers in create_qp Jiri Pirko
2026-05-04 13:57 ` Jiri Pirko [this message]
2026-05-04 13:57 ` [PATCH rdma-next v3 16/17] RDMA/mlx5: Use UMEM attribute for QP doorbell record Jiri Pirko
2026-05-04 13:57 ` [PATCH rdma-next v3 17/17] RDMA/uverbs: Track attr consumption and warn on unused attrs Jiri Pirko
2026-05-06 13:56   ` Jason Gunthorpe
2026-05-06 14:22     ` Jiri Pirko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260504135731.2345383-16-jiri@resnulli.us \
    --to=jiri@resnulli.us \
    --cc=andrew.gospodarek@broadcom.com \
    --cc=edwards@nvidia.com \
    --cc=gal.pressman@linux.dev \
    --cc=huangjunxian6@hisilicon.com \
    --cc=jgg@ziepe.ca \
    --cc=kalesh-anakkur.purayil@broadcom.com \
    --cc=leon@kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=lirongqing@baidu.com \
    --cc=marco.crivellari@suse.com \
    --cc=mbloch@nvidia.com \
    --cc=michaelgur@nvidia.com \
    --cc=mrgolin@amazon.com \
    --cc=ohartoov@nvidia.com \
    --cc=parav@nvidia.com \
    --cc=phaddad@nvidia.com \
    --cc=roman.gushchin@linux.dev \
    --cc=selvin.xavier@broadcom.com \
    --cc=shayd@nvidia.com \
    --cc=sleybo@amazon.com \
    --cc=sriharsha.basavapatna@broadcom.com \
    --cc=yanjun.zhu@linux.dev \
    --cc=ynachum@amazon.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox