public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
From: Zhu Yanjun <yanjun.zhu@linux.dev>
To: jgg@ziepe.ca, leon@kernel.org, zyjzyj2000@gmail.com,
	linux-rdma@vger.kernel.org, yanjun.zhu@linux.dev, mie@igel.co.jp
Subject: [PATCH 2/2] RDMA/rxe: Add dma-buf support
Date: Wed, 25 Mar 2026 22:27:39 -0700	[thread overview]
Message-ID: <20260326052739.3778-3-yanjun.zhu@linux.dev> (raw)
In-Reply-To: <20260326052739.3778-1-yanjun.zhu@linux.dev>

Implement an ib device operation 'reg_user_mr_dmabuf'. Generate a
rxe_map from the memory space linked to the passed dma-buf.

Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
---
 drivers/infiniband/sw/rxe/rxe.c       |  2 +
 drivers/infiniband/sw/rxe/rxe_loc.h   |  2 +
 drivers/infiniband/sw/rxe/rxe_mr.c    | 89 ++++++++++++++++++++++++---
 drivers/infiniband/sw/rxe/rxe_odp.c   |  2 +-
 drivers/infiniband/sw/rxe/rxe_verbs.c | 40 ++++++++++++
 drivers/infiniband/sw/rxe/rxe_verbs.h |  2 +-
 6 files changed, 126 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index e891199cbdef..9920ea3104be 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -278,3 +278,5 @@ late_initcall(rxe_module_init);
 module_exit(rxe_module_exit);
 
 MODULE_ALIAS_RDMA_LINK("rxe");
+
+MODULE_IMPORT_NS("DMA_BUF");
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 7992290886e1..dc9a56450c82 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -66,6 +66,8 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
 int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
 		unsigned int length, enum rxe_mr_copy_dir dir);
+int rxe_mr_dmabuf_init_user(struct rxe_pd *pd, int fd, u64 start, u64 length,
+			    u64 iova, int access, struct rxe_mr *mr);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
 	      void *addr, int length, enum rxe_mr_copy_dir dir);
 int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index c696ff874980..5c129a488b83 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/libnvdimm.h>
+#include <linux/dma-buf.h>
 
 #include "rxe.h"
 #include "rxe_loc.h"
@@ -90,7 +91,7 @@ static unsigned long rxe_mr_iova_to_index(struct rxe_mr *mr, u64 iova)
 {
 	int idx;
 
-	if (mr_page_size(mr) > PAGE_SIZE)
+	if (rxe_mr_page_size(mr) > PAGE_SIZE)
 		idx = (iova - (mr->ibmr.iova & mr->page_mask)) >> PAGE_SHIFT;
 	else
 		idx = (iova >> mr->page_shift) -
@@ -103,15 +104,15 @@ static unsigned long rxe_mr_iova_to_index(struct rxe_mr *mr, u64 iova)
 /*
  * Convert iova to offset within the page_info entry.
  *
- * For mr_page_size > PAGE_SIZE, the offset is within the system page.
- * For mr_page_size <= PAGE_SIZE, the offset is within the MR page size.
+ * For rxe_mr_page_size > PAGE_SIZE, the offset is within the system page.
+ * For rxe_mr_page_size <= PAGE_SIZE, the offset is within the MR page size.
  */
 static unsigned long rxe_mr_iova_to_page_offset(struct rxe_mr *mr, u64 iova)
 {
-	if (mr_page_size(mr) > PAGE_SIZE)
+	if (rxe_mr_page_size(mr) > PAGE_SIZE)
 		return iova & (PAGE_SIZE - 1);
 	else
-		return iova & (mr_page_size(mr) - 1);
+		return iova & (rxe_mr_page_size(mr) - 1);
 }
 
 static bool is_pmem_page(struct page *pg)
@@ -129,7 +130,7 @@ static int rxe_mr_fill_pages_from_sgt(struct rxe_mr *mr, struct sg_table *sgt)
 	struct page *page;
 	bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT);
 
-	WARN_ON(mr_page_size(mr) != PAGE_SIZE);
+	WARN_ON(rxe_mr_page_size(mr) != PAGE_SIZE);
 
 	__sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);
 	if (!__sg_page_iter_next(&sg_iter))
@@ -224,6 +225,75 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
 	return err;
 }
 
+static int rxe_map_dmabuf_mr(struct rxe_mr *mr, struct ib_umem_dmabuf *umem_dmabuf)
+{
+	unsigned int page_size = rxe_mr_page_size(mr);
+	struct sg_table *sgt = umem_dmabuf->sgt;
+	struct scatterlist *sg;
+	struct page *page;
+	int i, j, n = 0;
+
+	mr->page_shift = ilog2(page_size);
+	mr->page_mask = ~((u64)page_size - 1);
+	mr->nbuf = 0;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		page = sg_page(sg);
+		for (j = 0; j < (sg->length >> PAGE_SHIFT); j++) {
+			mr->page_info[n].page = page + j;
+			mr->page_info[n].offset = 0;
+			n++;
+		}
+	}
+
+	mr->nbuf = n;
+	return 0;
+}
+
+int rxe_mr_dmabuf_init_user(struct rxe_pd *pd, int fd, u64 start, u64 length,
+			    u64 iova, int access, struct rxe_mr *mr)
+{
+	struct ib_umem_dmabuf *umem_dmabuf;
+	int err;
+
+	umem_dmabuf = ib_umem_dmabuf_get(pd->ibpd.device, start, length, fd,
+					 access, NULL);
+	if (IS_ERR(umem_dmabuf)) {
+		err = PTR_ERR(umem_dmabuf);
+		goto err_out;
+	}
+
+	rxe_mr_init(access, mr);
+
+	err = alloc_mr_page_info(mr, ib_umem_num_pages(&umem_dmabuf->umem));
+	if (err) {
+		pr_warn("%s: Unable to allocate memory for map\n", __func__);
+		goto err_release_umem;
+	}
+
+	mr->ibmr.pd = &pd->ibpd;
+	mr->ibmr.iova = iova;
+	mr->umem = &umem_dmabuf->umem;
+	mr->access = access;
+	mr->state = RXE_MR_STATE_VALID;
+	mr->ibmr.type = IB_MR_TYPE_USER;
+
+	err = rxe_map_dmabuf_mr(mr, umem_dmabuf);
+	if (err)
+		goto err_free_mr_map;
+
+	return 0;
+
+err_free_mr_map:
+	free_mr_page_info(mr);
+
+err_release_umem:
+	ib_umem_release(&umem_dmabuf->umem);
+
+err_out:
+	return err;
+}
+
 int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
 {
 	int err;
@@ -260,7 +330,7 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 dma_addr)
 {
 	struct rxe_mr *mr = to_rmr(ibmr);
 	bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT);
-	u32 i, pages_per_mr = mr_page_size(mr) >> PAGE_SHIFT;
+	u32 i, pages_per_mr = rxe_mr_page_size(mr) >> PAGE_SHIFT;
 
 	pages_per_mr = MAX(1, pages_per_mr);
 
@@ -288,7 +358,7 @@ int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sgl,
 		  int sg_nents, unsigned int *sg_offset)
 {
 	struct rxe_mr *mr = to_rmr(ibmr);
-	unsigned int page_size = mr_page_size(mr);
+	unsigned int page_size = rxe_mr_page_size(mr);
 
 	/*
 	 * Ensure page_size and PAGE_SIZE are compatible for mapping.
@@ -302,7 +372,7 @@ int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sgl,
 		return -EINVAL;
 	}
 
-	if (mr_page_size(mr) > PAGE_SIZE) {
+	if (rxe_mr_page_size(mr) > PAGE_SIZE) {
 		/* resize page_info if needed */
 		u32 map_mr_pages = (page_size >> PAGE_SHIFT) * mr->num_buf;
 
@@ -809,6 +879,7 @@ void rxe_mr_cleanup(struct rxe_pool_elem *elem)
 	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
 
 	rxe_put(mr_pd(mr));
+
 	ib_umem_release(mr->umem);
 
 	if (mr->ibmr.type != IB_MR_TYPE_DMA)
diff --git a/drivers/infiniband/sw/rxe/rxe_odp.c b/drivers/infiniband/sw/rxe/rxe_odp.c
index bc11b1ec59ac..12c48f0cae47 100644
--- a/drivers/infiniband/sw/rxe/rxe_odp.c
+++ b/drivers/infiniband/sw/rxe/rxe_odp.c
@@ -351,7 +351,7 @@ int rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova,
 		page = hmm_pfn_to_page(umem_odp->map.pfn_list[index]);
 
 		bytes = min_t(unsigned int, length,
-			      mr_page_size(mr) - page_offset);
+			      rxe_mr_page_size(mr) - page_offset);
 
 		va = kmap_local_page(page);
 		arch_wb_cache_pmem(va + page_offset, bytes);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index fe41362c5144..1b5381b14d4b 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1358,6 +1358,45 @@ static struct ib_mr *rxe_rereg_user_mr(struct ib_mr *ibmr, int flags,
 	return NULL;
 }
 
+static struct ib_mr *rxe_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
+					    u64 length, u64 iova, int fd,
+					    int access, struct ib_dmah *dmah,
+					    struct uverbs_attr_bundle *udata)
+{
+	int err;
+	struct rxe_dev *rxe = to_rdev(ibpd->device);
+	struct rxe_pd *pd = to_rpd(ibpd);
+	struct rxe_mr *mr;
+
+	mr = kzalloc_obj(*mr);
+	if (!mr) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	err = rxe_add_to_pool(&rxe->mr_pool, mr);
+	if (err)
+		goto err_free;
+
+	rxe_get(pd);
+
+	err = rxe_mr_dmabuf_init_user(pd, fd, start, length, iova, access, mr);
+	if (err)
+		goto err3;
+
+	return &mr->ibmr;
+
+err3:
+	rxe_put(pd);
+	rxe_put(mr);
+
+err_free:
+	kfree(mr);
+
+err_out:
+	return ERR_PTR(err);
+}
+
 static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 				  u32 max_num_sg)
 {
@@ -1517,6 +1556,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.query_qp = rxe_query_qp,
 	.query_srq = rxe_query_srq,
 	.reg_user_mr = rxe_reg_user_mr,
+	.reg_user_mr_dmabuf = rxe_reg_user_mr_dmabuf,
 	.req_notify_cq = rxe_req_notify_cq,
 	.rereg_user_mr = rxe_rereg_user_mr,
 	.resize_cq = rxe_resize_cq,
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index fb149f37e91d..9d77bec0bf3c 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -364,7 +364,7 @@ struct rxe_mr {
 	struct rxe_mr_page	*page_info;
 };
 
-static inline unsigned int mr_page_size(struct rxe_mr *mr)
+static inline unsigned int rxe_mr_page_size(struct rxe_mr *mr)
 {
 	return mr ? mr->ibmr.page_size : PAGE_SIZE;
 }
-- 
2.53.0


      parent reply	other threads:[~2026-03-26  5:28 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-26  5:27 [PATCH 0/2] RDMA/rxe: Add dma-buf support for Soft-RoCE Zhu Yanjun
2026-03-26  5:27 ` [PATCH 1/2] RDMA/umem: Change for rdma devices has not dma device Zhu Yanjun
2026-03-26  5:27 ` Zhu Yanjun [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260326052739.3778-3-yanjun.zhu@linux.dev \
    --to=yanjun.zhu@linux.dev \
    --cc=jgg@ziepe.ca \
    --cc=leon@kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=mie@igel.co.jp \
    --cc=zyjzyj2000@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox