public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] RDMA/rxe: Add dma-buf support for Soft-RoCE
@ 2026-03-26  5:27 Zhu Yanjun
  2026-03-26  5:27 ` [PATCH 1/2] RDMA/umem: Change for rdma devices has not dma device Zhu Yanjun
  2026-03-26  5:27 ` [PATCH 2/2] RDMA/rxe: Add dma-buf support Zhu Yanjun
  0 siblings, 2 replies; 3+ messages in thread
From: Zhu Yanjun @ 2026-03-26  5:27 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma, yanjun.zhu, mie

This patchset introduces dma-buf support for the Soft-RoCE (RXE) driver.
By enabling dma-buf, RXE can now participate in zero-copy data transfers
with other providers (such as GPUs) that export memory via dma-buf fds.

Traditionally, RXE only supported user-space memory regions (UMEM) based
on system RAM. This change extends RXE's capability to handle peer-to-peer
(P2P)-like workflows in a software-defined RDMA environment.

This patchset passes the rdma-core tests; the matching rdma-core changes
are available at the following link:

https://github.com/linux-rdma/rdma-core/pull/1055

Zhu Yanjun (2):
  RDMA/umem: Change for rdma devices has not dma device
  RDMA/rxe: Add dma-buf support

 drivers/infiniband/core/umem_dmabuf.c | 35 ++++++++++-
 drivers/infiniband/sw/rxe/rxe.c       |  2 +
 drivers/infiniband/sw/rxe/rxe_loc.h   |  2 +
 drivers/infiniband/sw/rxe/rxe_mr.c    | 89 ++++++++++++++++++++++++---
 drivers/infiniband/sw/rxe/rxe_odp.c   |  2 +-
 drivers/infiniband/sw/rxe/rxe_verbs.c | 40 ++++++++++++
 drivers/infiniband/sw/rxe/rxe_verbs.h |  2 +-
 include/rdma/ib_umem.h                |  1 +
 8 files changed, 161 insertions(+), 12 deletions(-)

-- 
2.53.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] RDMA/umem: Change for rdma devices has not dma device
  2026-03-26  5:27 [PATCH 0/2] RDMA/rxe: Add dma-buf support for Soft-RoCE Zhu Yanjun
@ 2026-03-26  5:27 ` Zhu Yanjun
  2026-03-26  5:27 ` [PATCH 2/2] RDMA/rxe: Add dma-buf support Zhu Yanjun
  1 sibling, 0 replies; 3+ messages in thread
From: Zhu Yanjun @ 2026-03-26  5:27 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma, yanjun.zhu, mie

The current implementation requires a DMA device in order for an RDMA
driver to use dma-buf memory as an RDMA buffer. Software RDMA drivers
have no DMA device, so allow them to map a dma-buf directly instead of
attaching through a DMA device.

Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
---
 drivers/infiniband/core/umem_dmabuf.c | 35 ++++++++++++++++++++++++++-
 include/rdma/ib_umem.h                |  1 +
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
index d30f24b90bca..65c5f09f380f 100644
--- a/drivers/infiniband/core/umem_dmabuf.c
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -142,6 +142,8 @@ ib_umem_dmabuf_get_with_dma_device(struct ib_device *device,
 		goto out_release_dmabuf;
 	}
 
+	umem_dmabuf->dmabuf = dmabuf;
+
 	umem = &umem_dmabuf->umem;
 	umem->ibdev = device;
 	umem->length = size;
@@ -152,6 +154,24 @@ ib_umem_dmabuf_get_with_dma_device(struct ib_device *device,
 	if (!ib_umem_num_pages(umem))
 		goto out_free_umem;
 
+	/* Software RDMA drivers has not dma device. Just get dmabuf from fd */
+	if (!device->dma_device) {
+		struct sg_table *sgt;
+
+		dma_resv_lock(dmabuf->resv, NULL);
+		sgt = dmabuf->ops->map_dma_buf(NULL, DMA_BIDIRECTIONAL);
+		dma_resv_unlock(dmabuf->resv);
+		if (IS_ERR(sgt)) {
+			ret = ERR_CAST(sgt);
+			goto out_free_umem;
+		}
+		umem_dmabuf->sgt = sgt;
+		goto done;
+	}
+
+	if (unlikely(!ops || !ops->move_notify))
+		goto out_free_umem;
+
 	umem_dmabuf->attach = dma_buf_dynamic_attach(
 					dmabuf,
 					dma_device,
@@ -161,6 +181,7 @@ ib_umem_dmabuf_get_with_dma_device(struct ib_device *device,
 		ret = ERR_CAST(umem_dmabuf->attach);
 		goto out_free_umem;
 	}
+done:
 	return umem_dmabuf;
 
 out_free_umem:
@@ -260,11 +281,23 @@ EXPORT_SYMBOL(ib_umem_dmabuf_revoke);
 
 void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf)
 {
-	struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf;
+	struct dma_buf *dmabuf = umem_dmabuf->dmabuf;
+
+	if (!umem_dmabuf->attach) {
+		if (umem_dmabuf->sgt) {
+			dma_resv_lock(dmabuf->resv, NULL);
+			dmabuf->ops->unmap_dma_buf(NULL, umem_dmabuf->sgt,
+							DMA_BIDIRECTIONAL);
+			dma_resv_unlock(dmabuf->resv);
+		}
+		goto free_dmabuf;
+	}
 
 	ib_umem_dmabuf_revoke(umem_dmabuf);
 
 	dma_buf_detach(dmabuf, umem_dmabuf->attach);
+
+free_dmabuf:
 	dma_buf_put(dmabuf);
 	kfree(umem_dmabuf);
 }
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 0a8e092c0ea8..6eb5760d5ca3 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -31,6 +31,7 @@ struct ib_umem {
 struct ib_umem_dmabuf {
 	struct ib_umem umem;
 	struct dma_buf_attachment *attach;
+	struct dma_buf *dmabuf;
 	struct sg_table *sgt;
 	struct scatterlist *first_sg;
 	struct scatterlist *last_sg;
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] RDMA/rxe: Add dma-buf support
  2026-03-26  5:27 [PATCH 0/2] RDMA/rxe: Add dma-buf support for Soft-RoCE Zhu Yanjun
  2026-03-26  5:27 ` [PATCH 1/2] RDMA/umem: Change for rdma devices has not dma device Zhu Yanjun
@ 2026-03-26  5:27 ` Zhu Yanjun
  1 sibling, 0 replies; 3+ messages in thread
From: Zhu Yanjun @ 2026-03-26  5:27 UTC (permalink / raw)
  To: jgg, leon, zyjzyj2000, linux-rdma, yanjun.zhu, mie

Implement the ib device operation 'reg_user_mr_dmabuf'. Generate a
rxe_map from the memory space linked to the passed dma-buf.

Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
---
 drivers/infiniband/sw/rxe/rxe.c       |  2 +
 drivers/infiniband/sw/rxe/rxe_loc.h   |  2 +
 drivers/infiniband/sw/rxe/rxe_mr.c    | 89 ++++++++++++++++++++++++---
 drivers/infiniband/sw/rxe/rxe_odp.c   |  2 +-
 drivers/infiniband/sw/rxe/rxe_verbs.c | 40 ++++++++++++
 drivers/infiniband/sw/rxe/rxe_verbs.h |  2 +-
 6 files changed, 126 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index e891199cbdef..9920ea3104be 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -278,3 +278,5 @@ late_initcall(rxe_module_init);
 module_exit(rxe_module_exit);
 
 MODULE_ALIAS_RDMA_LINK("rxe");
+
+MODULE_IMPORT_NS("DMA_BUF");
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 7992290886e1..dc9a56450c82 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -66,6 +66,8 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
 int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
 		unsigned int length, enum rxe_mr_copy_dir dir);
+int rxe_mr_dmabuf_init_user(struct rxe_pd *pd, int fd, u64 start, u64 length,
+			    u64 iova, int access, struct rxe_mr *mr);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
 	      void *addr, int length, enum rxe_mr_copy_dir dir);
 int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index c696ff874980..5c129a488b83 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/libnvdimm.h>
+#include <linux/dma-buf.h>
 
 #include "rxe.h"
 #include "rxe_loc.h"
@@ -90,7 +91,7 @@ static unsigned long rxe_mr_iova_to_index(struct rxe_mr *mr, u64 iova)
 {
 	int idx;
 
-	if (mr_page_size(mr) > PAGE_SIZE)
+	if (rxe_mr_page_size(mr) > PAGE_SIZE)
 		idx = (iova - (mr->ibmr.iova & mr->page_mask)) >> PAGE_SHIFT;
 	else
 		idx = (iova >> mr->page_shift) -
@@ -103,15 +104,15 @@ static unsigned long rxe_mr_iova_to_index(struct rxe_mr *mr, u64 iova)
 /*
  * Convert iova to offset within the page_info entry.
  *
- * For mr_page_size > PAGE_SIZE, the offset is within the system page.
- * For mr_page_size <= PAGE_SIZE, the offset is within the MR page size.
+ * For rxe_mr_page_size > PAGE_SIZE, the offset is within the system page.
+ * For rxe_mr_page_size <= PAGE_SIZE, the offset is within the MR page size.
  */
 static unsigned long rxe_mr_iova_to_page_offset(struct rxe_mr *mr, u64 iova)
 {
-	if (mr_page_size(mr) > PAGE_SIZE)
+	if (rxe_mr_page_size(mr) > PAGE_SIZE)
 		return iova & (PAGE_SIZE - 1);
 	else
-		return iova & (mr_page_size(mr) - 1);
+		return iova & (rxe_mr_page_size(mr) - 1);
 }
 
 static bool is_pmem_page(struct page *pg)
@@ -129,7 +130,7 @@ static int rxe_mr_fill_pages_from_sgt(struct rxe_mr *mr, struct sg_table *sgt)
 	struct page *page;
 	bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT);
 
-	WARN_ON(mr_page_size(mr) != PAGE_SIZE);
+	WARN_ON(rxe_mr_page_size(mr) != PAGE_SIZE);
 
 	__sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);
 	if (!__sg_page_iter_next(&sg_iter))
@@ -224,6 +225,75 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
 	return err;
 }
 
+static int rxe_map_dmabuf_mr(struct rxe_mr *mr, struct ib_umem_dmabuf *umem_dmabuf)
+{
+	unsigned int page_size = rxe_mr_page_size(mr);
+	struct sg_table *sgt = umem_dmabuf->sgt;
+	struct scatterlist *sg;
+	struct page *page;
+	int i, j, n = 0;
+
+	mr->page_shift = ilog2(page_size);
+	mr->page_mask = ~((u64)page_size - 1);
+	mr->nbuf = 0;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		page = sg_page(sg);
+		for (j = 0; j < (sg->length >> PAGE_SHIFT); j++) {
+			mr->page_info[n].page = page + j;
+			mr->page_info[n].offset = 0;
+			n++;
+		}
+	}
+
+	mr->nbuf = n;
+	return 0;
+}
+
+int rxe_mr_dmabuf_init_user(struct rxe_pd *pd, int fd, u64 start, u64 length,
+			    u64 iova, int access, struct rxe_mr *mr)
+{
+	struct ib_umem_dmabuf *umem_dmabuf;
+	int err;
+
+	umem_dmabuf = ib_umem_dmabuf_get(pd->ibpd.device, start, length, fd,
+					 access, NULL);
+	if (IS_ERR(umem_dmabuf)) {
+		err = PTR_ERR(umem_dmabuf);
+		goto err_out;
+	}
+
+	rxe_mr_init(access, mr);
+
+	err = alloc_mr_page_info(mr, ib_umem_num_pages(&umem_dmabuf->umem));
+	if (err) {
+		pr_warn("%s: Unable to allocate memory for map\n", __func__);
+		goto err_release_umem;
+	}
+
+	mr->ibmr.pd = &pd->ibpd;
+	mr->ibmr.iova = iova;
+	mr->umem = &umem_dmabuf->umem;
+	mr->access = access;
+	mr->state = RXE_MR_STATE_VALID;
+	mr->ibmr.type = IB_MR_TYPE_USER;
+
+	err = rxe_map_dmabuf_mr(mr, umem_dmabuf);
+	if (err)
+		goto err_free_mr_map;
+
+	return 0;
+
+err_free_mr_map:
+	free_mr_page_info(mr);
+
+err_release_umem:
+	ib_umem_release(&umem_dmabuf->umem);
+
+err_out:
+	return err;
+}
+
 int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
 {
 	int err;
@@ -260,7 +330,7 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 dma_addr)
 {
 	struct rxe_mr *mr = to_rmr(ibmr);
 	bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT);
-	u32 i, pages_per_mr = mr_page_size(mr) >> PAGE_SHIFT;
+	u32 i, pages_per_mr = rxe_mr_page_size(mr) >> PAGE_SHIFT;
 
 	pages_per_mr = MAX(1, pages_per_mr);
 
@@ -288,7 +358,7 @@ int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sgl,
 		  int sg_nents, unsigned int *sg_offset)
 {
 	struct rxe_mr *mr = to_rmr(ibmr);
-	unsigned int page_size = mr_page_size(mr);
+	unsigned int page_size = rxe_mr_page_size(mr);
 
 	/*
 	 * Ensure page_size and PAGE_SIZE are compatible for mapping.
@@ -302,7 +372,7 @@ int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sgl,
 		return -EINVAL;
 	}
 
-	if (mr_page_size(mr) > PAGE_SIZE) {
+	if (rxe_mr_page_size(mr) > PAGE_SIZE) {
 		/* resize page_info if needed */
 		u32 map_mr_pages = (page_size >> PAGE_SHIFT) * mr->num_buf;
 
@@ -809,6 +879,7 @@ void rxe_mr_cleanup(struct rxe_pool_elem *elem)
 	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
 
 	rxe_put(mr_pd(mr));
+
 	ib_umem_release(mr->umem);
 
 	if (mr->ibmr.type != IB_MR_TYPE_DMA)
diff --git a/drivers/infiniband/sw/rxe/rxe_odp.c b/drivers/infiniband/sw/rxe/rxe_odp.c
index bc11b1ec59ac..12c48f0cae47 100644
--- a/drivers/infiniband/sw/rxe/rxe_odp.c
+++ b/drivers/infiniband/sw/rxe/rxe_odp.c
@@ -351,7 +351,7 @@ int rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova,
 		page = hmm_pfn_to_page(umem_odp->map.pfn_list[index]);
 
 		bytes = min_t(unsigned int, length,
-			      mr_page_size(mr) - page_offset);
+			      rxe_mr_page_size(mr) - page_offset);
 
 		va = kmap_local_page(page);
 		arch_wb_cache_pmem(va + page_offset, bytes);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index fe41362c5144..1b5381b14d4b 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1358,6 +1358,45 @@ static struct ib_mr *rxe_rereg_user_mr(struct ib_mr *ibmr, int flags,
 	return NULL;
 }
 
+static struct ib_mr *rxe_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
+					    u64 length, u64 iova, int fd,
+					    int access, struct ib_dmah *dmah,
+					    struct uverbs_attr_bundle *udata)
+{
+	int err;
+	struct rxe_dev *rxe = to_rdev(ibpd->device);
+	struct rxe_pd *pd = to_rpd(ibpd);
+	struct rxe_mr *mr;
+
+	mr = kzalloc_obj(*mr);
+	if (!mr) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	err = rxe_add_to_pool(&rxe->mr_pool, mr);
+	if (err)
+		goto err_free;
+
+	rxe_get(pd);
+
+	err = rxe_mr_dmabuf_init_user(pd, fd, start, length, iova, access, mr);
+	if (err)
+		goto err3;
+
+	return &mr->ibmr;
+
+err3:
+	rxe_put(pd);
+	rxe_put(mr);
+
+err_free:
+	kfree(mr);
+
+err_out:
+	return ERR_PTR(err);
+}
+
 static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 				  u32 max_num_sg)
 {
@@ -1517,6 +1556,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.query_qp = rxe_query_qp,
 	.query_srq = rxe_query_srq,
 	.reg_user_mr = rxe_reg_user_mr,
+	.reg_user_mr_dmabuf = rxe_reg_user_mr_dmabuf,
 	.req_notify_cq = rxe_req_notify_cq,
 	.rereg_user_mr = rxe_rereg_user_mr,
 	.resize_cq = rxe_resize_cq,
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index fb149f37e91d..9d77bec0bf3c 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -364,7 +364,7 @@ struct rxe_mr {
 	struct rxe_mr_page	*page_info;
 };
 
-static inline unsigned int mr_page_size(struct rxe_mr *mr)
+static inline unsigned int rxe_mr_page_size(struct rxe_mr *mr)
 {
 	return mr ? mr->ibmr.page_size : PAGE_SIZE;
 }
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-03-26  5:28 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-26  5:27 [PATCH 0/2] RDMA/rxe: Add dma-buf support for Soft-RoCE Zhu Yanjun
2026-03-26  5:27 ` [PATCH 1/2] RDMA/umem: Change for rdma devices has not dma device Zhu Yanjun
2026-03-26  5:27 ` [PATCH 2/2] RDMA/rxe: Add dma-buf support Zhu Yanjun

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox