public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH for-next] RDMA/efa: Add CQ with external memory support
@ 2025-05-15 14:50 Michael Margolin
  2025-05-18  6:42 ` Leon Romanovsky
  0 siblings, 1 reply; 12+ messages in thread
From: Michael Margolin @ 2025-05-15 14:50 UTC (permalink / raw)
  To: jgg, leon, linux-rdma
  Cc: sleybo, matua, gal.pressman, Daniel Kranzdorf, Yonatan Nachum

Add an option to create a CQ using external memory instead of allocating
it in the driver. The memory can be passed from userspace as a dmabuf fd
and an offset. Add a capability bit to advertise support for the feature.

Reviewed-by: Daniel Kranzdorf <dkkranzd@amazon.com>
Reviewed-by: Yonatan Nachum <ynachum@amazon.com>
Signed-off-by: Michael Margolin <mrgolin@amazon.com>
---
 drivers/infiniband/hw/efa/efa.h       |  1 +
 drivers/infiniband/hw/efa/efa_verbs.c | 60 ++++++++++++++++++++++-----
 include/uapi/rdma/efa-abi.h           |  8 +++-
 3 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index 838182d0409c..fd609a50cea0 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -107,6 +107,7 @@ struct efa_cq {
 	u16 cq_idx;
 	/* NULL when no interrupts requested */
 	struct efa_eq *eq;
+	struct ib_umem *umem;
 };
 
 struct efa_qp {
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index a8645a40730f..ecebc47ff249 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -249,6 +249,7 @@ int efa_query_device(struct ib_device *ibdev,
 		resp.max_rdma_size = dev_attr->max_rdma_size;
 
 		resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID;
+		resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_EXT_MEM_DMABUF;
 		if (EFA_DEV_CAP(dev, RDMA_READ))
 			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
 
@@ -1082,8 +1083,11 @@ int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 		xa_erase(&dev->cqs_xa, cq->cq_idx);
 		synchronize_irq(cq->eq->irq.irqn);
 	}
-	efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
-			DMA_FROM_DEVICE);
+
+	if (cq->umem)
+		ib_umem_release(cq->umem);
+	else
+		efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, DMA_FROM_DEVICE);
 	return 0;
 }
 
@@ -1133,8 +1137,10 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	struct efa_com_create_cq_result result;
 	struct ib_device *ibdev = ibcq->device;
 	struct efa_dev *dev = to_edev(ibdev);
+	struct ib_umem_dmabuf *umem_dmabuf;
 	struct efa_ibv_create_cq cmd = {};
 	struct efa_cq *cq = to_ecq(ibcq);
+	struct scatterlist *umem_sgl;
 	int entries = attr->cqe;
 	bool set_src_addr;
 	int err;
@@ -1202,11 +1208,40 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 
 	cq->ucontext = ucontext;
 	cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
-	cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
-					 DMA_FROM_DEVICE);
-	if (!cq->cpu_addr) {
-		err = -ENOMEM;
-		goto err_out;
+
+	if (cmd.flags & EFA_CREATE_CQ_WITH_EXT_MEM_DMABUF) {
+		if (cmd.ext_mem_length < cq->size) {
+			ibdev_dbg(&dev->ibdev, "External memory too small\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, cmd.ext_mem_offset,
+							cq->size, cmd.ext_mem_fd,
+							IB_ACCESS_LOCAL_WRITE);
+		if (IS_ERR(umem_dmabuf)) {
+			err = PTR_ERR(umem_dmabuf);
+			ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
+			goto err_out;
+		}
+		cq->umem = &umem_dmabuf->umem;
+		umem_sgl = cq->umem->sgt_append.sgt.sgl;
+
+		if (sg_dma_len(umem_sgl) < ib_umem_offset(cq->umem) + cq->size) {
+			ibdev_dbg(&dev->ibdev, "Non contiguous CQ unsupported\n");
+			err = -EINVAL;
+			goto err_free_mapped;
+		}
+
+		cq->cpu_addr = NULL;
+		cq->dma_addr = sg_dma_address(umem_sgl) + ib_umem_offset(cq->umem);
+	} else {
+		cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
+						 DMA_FROM_DEVICE);
+		if (!cq->cpu_addr) {
+			err = -ENOMEM;
+			goto err_out;
+		}
 	}
 
 	params.uarn = cq->ucontext->uarn;
@@ -1231,7 +1266,9 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	cq->ibcq.cqe = result.actual_depth;
 	WARN_ON_ONCE(entries != result.actual_depth);
 
-	err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid);
+	if (!(cmd.flags & EFA_CREATE_CQ_WITH_EXT_MEM_DMABUF))
+		err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid);
+
 	if (err) {
 		ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
 			  cq->cq_idx);
@@ -1270,8 +1307,11 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 err_destroy_cq:
 	efa_destroy_cq_idx(dev, cq->cq_idx);
 err_free_mapped:
-	efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
-			DMA_FROM_DEVICE);
+	if (cq->umem)
+		ib_umem_release(cq->umem);
+	else
+		efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
+				DMA_FROM_DEVICE);
 
 err_out:
 	atomic64_inc(&dev->stats.create_cq_err);
diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h
index 11b94b0b035b..f2bcef789571 100644
--- a/include/uapi/rdma/efa-abi.h
+++ b/include/uapi/rdma/efa-abi.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
 /*
- * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #ifndef EFA_ABI_USER_H
@@ -56,6 +56,7 @@ struct efa_ibv_alloc_pd_resp {
 enum {
 	EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL = 1 << 0,
 	EFA_CREATE_CQ_WITH_SGID               = 1 << 1,
+	EFA_CREATE_CQ_WITH_EXT_MEM_DMABUF     = 1 << 2,
 };
 
 struct efa_ibv_create_cq {
@@ -64,6 +65,10 @@ struct efa_ibv_create_cq {
 	__u16 num_sub_cqs;
 	__u8 flags;
 	__u8 reserved_58[5];
+	__aligned_u64 ext_mem_offset;
+	__aligned_u64 ext_mem_length;
+	__u32 ext_mem_fd;
+	__u8 reserved_120[4];
 };
 
 enum {
@@ -131,6 +136,7 @@ enum {
 	EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128 = 1 << 4,
 	EFA_QUERY_DEVICE_CAPS_RDMA_WRITE = 1 << 5,
 	EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV = 1 << 6,
+	EFA_QUERY_DEVICE_CAPS_CQ_WITH_EXT_MEM_DMABUF = 1 << 7,
 };
 
 struct efa_ibv_ex_query_device_resp {
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH for-next] RDMA/efa: Add CQ with external memory support
  2025-05-15 14:50 [PATCH for-next] RDMA/efa: Add CQ with external memory support Michael Margolin
@ 2025-05-18  6:42 ` Leon Romanovsky
  2025-05-18  8:56   ` Margolin, Michael
  0 siblings, 1 reply; 12+ messages in thread
From: Leon Romanovsky @ 2025-05-18  6:42 UTC (permalink / raw)
  To: Michael Margolin
  Cc: jgg, linux-rdma, sleybo, matua, gal.pressman, Daniel Kranzdorf,
	Yonatan Nachum

On Thu, May 15, 2025 at 02:50:40PM +0000, Michael Margolin wrote:
> Add an option to create CQ using external memory instead of allocating
> in the driver. The memory can be passed from userspace by dmabuf fd and
> an offset.

EFA is unique here. This patch is missing a description of why it is
needed, and why existing solutions, if any exist, can't be used.

Thanks

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH for-next] RDMA/efa: Add CQ with external memory support
  2025-05-18  6:42 ` Leon Romanovsky
@ 2025-05-18  8:56   ` Margolin, Michael
  2025-05-20  9:16     ` Leon Romanovsky
  0 siblings, 1 reply; 12+ messages in thread
From: Margolin, Michael @ 2025-05-18  8:56 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: jgg, linux-rdma, sleybo, matua, gal.pressman, Daniel Kranzdorf,
	Yonatan Nachum


On 5/18/2025 9:42 AM, Leon Romanovsky wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>
>
>
> On Thu, May 15, 2025 at 02:50:40PM +0000, Michael Margolin wrote:
>> Add an option to create CQ using external memory instead of allocating
>> in the driver. The memory can be passed from userspace by dmabuf fd and
>> an offset.
> EFA is unique here. This patch is missing description of why it is
> needed, and why existing solutions if any exist, can't be used.
>
> Thanks

I probably should have explained more. The purpose is to create CQs that
reside in GPU HBM, enabling low-latency polling directly by the GPU. EFA
isn't unique in receiving pre-allocated memory from userspace; the
extension here is the use of dmabuf for that purpose, as a general
mechanism that allows using memory independent of its source. I will add
more info to the commit message.

Michael


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH for-next] RDMA/efa: Add CQ with external memory support
  2025-05-18  8:56   ` Margolin, Michael
@ 2025-05-20  9:16     ` Leon Romanovsky
  2025-05-21 15:19       ` Margolin, Michael
  0 siblings, 1 reply; 12+ messages in thread
From: Leon Romanovsky @ 2025-05-20  9:16 UTC (permalink / raw)
  To: Margolin, Michael
  Cc: jgg, linux-rdma, sleybo, matua, gal.pressman, Daniel Kranzdorf,
	Yonatan Nachum

On Sun, May 18, 2025 at 11:56:56AM +0300, Margolin, Michael wrote:
> 
> On 5/18/2025 9:42 AM, Leon Romanovsky wrote:
> > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> > 
> > 
> > 
> > On Thu, May 15, 2025 at 02:50:40PM +0000, Michael Margolin wrote:
> > > Add an option to create CQ using external memory instead of allocating
> > > in the driver. The memory can be passed from userspace by dmabuf fd and
> > > an offset.
> > EFA is unique here. This patch is missing description of why it is
> > needed, and why existing solutions if any exist, can't be used.
> > 
> > Thanks
> 
> I probably should have explained more, the purpose is creating CQs that
> reside in GPU HBM enabling low latency polling directly by the GPU. EFA
> isn't unique in receiving pre-allocated memory from userspace, the extension
> here is the use of dmabuf for that purpose as a general mechanism that
> allows using memory independent of its source. I will add more info in the
> commit message.

I think this functionality is worth having as a general verb rather than
a DV. mlx5 has something similar, and now EFA needs it too.

Let's wait for Jason's response before rushing to implement it.

Thanks


> 
> Michael
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH for-next] RDMA/efa: Add CQ with external memory support
  2025-05-20  9:16     ` Leon Romanovsky
@ 2025-05-21 15:19       ` Margolin, Michael
  2025-05-25 17:52         ` Jason Gunthorpe
  0 siblings, 1 reply; 12+ messages in thread
From: Margolin, Michael @ 2025-05-21 15:19 UTC (permalink / raw)
  To: Leon Romanovsky, jgg
  Cc: linux-rdma, sleybo, matua, gal.pressman, Daniel Kranzdorf,
	Yonatan Nachum


On 5/20/2025 12:16 PM, Leon Romanovsky wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>
>
>
> On Sun, May 18, 2025 at 11:56:56AM +0300, Margolin, Michael wrote:
>> On 5/18/2025 9:42 AM, Leon Romanovsky wrote:
>>> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>>>
>>>
>>>
>>> On Thu, May 15, 2025 at 02:50:40PM +0000, Michael Margolin wrote:
>>>> Add an option to create CQ using external memory instead of allocating
>>>> in the driver. The memory can be passed from userspace by dmabuf fd and
>>>> an offset.
>>> EFA is unique here. This patch is missing description of why it is
>>> needed, and why existing solutions if any exist, can't be used.
>>>
>>> Thanks
>> I probably should have explained more, the purpose is creating CQs that
>> reside in GPU HBM enabling low latency polling directly by the GPU. EFA
>> isn't unique in receiving pre-allocated memory from userspace, the extension
>> here is the use of dmabuf for that purpose as a general mechanism that
>> allows using memory independent of its source. I will add more info in the
>> commit message.
> I think that this functionality is worth to have as general verb and not DV.
> mlx5 has something similar and now EFA needs it too.
>
> Let's wait for Jason's response before rushing to implement it.
>
> Thanks

Jason, any thoughts on this?

We can probably add optional attributes to create CQ command, but most 
of the handling will remain vendor specific. I'm not convinced it's 
beneficial enough.

Michael


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH for-next] RDMA/efa: Add CQ with external memory support
  2025-05-21 15:19       ` Margolin, Michael
@ 2025-05-25 17:52         ` Jason Gunthorpe
  2025-05-26 15:45           ` Margolin, Michael
  0 siblings, 1 reply; 12+ messages in thread
From: Jason Gunthorpe @ 2025-05-25 17:52 UTC (permalink / raw)
  To: Margolin, Michael
  Cc: Leon Romanovsky, linux-rdma, sleybo, matua, gal.pressman,
	Daniel Kranzdorf, Yonatan Nachum

On Wed, May 21, 2025 at 06:19:51PM +0300, Margolin, Michael wrote:
> 
> On 5/20/2025 12:16 PM, Leon Romanovsky wrote:
> > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> > 
> > 
> > 
> > On Sun, May 18, 2025 at 11:56:56AM +0300, Margolin, Michael wrote:
> > > On 5/18/2025 9:42 AM, Leon Romanovsky wrote:
> > > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> > > > 
> > > > 
> > > > 
> > > > On Thu, May 15, 2025 at 02:50:40PM +0000, Michael Margolin wrote:
> > > > > Add an option to create CQ using external memory instead of allocating
> > > > > in the driver. The memory can be passed from userspace by dmabuf fd and
> > > > > an offset.
> > > > EFA is unique here. This patch is missing description of why it is
> > > > needed, and why existing solutions if any exist, can't be used.
> > > > 
> > > > Thanks
> > > I probably should have explained more, the purpose is creating CQs that
> > > reside in GPU HBM enabling low latency polling directly by the GPU. EFA
> > > isn't unique in receiving pre-allocated memory from userspace, the extension
> > > here is the use of dmabuf for that purpose as a general mechanism that
> > > allows using memory independent of its source. I will add more info in the
> > > commit message.
> > I think that this functionality is worth to have as general verb and not DV.
> > mlx5 has something similar and now EFA needs it too.
> > 
> > Let's wait for Jason's response before rushing to implement it.
> > 
> > Thanks
> 
> Jason, any thoughts on this?
> 
> We can probably add optional attributes to create CQ command, but most of
> the handling will remain vendor specific. I'm not convinced it's beneficial
> enough.

I don't know how a general verb would work, CQs in non-dv are polled
in software in userspace and the userspace will have trouble reaching
into a dmabuf. Plus the entire point of this is usually to write the
polling code in a GPU language and run it on a GPU processor.

Meaning I think all users of this will want to use a DV interface from
verbs.

At that point, is it worth adding more common verbs support?

Though it is becoming a bit messy that drivers are all open coding
creating normal or dmabuf umems. That part might be worth generalizing
some more, and then doing QP as well.

Jason

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH for-next] RDMA/efa: Add CQ with external memory support
  2025-05-25 17:52         ` Jason Gunthorpe
@ 2025-05-26 15:45           ` Margolin, Michael
  2025-05-26 16:08             ` Jason Gunthorpe
  0 siblings, 1 reply; 12+ messages in thread
From: Margolin, Michael @ 2025-05-26 15:45 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Leon Romanovsky, linux-rdma, sleybo, matua, gal.pressman,
	Daniel Kranzdorf, Yonatan Nachum


On 5/25/2025 8:52 PM, Jason Gunthorpe wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>
>
>
> On Wed, May 21, 2025 at 06:19:51PM +0300, Margolin, Michael wrote:
>> On 5/20/2025 12:16 PM, Leon Romanovsky wrote:
>>> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>>>
>>>
>>>
>>> On Sun, May 18, 2025 at 11:56:56AM +0300, Margolin, Michael wrote:
>>>> On 5/18/2025 9:42 AM, Leon Romanovsky wrote:
>>>>> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>>>>>
>>>>>
>>>>>
>>>>> On Thu, May 15, 2025 at 02:50:40PM +0000, Michael Margolin wrote:
>>>>>> Add an option to create CQ using external memory instead of allocating
>>>>>> in the driver. The memory can be passed from userspace by dmabuf fd and
>>>>>> an offset.
>>>>> EFA is unique here. This patch is missing description of why it is
>>>>> needed, and why existing solutions if any exist, can't be used.
>>>>>
>>>>> Thanks
>>>> I probably should have explained more, the purpose is creating CQs that
>>>> reside in GPU HBM enabling low latency polling directly by the GPU. EFA
>>>> isn't unique in receiving pre-allocated memory from userspace, the extension
>>>> here is the use of dmabuf for that purpose as a general mechanism that
>>>> allows using memory independent of its source. I will add more info in the
>>>> commit message.
>>> I think that this functionality is worth to have as general verb and not DV.
>>> mlx5 has something similar and now EFA needs it too.
>>>
>>> Let's wait for Jason's response before rushing to implement it.
>>>
>>> Thanks
>> Jason, any thoughts on this?
>>
>> We can probably add optional attributes to create CQ command, but most of
>> the handling will remain vendor specific. I'm not convinced it's beneficial
>> enough.
> I don't know how a general verb would work, CQs in non-dv are polled
> in software in userspace and the userspace will have trouble reaching
> into a dmabuf. Plus the entire point of this is usually to write the
> polling code in a GPU language and run it on a GPU processor.
>
> Meaning I think all users of this will want to use a DV interface from
> verbs.
>
> At that point, is it worth adding more common verbs support?
>
> Though it is becoming a bit messy that drivers are all open coding
> creating normal or dmabuf umems. That part might be worth generalizing
> some more, and then doing QP as well.
>
> Jason

Are you suggesting turning mlx5dv_devx_umem_reg into a common verb 
including the kernel part or some kind of rdma-core level abstraction 
for passing dmabuf+offset+length / address+length to a create CQ/QP 
function?

Michael


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH for-next] RDMA/efa: Add CQ with external memory support
  2025-05-26 15:45           ` Margolin, Michael
@ 2025-05-26 16:08             ` Jason Gunthorpe
  2025-05-26 16:17               ` Margolin, Michael
  0 siblings, 1 reply; 12+ messages in thread
From: Jason Gunthorpe @ 2025-05-26 16:08 UTC (permalink / raw)
  To: Margolin, Michael
  Cc: Leon Romanovsky, linux-rdma, sleybo, matua, gal.pressman,
	Daniel Kranzdorf, Yonatan Nachum

On Mon, May 26, 2025 at 06:45:59PM +0300, Margolin, Michael wrote:

> Are you suggesting turning mlx5dv_devx_umem_reg into a common verb including
> the kernel part or some kind of rdma-core level abstraction for passing
> dmabuf+offset+length / address+length to a create CQ/QP function?

I think Leon was, but I'm not sure that is so worthwhile.

I was thinking more of having the ioctls for things like QP/CQ/MR
accept a more standard common set of attributes to describe the buffer
memory and then making it simpler for the driver to get a umem from
those common attributes.

But EFA is already sort of different because it normally uses a kernel
allocated buffer, right?

Jason

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH for-next] RDMA/efa: Add CQ with external memory support
  2025-05-26 16:08             ` Jason Gunthorpe
@ 2025-05-26 16:17               ` Margolin, Michael
  2025-06-09 15:03                 ` Margolin, Michael
  0 siblings, 1 reply; 12+ messages in thread
From: Margolin, Michael @ 2025-05-26 16:17 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Leon Romanovsky, linux-rdma, sleybo, matua, gal.pressman,
	Daniel Kranzdorf, Yonatan Nachum


On 5/26/2025 7:08 PM, Jason Gunthorpe wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>
>
>
> On Mon, May 26, 2025 at 06:45:59PM +0300, Margolin, Michael wrote:
>
>> Are you suggesting turning mlx5dv_devx_umem_reg into a common verb including
>> the kernel part or some kind of rdma-core level abstraction for passing
>> dmabuf+offset+length / address+length to a create CQ/QP function?
> I think Leon was, but I'm not sure that is so worthwhile.
>
> I was thinking more of having the ioctls for things like QP/CQ/MR
> accept a more standard common set of attributes to describe the buffer
> memory and then making it simpler for the driver to get a umem from
> those common attributes.
>
> But EFA is alread sort of different because it normally uses a kernel
> allocated buffer, right?
>
> Jason

Yes, EFA is an example of a driver that doesn't need this in the
"standard" flow.

Michael


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH for-next] RDMA/efa: Add CQ with external memory support
  2025-05-26 16:17               ` Margolin, Michael
@ 2025-06-09 15:03                 ` Margolin, Michael
  2025-06-11 19:47                   ` Jason Gunthorpe
  0 siblings, 1 reply; 12+ messages in thread
From: Margolin, Michael @ 2025-06-09 15:03 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Leon Romanovsky, linux-rdma, sleybo, matua, gal.pressman,
	Daniel Kranzdorf, Yonatan Nachum


On 5/26/2025 7:17 PM, Margolin, Michael wrote:
>
> On 5/26/2025 7:08 PM, Jason Gunthorpe wrote:
>> CAUTION: This email originated from outside of the organization. Do 
>> not click links or open attachments unless you can confirm the sender 
>> and know the content is safe.
>>
>>
>>
>> On Mon, May 26, 2025 at 06:45:59PM +0300, Margolin, Michael wrote:
>>
>>> Are you suggesting turning mlx5dv_devx_umem_reg into a common verb 
>>> including
>>> the kernel part or some kind of rdma-core level abstraction for passing
>>> dmabuf+offset+length / address+length to a create CQ/QP function?
>> I think Leon was, but I'm not sure that is so worthwhile.
>>
>> I was thinking more of having the ioctls for things like QP/CQ/MR
>> accept a more standard common set of attributes to describe the buffer
>> memory and then making it simpler for the driver to get a umem from
>> those common attributes.
>>
>> But EFA is alread sort of different because it normally uses a kernel
>> allocated buffer, right?
>>
>> Jason
>
> Yes, EFA is an example for a driver that doesn't need this on the 
> "standard" flow.
>
> Michael
>
How can we move forward with this patch? It's possible to add additional
attributes to the common create CQ ioctl and use them for EFA direct verbs,
but it won't be easy to move existing drivers to use them.


Michael


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH for-next] RDMA/efa: Add CQ with external memory support
  2025-06-09 15:03                 ` Margolin, Michael
@ 2025-06-11 19:47                   ` Jason Gunthorpe
  2025-06-24 16:39                     ` Margolin, Michael
  0 siblings, 1 reply; 12+ messages in thread
From: Jason Gunthorpe @ 2025-06-11 19:47 UTC (permalink / raw)
  To: Margolin, Michael
  Cc: Leon Romanovsky, linux-rdma, sleybo, matua, gal.pressman,
	Daniel Kranzdorf, Yonatan Nachum

On Mon, Jun 09, 2025 at 06:03:32PM +0300, Margolin, Michael wrote:
> 
> On 5/26/2025 7:17 PM, Margolin, Michael wrote:
> > 
> > On 5/26/2025 7:08 PM, Jason Gunthorpe wrote:
> > > On Mon, May 26, 2025 at 06:45:59PM +0300, Margolin, Michael wrote:
> > > 
> > > > Are you suggesting turning mlx5dv_devx_umem_reg into a common
> > > > verb including
> > > > the kernel part or some kind of rdma-core level abstraction for passing
> > > > dmabuf+offset+length / address+length to a create CQ/QP function?
> > > I think Leon was, but I'm not sure that is so worthwhile.
> > > 
> > > I was thinking more of having the ioctls for things like QP/CQ/MR
> > > accept a more standard common set of attributes to describe the buffer
> > > memory and then making it simpler for the driver to get a umem from
> > > those common attributes.
> > > 
> > > But EFA is alread sort of different because it normally uses a kernel
> > > allocated buffer, right?
> > > 
> > > Jason
> > 
> > Yes, EFA is an example for a driver that doesn't need this on the
> > "standard" flow.
> > 
> > Michael
> > 
> How can we move forward with this patch? It's possible to add additional
> attributes to the common create CQ ioctl and use it for EFA direct verbs but
> it won't be easy to move existing drivers to use it.

Then it becomes hard to make the other drivers reject those new common
attributes :\

Maybe what you have here is the best, but it really does seem
unsatisfying from a kernel POV..

Jason

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH for-next] RDMA/efa: Add CQ with external memory support
  2025-06-11 19:47                   ` Jason Gunthorpe
@ 2025-06-24 16:39                     ` Margolin, Michael
  0 siblings, 0 replies; 12+ messages in thread
From: Margolin, Michael @ 2025-06-24 16:39 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Leon Romanovsky, linux-rdma, sleybo, matua, gal.pressman,
	Daniel Kranzdorf, Yonatan Nachum


On 6/11/2025 10:47 PM, Jason Gunthorpe wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>
>
>
> On Mon, Jun 09, 2025 at 06:03:32PM +0300, Margolin, Michael wrote:
>> On 5/26/2025 7:17 PM, Margolin, Michael wrote:
>>> On 5/26/2025 7:08 PM, Jason Gunthorpe wrote:
>>>> On Mon, May 26, 2025 at 06:45:59PM +0300, Margolin, Michael wrote:
>>>>
>>>>> Are you suggesting turning mlx5dv_devx_umem_reg into a common
>>>>> verb including
>>>>> the kernel part or some kind of rdma-core level abstraction for passing
>>>>> dmabuf+offset+length / address+length to a create CQ/QP function?
>>>> I think Leon was, but I'm not sure that is so worthwhile.
>>>>
>>>> I was thinking more of having the ioctls for things like QP/CQ/MR
>>>> accept a more standard common set of attributes to describe the buffer
>>>> memory and then making it simpler for the driver to get a umem from
>>>> those common attributes.
>>>>
>>>> But EFA is alread sort of different because it normally uses a kernel
>>>> allocated buffer, right?
>>>>
>>>> Jason
>>> Yes, EFA is an example for a driver that doesn't need this on the
>>> "standard" flow.
>>>
>>> Michael
>>>
>> How can we move forward with this patch? It's possible to add additional
>> attributes to the common create CQ ioctl and use it for EFA direct verbs but
>> it won't be easy to move existing drivers to use it.
> Then it becomes hard to make the other drivers reject those new common
> attributes :\
>
> Maybe what you have here is the best, but it really does seem
> unsatisfying from a kernel POV..
>
> Jason

Apologies for the delayed response; I want to make sure we are aligned on
the solution.

I'm going to add a common option in libibverbs for providers to pass
buffer info to the create CQ ioctl. When the new ioctl attributes are
passed to the kernel, core code will handle them by creating a umem object
and storing it in the CQ context for the driver's use.

Does this make sense?


Michael


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2025-06-24 16:40 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-15 14:50 [PATCH for-next] RDMA/efa: Add CQ with external memory support Michael Margolin
2025-05-18  6:42 ` Leon Romanovsky
2025-05-18  8:56   ` Margolin, Michael
2025-05-20  9:16     ` Leon Romanovsky
2025-05-21 15:19       ` Margolin, Michael
2025-05-25 17:52         ` Jason Gunthorpe
2025-05-26 15:45           ` Margolin, Michael
2025-05-26 16:08             ` Jason Gunthorpe
2025-05-26 16:17               ` Margolin, Michael
2025-06-09 15:03                 ` Margolin, Michael
2025-06-11 19:47                   ` Jason Gunthorpe
2025-06-24 16:39                     ` Margolin, Michael

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox