From: Michael Margolin <mrgolin@amazon.com>
To: <jgg@nvidia.com>, <leon@kernel.org>, <linux-rdma@vger.kernel.org>
Cc: <sleybo@amazon.com>, <matua@amazon.com>, <gal.pressman@linux.dev>,
"Daniel Kranzdorf" <dkkranzd@amazon.com>,
Yonatan Nachum <ynachum@amazon.com>
Subject: [PATCH for-next 2/2] RDMA/efa: Add CQ with external memory support
Date: Tue, 1 Jul 2025 23:15:45 +0000 [thread overview]
Message-ID: <20250701231545.14282-3-mrgolin@amazon.com> (raw)
In-Reply-To: <20250701231545.14282-1-mrgolin@amazon.com>
Add an option to create CQ using external memory instead of allocating
in the driver. The memory can be passed from userspace by dmabuf fd and
an offset or a VA. One of the possible usages is creating CQs that
reside in accelerator memory, allowing low latency asynchronous direct
polling from the accelerator device. Add a capability bit to reflect on
the feature support.
Reviewed-by: Daniel Kranzdorf <dkkranzd@amazon.com>
Reviewed-by: Yonatan Nachum <ynachum@amazon.com>
Signed-off-by: Michael Margolin <mrgolin@amazon.com>
---
drivers/infiniband/hw/efa/efa_main.c | 1 +
drivers/infiniband/hw/efa/efa_verbs.c | 47 ++++++++++++++++++++-------
include/uapi/rdma/efa-abi.h | 3 +-
3 files changed, 38 insertions(+), 13 deletions(-)
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 4f03c0ec819f..b066e8387df0 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -366,6 +366,7 @@ static const struct ib_device_ops efa_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_EFA,
.uverbs_abi_ver = EFA_UVERBS_ABI_VERSION,
+ .uverbs_support_cq_with_umem = 1,
.alloc_hw_port_stats = efa_alloc_hw_port_stats,
.alloc_hw_device_stats = efa_alloc_hw_device_stats,
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index a8645a40730f..5988f89837bd 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -249,6 +249,7 @@ int efa_query_device(struct ib_device *ibdev,
resp.max_rdma_size = dev_attr->max_rdma_size;
resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID;
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_EXT_MEM;
if (EFA_DEV_CAP(dev, RDMA_READ))
resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
@@ -1082,8 +1083,11 @@ int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
xa_erase(&dev->cqs_xa, cq->cq_idx);
synchronize_irq(cq->eq->irq.irqn);
}
- efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
- DMA_FROM_DEVICE);
+
+ if (ibcq->umem)
+ ib_umem_release(ibcq->umem);
+ else
+ efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, DMA_FROM_DEVICE);
return 0;
}
@@ -1133,8 +1137,10 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct efa_com_create_cq_result result;
struct ib_device *ibdev = ibcq->device;
struct efa_dev *dev = to_edev(ibdev);
+ struct ib_umem *umem = ibcq->umem;
struct efa_ibv_create_cq cmd = {};
struct efa_cq *cq = to_ecq(ibcq);
+ struct scatterlist *umem_sgl;
int entries = attr->cqe;
bool set_src_addr;
int err;
@@ -1202,11 +1208,24 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->ucontext = ucontext;
cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
- cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
- DMA_FROM_DEVICE);
- if (!cq->cpu_addr) {
- err = -ENOMEM;
- goto err_out;
+
+ if (umem) {
+ umem_sgl = ibcq->umem->sgt_append.sgt.sgl;
+ if (sg_dma_len(umem_sgl) < ib_umem_offset(umem) + cq->size) {
+ ibdev_dbg(&dev->ibdev, "Non contiguous CQ unsupported\n");
+ err = -EINVAL;
+ goto err_free_mem;
+ }
+
+ cq->cpu_addr = NULL;
+ cq->dma_addr = sg_dma_address(umem_sgl) + ib_umem_offset(umem);
+ } else {
+ cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
+ DMA_FROM_DEVICE);
+ if (!cq->cpu_addr) {
+ err = -ENOMEM;
+ goto err_out;
+ }
}
params.uarn = cq->ucontext->uarn;
@@ -1223,7 +1242,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
err = efa_com_create_cq(&dev->edev, ¶ms, &result);
if (err)
- goto err_free_mapped;
+ goto err_free_mem;
resp.db_off = result.db_off;
resp.cq_idx = result.cq_idx;
@@ -1231,7 +1250,9 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->ibcq.cqe = result.actual_depth;
WARN_ON_ONCE(entries != result.actual_depth);
- err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid);
+ if (!umem)
+ err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid);
+
if (err) {
ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
cq->cq_idx);
@@ -1269,9 +1290,11 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
efa_cq_user_mmap_entries_remove(cq);
err_destroy_cq:
efa_destroy_cq_idx(dev, cq->cq_idx);
-err_free_mapped:
- efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
- DMA_FROM_DEVICE);
+err_free_mem:
+ if (umem)
+ ib_umem_release(umem);
+ else
+ efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, DMA_FROM_DEVICE);
err_out:
atomic64_inc(&dev->stats.create_cq_err);
diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h
index 11b94b0b035b..98b71b9979f8 100644
--- a/include/uapi/rdma/efa-abi.h
+++ b/include/uapi/rdma/efa-abi.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
/*
- * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef EFA_ABI_USER_H
@@ -131,6 +131,7 @@ enum {
EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128 = 1 << 4,
EFA_QUERY_DEVICE_CAPS_RDMA_WRITE = 1 << 5,
EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV = 1 << 6,
+ EFA_QUERY_DEVICE_CAPS_CQ_WITH_EXT_MEM = 1 << 7,
};
struct efa_ibv_ex_query_device_resp {
--
2.47.1
next prev parent reply other threads:[~2025-07-01 23:16 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-01 23:15 [PATCH for-next 0/2] RDMA: Support CQs with user memory Michael Margolin
2025-07-01 23:15 ` [PATCH for-next 1/2] RDMA/uverbs: Add a common way to create CQ with umem Michael Margolin
2025-07-04 18:14 ` Jason Gunthorpe
2025-07-08 20:13 ` Margolin, Michael
2025-07-01 23:15 ` Michael Margolin [this message]
2025-07-04 18:28 ` [PATCH for-next 2/2] RDMA/efa: Add CQ with external memory support Jason Gunthorpe
2025-07-07 17:07 ` Margolin, Michael
2025-07-07 17:16 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250701231545.14282-3-mrgolin@amazon.com \
--to=mrgolin@amazon.com \
--cc=dkkranzd@amazon.com \
--cc=gal.pressman@linux.dev \
--cc=jgg@nvidia.com \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=matua@amazon.com \
--cc=sleybo@amazon.com \
--cc=ynachum@amazon.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox