From: Jiri Pirko <jiri@resnulli.us>
To: linux-rdma@vger.kernel.org
Cc: jgg@ziepe.ca, leon@kernel.org, mrgolin@amazon.com,
gal.pressman@linux.dev, sleybo@amazon.com, parav@nvidia.com,
mbloch@nvidia.com, yanjun.zhu@linux.dev,
marco.crivellari@suse.com, roman.gushchin@linux.dev,
phaddad@nvidia.com, lirongqing@baidu.com, ynachum@amazon.com,
huangjunxian6@hisilicon.com, kalesh-anakkur.purayil@broadcom.com,
ohartoov@nvidia.com, michaelgur@nvidia.com, shayd@nvidia.com,
edwards@nvidia.com, sriharsha.basavapatna@broadcom.com,
andrew.gospodarek@broadcom.com, selvin.xavier@broadcom.com
Subject: [PATCH rdma-next v2 01/15] RDMA/core: Introduce generic buffer descriptor infrastructure for umem
Date: Sat, 11 Apr 2026 16:49:01 +0200 [thread overview]
Message-ID: <20260411144915.114571-2-jiri@resnulli.us> (raw)
In-Reply-To: <20260411144915.114571-1-jiri@resnulli.us>
From: Jiri Pirko <jiri@nvidia.com>
Add a unified mechanism for userspace to pass memory buffers to any
uverbs command via a single UVERBS_ATTR_BUFFERS attribute. Each
buffer is described by struct ib_uverbs_buffer_desc with a type
discriminator supporting dma-buf and user VA backed memory, extensible
for future buffer types.
The ib_umem_list API enables any uverbs command to accept multiple
buffers indexed by per-command slot enums, without requiring new UAPI
attributes for each buffer. A consumption check ensures userspace and
driver agree on which buffers are used.
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
---
drivers/infiniband/core/umem.c | 248 ++++++++++++++++++++++++
include/rdma/ib_umem.h | 54 ++++++
include/rdma/uverbs_ioctl.h | 14 ++
include/uapi/rdma/ib_user_ioctl_cmds.h | 1 +
include/uapi/rdma/ib_user_ioctl_verbs.h | 27 +++
5 files changed, 344 insertions(+)
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 786fa1aa8e55..f5b03e903b9d 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -37,6 +37,7 @@
#include <linux/dma-mapping.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
+#include <linux/err.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
@@ -332,3 +333,250 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
return 0;
}
EXPORT_SYMBOL(ib_umem_copy_from);
+
+struct ib_umem_list {
+ unsigned int count; /* Number of slots in umems[] (slot_max + 1). */
+ unsigned long provided; /* Bitmask of slots filled from user descriptors or ib_umem_list_insert(). */
+ unsigned long loaded; /* Bitmask of slots handed out by ib_umem_list_load(). */
+ struct ib_umem *umems[] __counted_by(count); /* Per-slot umem pointers; NULL while a slot is empty. */
+};
+
+/**
+ * ib_umem_list_create - Create a umem list from UVERBS_ATTR_BUFFERS
+ * @device: IB device
+ * @attrs: uverbs attribute bundle
+ * @slot_max: highest buffer slot index (count = slot_max + 1); must be
+ *	      smaller than BITS_PER_LONG because slots are tracked in
+ *	      unsigned long bitmasks
+ *
+ * Maps every descriptor of the optional UVERBS_ATTR_BUFFERS array into the
+ * slot selected by its index field and marks that slot as provided. A
+ * missing attribute yields an empty list. A descriptor is rejected with
+ * -EINVAL when its reserved field is non-zero, its index is out of range or
+ * already used, its type is unknown, or its addr/length do not fit the
+ * native unsigned long/size_t types the umem helpers take.
+ *
+ * On failure every umem mapped so far is released again. On success the
+ * caller owns the list and frees it with ib_umem_list_release().
+ *
+ * Return: umem list, or ERR_PTR on failure.
+ */
+struct ib_umem_list *ib_umem_list_create(struct ib_device *device,
+					 const struct uverbs_attr_bundle *attrs,
+					 unsigned int slot_max)
+{
+	const struct ib_uverbs_buffer_desc *descs;
+	struct ib_umem_dmabuf *umem_dmabuf;
+	struct ib_umem_list *list;
+	struct ib_umem *umem;
+	unsigned int count;
+	int num_descs;
+	int err;
+	int i;
+
+	/* provided/loaded are single unsigned long bitmasks. */
+	if (WARN_ON_ONCE(slot_max >= BITS_PER_LONG))
+		return ERR_PTR(-EINVAL);
+	count = slot_max + 1;
+
+	num_descs = uverbs_attr_ptr_get_array_size(
+		(struct uverbs_attr_bundle *)attrs, UVERBS_ATTR_BUFFERS,
+		sizeof(*descs));
+	if (num_descs == -ENOENT) {
+		/* The attribute is optional: build an empty list. */
+		num_descs = 0;
+		descs = NULL;
+	} else if (num_descs < 0) {
+		return ERR_PTR(num_descs);
+	} else if (num_descs > count) {
+		/* Indices must be unique, so there cannot be more than count. */
+		return ERR_PTR(-EINVAL);
+	} else {
+		descs = uverbs_attr_get_alloced_ptr(attrs, UVERBS_ATTR_BUFFERS);
+		if (IS_ERR(descs))
+			return ERR_CAST(descs);
+	}
+
+	list = kzalloc(struct_size(list, umems, count), GFP_KERNEL);
+	if (!list)
+		return ERR_PTR(-ENOMEM);
+	list->count = count;
+
+	for (i = 0; i < num_descs; i++) {
+		const struct ib_uverbs_buffer_desc *desc = &descs[i];
+		unsigned long addr = desc->addr;
+		size_t length = desc->length;
+		unsigned int idx = desc->index;
+
+		if (desc->reserved) {
+			err = -EINVAL;
+			goto err_release;
+		}
+		if (idx >= count || (list->provided & BIT(idx))) {
+			err = -EINVAL;
+			goto err_release;
+		}
+		/*
+		 * addr and length are 64-bit in the UAPI but the umem helpers
+		 * take unsigned long/size_t. Refuse values that would be
+		 * silently truncated on 32-bit kernels instead of mapping a
+		 * different range than the one userspace asked for.
+		 */
+		if (addr != desc->addr || length != desc->length) {
+			err = -EINVAL;
+			goto err_release;
+		}
+
+		switch (desc->type) {
+		case IB_UVERBS_BUFFER_TYPE_DMABUF:
+			/* For dma-buf, addr is the offset inside the dma-buf. */
+			umem_dmabuf = ib_umem_dmabuf_get_pinned(
+				device, addr, length, desc->fd,
+				IB_ACCESS_LOCAL_WRITE);
+			if (IS_ERR(umem_dmabuf)) {
+				err = PTR_ERR(umem_dmabuf);
+				goto err_release;
+			}
+			list->umems[idx] = &umem_dmabuf->umem;
+			break;
+		case IB_UVERBS_BUFFER_TYPE_VA:
+			umem = ib_umem_get(device, addr, length,
+					   IB_ACCESS_LOCAL_WRITE);
+			if (IS_ERR(umem)) {
+				err = PTR_ERR(umem);
+				goto err_release;
+			}
+			list->umems[idx] = umem;
+			break;
+		default:
+			err = -EINVAL;
+			goto err_release;
+		}
+		list->provided |= BIT(idx);
+	}
+
+	return list;
+
+err_release:
+	/* Releases every umem stored so far and frees the list itself. */
+	ib_umem_list_release(list);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ib_umem_list_create);
+
+/**
+ * ib_umem_list_release - Drop every umem held by a list and free the list
+ * @list: umem list (a NULL list is ignored)
+ *
+ * Each slot is handed to ib_umem_release() in ascending slot order, then
+ * the list itself is freed.
+ */
+void ib_umem_list_release(struct ib_umem_list *list)
+{
+	unsigned int slot;
+
+	if (!list)
+		return;
+
+	slot = 0;
+	while (slot < list->count) {
+		ib_umem_release(list->umems[slot]);
+		slot++;
+	}
+	kfree(list);
+}
+EXPORT_SYMBOL(ib_umem_list_release);
+
+/**
+ * ib_umem_list_check_consumed - Check that no provided umem was left unused
+ * @list: umem list
+ *
+ * A slot counts as consumed once its bit is set in the loaded bitmask.
+ *
+ * Return: 0 if every provided slot was also loaded, -EINVAL otherwise.
+ */
+int ib_umem_list_check_consumed(const struct ib_umem_list *list)
+{
+	unsigned long unconsumed = list->provided & ~list->loaded;
+
+	if (unconsumed)
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL(ib_umem_list_check_consumed);
+
+/**
+ * ib_umem_list_insert - Insert a umem into the list at a given index
+ * @list: umem list (may be NULL)
+ * @index: per-command buffer slot index
+ * @umem: umem pointer to store
+ *
+ * Stores @umem at @index (replacing any existing) and, when @umem is
+ * non-NULL, marks the slot as provided. For use from create_cq when the
+ * buffer comes from legacy ATTRs rather than the buffer list.
+ *
+ * A NULL @list or an @index outside the list is ignored, exactly as
+ * ib_umem_list_replace() does; in that case nothing is stored and the
+ * provided bitmask is left untouched.
+ */
+void ib_umem_list_insert(struct ib_umem_list *list, unsigned int index,
+			 struct ib_umem *umem)
+{
+	/*
+	 * ib_umem_list_replace() silently skips these cases, so the bitmask
+	 * must not be updated either: that would dereference a NULL list or
+	 * flag a slot as provided although no umem was stored for it.
+	 */
+	if (!list || index >= list->count)
+		return;
+
+	ib_umem_list_replace(list, index, umem);
+	if (umem)
+		list->provided |= BIT(index);
+}
+EXPORT_SYMBOL(ib_umem_list_insert);
+
+/**
+ * ib_umem_list_load - Fetch the umem stored in a slot
+ * @list: umem list (may be NULL)
+ * @index: per-command buffer slot index
+ * @size: minimum length the umem must have
+ *
+ * A successful load sets the slot's bit in the loaded bitmask, which is
+ * what ib_umem_list_check_consumed() later compares against.
+ *
+ * Return: the umem on success; NULL if @list is NULL, @index is out of
+ * bounds or the slot is empty; ERR_PTR(-EINVAL) if the stored umem is
+ * shorter than @size.
+ */
+struct ib_umem *ib_umem_list_load(struct ib_umem_list *list,
+				  unsigned int index, size_t size)
+{
+	struct ib_umem *entry = NULL;
+
+	if (list && index < list->count)
+		entry = list->umems[index];
+	if (!entry)
+		return NULL;
+
+	if (size > entry->length)
+		return ERR_PTR(-EINVAL);
+
+	list->loaded |= BIT(index);
+	return entry;
+}
+EXPORT_SYMBOL(ib_umem_list_load);
+
+/**
+ * ib_umem_list_load_or_get - Take the listed umem, else pin user memory
+ * @list: umem list (may be NULL)
+ * @index: per-command buffer slot index
+ * @device: IB device passed to ib_umem_get() when the slot is empty
+ * @addr: user virtual address passed to ib_umem_get()
+ * @size: minimum length of a listed umem, and length passed to ib_umem_get()
+ * @access: access flags passed to ib_umem_get()
+ *
+ * First tries ib_umem_list_load(); a umem found there (slot marked loaded)
+ * or an error from its size check is returned as is. Only when the slot is
+ * empty is ib_umem_get() called. A umem obtained that way is stored at
+ * @index if @list is non-NULL and @index is within the list; the provided
+ * and loaded bitmasks are not changed for it.
+ *
+ * Return: valid umem pointer, or ERR_PTR.
+ */
+struct ib_umem *ib_umem_list_load_or_get(struct ib_umem_list *list,
+					 unsigned int index,
+					 struct ib_device *device,
+					 unsigned long addr, size_t size,
+					 int access)
+{
+	struct ib_umem *umem = ib_umem_list_load(list, index, size);
+
+	/* Non-NULL covers both a listed umem and an ERR_PTR from the load. */
+	if (umem)
+		return umem;
+
+	umem = ib_umem_get(device, addr, size, access);
+	if (!IS_ERR(umem) && list && index < list->count)
+		list->umems[index] = umem;
+	return umem;
+}
+EXPORT_SYMBOL(ib_umem_list_load_or_get);
+
+/**
+ * ib_umem_list_replace - Swap the umem stored in a slot
+ * @list: umem list (may be NULL)
+ * @index: per-command buffer slot index
+ * @umem: new umem pointer (may be NULL to clear the slot)
+ *
+ * Puts @umem into the slot at @index and releases the umem that was there
+ * before, unless the slot was empty or already held @umem. A NULL @list or
+ * an out-of-range @index makes the call a no-op. The provided and loaded
+ * bitmasks are not modified. Used for CQ resize and similar.
+ */
+void ib_umem_list_replace(struct ib_umem_list *list, unsigned int index,
+			  struct ib_umem *umem)
+{
+	struct ib_umem **slot;
+	struct ib_umem *prev;
+
+	if (!list || index >= list->count)
+		return;
+
+	slot = &list->umems[index];
+	prev = *slot;
+	*slot = umem;
+
+	/* Storing the same pointer again must not drop it. */
+	if (prev && prev != umem)
+		ib_umem_release(prev);
+}
+EXPORT_SYMBOL(ib_umem_list_replace);
+
+/**
+ * ib_umem_release_non_listed - Release a umem unless the list holds it
+ * @list: umem list (may be NULL)
+ * @index: per-command buffer slot index
+ * @umem: umem pointer to release
+ *
+ * @umem is left alone only when the slot at @index currently stores exactly
+ * this pointer. With a NULL @list, an out-of-range @index or a different
+ * slot content, @umem is passed to ib_umem_release().
+ */
+void ib_umem_release_non_listed(struct ib_umem_list *list, unsigned int index,
+				struct ib_umem *umem)
+{
+	/* Held by the list: nothing to release here. */
+	if (list && index < list->count && list->umems[index] == umem)
+		return;
+
+	ib_umem_release(umem);
+}
+EXPORT_SYMBOL(ib_umem_release_non_listed);
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 2ad52cc1d52b..924acb8d08c3 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -11,6 +11,7 @@
struct ib_device;
struct dma_buf_attach_ops;
+struct uverbs_attr_bundle;
struct ib_umem {
struct ib_device *ibdev;
@@ -80,6 +81,36 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
void ib_umem_release(struct ib_umem *umem);
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
size_t length);
+
+/**
+ * struct ib_umem_list - collection of pre-mapped umems
+ *
+ * Created from the UVERBS_ATTR_BUFFERS attribute. Each entry is indexed
+ * by a per-command buffer slot enum (e.g., IB_UMEM_CQ_BUF for CQ CREATE).
+ * Drivers use ib_umem_list_load() to retrieve a specific umem by index.
+ *
+ * The type is only forward-declared in this header; users hold a pointer
+ * obtained from ib_umem_list_create() and pass it to the functions declared
+ * below. ib_umem_list_release() frees the list together with the umems it
+ * still stores.
+ */
+struct ib_umem_list;
+
+struct ib_umem_list *ib_umem_list_create(struct ib_device *device,
+ const struct uverbs_attr_bundle *attrs,
+ unsigned int slot_max);
+void ib_umem_list_release(struct ib_umem_list *list);
+int ib_umem_list_check_consumed(const struct ib_umem_list *list);
+void ib_umem_list_insert(struct ib_umem_list *list, unsigned int index,
+ struct ib_umem *umem);
+
+struct ib_umem *ib_umem_list_load(struct ib_umem_list *list,
+ unsigned int index, size_t size);
+struct ib_umem *ib_umem_list_load_or_get(struct ib_umem_list *list,
+ unsigned int index,
+ struct ib_device *device,
+ unsigned long addr, size_t size,
+ int access);
+void ib_umem_list_replace(struct ib_umem_list *list, unsigned int index,
+ struct ib_umem *umem);
+void ib_umem_release_non_listed(struct ib_umem_list *list, unsigned int index,
+ struct ib_umem *umem);
+
unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
unsigned long pgsz_bitmap,
unsigned long virt);
@@ -230,5 +261,28 @@ static inline void ib_umem_dmabuf_revoke_lock(struct ib_umem_dmabuf *umem_dmabuf
static inline void ib_umem_dmabuf_revoke_unlock(struct ib_umem_dmabuf *umem_dmabuf) {}
static inline void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf) {}
+struct ib_umem_list;
+
+/*
+ * Stubs for builds without CONFIG_INFINIBAND_USER_MEM. Every function that
+ * is declared for the enabled case needs a counterpart here so that callers
+ * outside umem.c still compile. No umem can be created in this
+ * configuration, so the functions returning a pointer fail with -EOPNOTSUPP
+ * and the remaining ones have nothing to do.
+ * NOTE(review): a caller that creates the list unconditionally will now get
+ * -EOPNOTSUPP on such builds - confirm this is the intended policy.
+ */
+static inline struct ib_umem_list *
+ib_umem_list_create(struct ib_device *device,
+		    const struct uverbs_attr_bundle *attrs,
+		    unsigned int slot_max)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+static inline void ib_umem_list_release(struct ib_umem_list *list) { }
+static inline int ib_umem_list_check_consumed(const struct ib_umem_list *list)
+{
+	/* Nothing can have been provided, so nothing is left unconsumed. */
+	return 0;
+}
+static inline void ib_umem_list_insert(struct ib_umem_list *list,
+				       unsigned int index,
+				       struct ib_umem *umem) { }
+static inline struct ib_umem *ib_umem_list_load(struct ib_umem_list *list,
+						unsigned int index,
+						size_t size)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+static inline struct ib_umem *
+ib_umem_list_load_or_get(struct ib_umem_list *list, unsigned int index,
+			 struct ib_device *device, unsigned long addr,
+			 size_t size, int access)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+static inline void ib_umem_list_replace(struct ib_umem_list *list,
+					unsigned int index,
+					struct ib_umem *umem) { }
+static inline void ib_umem_release_non_listed(struct ib_umem_list *list,
+					      unsigned int index,
+					      struct ib_umem *umem) { }
+
#endif /* CONFIG_INFINIBAND_USER_MEM */
#endif /* IB_UMEM_H */
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index e2af17da3e32..05bcab27a87d 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -590,6 +590,20 @@ struct uapi_definition {
UA_OPTIONAL, \
.is_udata = 1)
+/*
+ * Optional array of struct ib_uverbs_buffer_desc describing memory regions
+ * backed by dma-buf or user virtual address. Can be added to any method
+ * that needs external buffer support.
+ * Each entry carries an index field selecting the per-command buffer slot.
+ * Use ib_umem_list_create() to map them and ib_umem_list_load() to access.
+ *
+ * The attribute is declared as an input pointer attribute whose minimum size
+ * is one descriptor, and it is flagged UA_OPTIONAL and UA_ALLOC_AND_COPY.
+ * The macro takes no arguments because the attribute id is always
+ * UVERBS_ATTR_BUFFERS.
+ */
+#define UVERBS_ATTR_BUFFERS() \
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_BUFFERS, \
+ UVERBS_ATTR_MIN_SIZE( \
+ sizeof(struct ib_uverbs_buffer_desc)), \
+ UA_OPTIONAL, \
+ UA_ALLOC_AND_COPY)
+
/* =================================================
* Parsing infrastructure
* =================================================
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 72041c1b0ea5..10aa6568abf1 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -64,6 +64,7 @@ enum {
UVERBS_ATTR_UHW_IN = UVERBS_ID_DRIVER_NS,
UVERBS_ATTR_UHW_OUT,
UVERBS_ID_DRIVER_NS_WITH_UHW,
+ UVERBS_ATTR_BUFFERS,
};
enum uverbs_methods_device {
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 90c5cd8e7753..41ed9f75b4de 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -273,4 +273,31 @@ struct ib_uverbs_gid_entry {
__u32 netdev_ifindex; /* It is 0 if there is no netdev associated with it */
};
+enum ib_uverbs_buffer_type {
+ IB_UVERBS_BUFFER_TYPE_DMABUF, /* dma-buf backed: fd names the dma-buf, addr is an offset within it */
+ IB_UVERBS_BUFFER_TYPE_VA, /* user memory: addr is a user virtual address */
+};
+
+/*
+ * Describes a single buffer backed by dma-buf or user virtual address.
+ * Passed as an array via UVERBS_ATTR_BUFFERS. Each uverbs command that
+ * accepts this attribute defines its own per-command buffer slot enum.
+ * The index field selects the buffer slot this descriptor maps to.
+ *
+ * ib_umem_list_create() rejects the whole array with -EINVAL if any
+ * descriptor has a non-zero reserved field, an unknown type, an index
+ * beyond the command's slots, or an index already used by an earlier
+ * descriptor in the same array.
+ *
+ * @fd: dma-buf file descriptor (valid for IB_UVERBS_BUFFER_TYPE_DMABUF;
+ *      not read by ib_umem_list_create() for IB_UVERBS_BUFFER_TYPE_VA)
+ * @type: buffer type from enum ib_uverbs_buffer_type
+ * @index: per-command buffer slot index
+ * @reserved: must be zero
+ * @addr: offset within dma-buf, or user virtual address for VA
+ * @length: buffer length in bytes
+ */
+struct ib_uverbs_buffer_desc {
+ __s32 fd;
+ __u32 type;
+ __u32 index;
+ __u32 reserved;
+ __aligned_u64 addr;
+ __aligned_u64 length;
+};
+
#endif
--
2.53.0
next prev parent reply other threads:[~2026-04-11 14:49 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-11 14:49 [PATCH rdma-next v2 00/15] RDMA: Introduce generic buffer descriptor infrastructure for umem Jiri Pirko
2026-04-11 14:49 ` Jiri Pirko [this message]
2026-04-12 12:33 ` [PATCH rdma-next v2 01/15] RDMA/core: " Michael Margolin
2026-04-11 14:49 ` [PATCH rdma-next v2 02/15] RDMA/uverbs: Push out CQ buffer umem processing into a helper Jiri Pirko
2026-04-11 14:49 ` [PATCH rdma-next v2 03/15] RDMA/uverbs: Integrate umem_list into CQ creation Jiri Pirko
2026-04-11 14:49 ` [PATCH rdma-next v2 04/15] RDMA/efa: Use umem_list for user CQ buffer Jiri Pirko
2026-04-11 14:49 ` [PATCH rdma-next v2 05/15] RDMA/mlx5: " Jiri Pirko
2026-04-11 14:49 ` [PATCH rdma-next v2 06/15] RDMA/bnxt_re: " Jiri Pirko
2026-04-11 14:49 ` [PATCH rdma-next v2 07/15] RDMA/mlx4: " Jiri Pirko
2026-04-11 14:49 ` [PATCH rdma-next v2 08/15] RDMA/uverbs: Remove legacy umem field from struct ib_cq Jiri Pirko
2026-04-11 14:49 ` [PATCH rdma-next v2 09/15] RDMA/uverbs: Verify all umem_list buffers are consumed after CQ creation Jiri Pirko
2026-04-11 14:49 ` [PATCH rdma-next v2 10/15] RDMA/uverbs: Integrate umem_list into QP creation Jiri Pirko
2026-04-11 14:49 ` [PATCH rdma-next v2 11/15] RDMA/mlx5: Use umem_list for QP buffers in create_qp Jiri Pirko
2026-04-11 14:49 ` [PATCH rdma-next v2 12/15] RDMA/uverbs: Add doorbell record buffer slot to CQ umem_list Jiri Pirko
2026-04-11 14:49 ` [PATCH rdma-next v2 13/15] RDMA/mlx5: Use umem_list for CQ doorbell record Jiri Pirko
2026-04-11 14:49 ` [PATCH rdma-next v2 14/15] RDMA/uverbs: Add doorbell record buffer slot to QP umem_list Jiri Pirko
2026-04-11 14:49 ` [PATCH rdma-next v2 15/15] RDMA/mlx5: Use umem_list for QP doorbell record Jiri Pirko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260411144915.114571-2-jiri@resnulli.us \
--to=jiri@resnulli.us \
--cc=andrew.gospodarek@broadcom.com \
--cc=edwards@nvidia.com \
--cc=gal.pressman@linux.dev \
--cc=huangjunxian6@hisilicon.com \
--cc=jgg@ziepe.ca \
--cc=kalesh-anakkur.purayil@broadcom.com \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=lirongqing@baidu.com \
--cc=marco.crivellari@suse.com \
--cc=mbloch@nvidia.com \
--cc=michaelgur@nvidia.com \
--cc=mrgolin@amazon.com \
--cc=ohartoov@nvidia.com \
--cc=parav@nvidia.com \
--cc=phaddad@nvidia.com \
--cc=roman.gushchin@linux.dev \
--cc=selvin.xavier@broadcom.com \
--cc=shayd@nvidia.com \
--cc=sleybo@amazon.com \
--cc=sriharsha.basavapatna@broadcom.com \
--cc=yanjun.zhu@linux.dev \
--cc=ynachum@amazon.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox