From: Jacob Moroni <jmoroni@google.com>
To: tatyana.e.nikolova@intel.com, krzysztof.czurylo@intel.com,
jgg@ziepe.ca, leon@kernel.org
Cc: linux-rdma@vger.kernel.org, Jacob Moroni <jmoroni@google.com>
Subject: [RFC 1/2] RDMA/umem: Add support for pinned revocable dmabuf import
Date: Mon, 23 Feb 2026 19:53:32 +0000 [thread overview]
Message-ID: <20260223195333.438492-1-jmoroni@google.com> (raw)
In order to eventually import a dmabuf from VFIO, pinned importers
will need to support revocation. This can be achieved by allowing
the drivers to provide a revoke callback when obtaining the umem.
The drivers can use this callback to ensure that the region is
invalidated in a way that guarantees no further HW accesses, but,
in the case of an MR, does not actually release the key for reuse
until the region is fully deregistered (i.e., ibv_dereg_mr).
Note that revocation is asynchronous: drivers that switch to this new
routine must ensure that their internal state is protected against a
revoke callback arriving at any time.
Signed-off-by: Jacob Moroni <jmoroni@google.com>
---
drivers/infiniband/core/umem_dmabuf.c | 109 +++++++++++++++++++++++---
drivers/infiniband/hw/mlx5/mr.c | 2 +-
include/rdma/ib_umem.h | 27 ++++++-
3 files changed, 121 insertions(+), 17 deletions(-)
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
index f5298c33e581..cf97653df9a9 100644
--- a/drivers/infiniband/core/umem_dmabuf.c
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -195,22 +195,64 @@ static struct dma_buf_attach_ops ib_umem_dmabuf_attach_pinned_ops = {
.move_notify = ib_umem_dmabuf_unsupported_move_notify,
};
+static void __ib_umem_dmabuf_revoke(struct dma_buf_attachment *attach)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
+
+ dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+ if (umem_dmabuf->revoked)
+ return;
+
+ /* Will be NULL for drivers that do not request a revocable umem, or
+ * during the (protected) window between attach and pin+map_pages.
+ */
+ if (umem_dmabuf->revoke)
+ umem_dmabuf->revoke(umem_dmabuf->revoke_priv);
+
+ /* HW should no longer touch the memory at this point. */
+
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+ if (umem_dmabuf->pinned) {
+ dma_buf_unpin(umem_dmabuf->attach);
+ umem_dmabuf->pinned = 0;
+ }
+ umem_dmabuf->revoked = 1;
+}
+
+static struct dma_buf_attach_ops ib_umem_dmabuf_attach_pinned_revocable_ops = {
+ .allow_peer2peer = true,
+ .move_notify = __ib_umem_dmabuf_revoke,
+};
+
struct ib_umem_dmabuf *
ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device,
struct device *dma_device,
unsigned long offset, size_t size,
- int fd, int access)
+ int fd, int access,
+ void (*revoke)(void *priv),
+ void *revoke_priv)
{
struct ib_umem_dmabuf *umem_dmabuf;
+ struct dma_buf_attach_ops *ops;
int err;
+ ops = revoke ?
+ &ib_umem_dmabuf_attach_pinned_revocable_ops :
+ &ib_umem_dmabuf_attach_pinned_ops;
+
umem_dmabuf = ib_umem_dmabuf_get_with_dma_device(device, dma_device, offset,
- size, fd, access,
- &ib_umem_dmabuf_attach_pinned_ops);
+ size, fd, access, ops);
if (IS_ERR(umem_dmabuf))
return umem_dmabuf;
dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+
+ if (umem_dmabuf->revoked) {
+ err = -ENODEV;
+ goto err_release;
+ }
+
err = dma_buf_pin(umem_dmabuf->attach);
if (err)
goto err_release;
@@ -219,12 +261,17 @@ ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device,
err = ib_umem_dmabuf_map_pages(umem_dmabuf);
if (err)
goto err_unpin;
+
+ umem_dmabuf->revoke = revoke;
+ umem_dmabuf->revoke_priv = revoke_priv;
+
dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
return umem_dmabuf;
err_unpin:
dma_buf_unpin(umem_dmabuf->attach);
+ umem_dmabuf->pinned = 0;
err_release:
dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
ib_umem_release(&umem_dmabuf->umem);
@@ -238,24 +285,60 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device,
int access)
{
return ib_umem_dmabuf_get_pinned_with_dma_device(device, device->dma_device,
- offset, size, fd, access);
+ offset, size, fd, access,
+ NULL, NULL);
}
EXPORT_SYMBOL(ib_umem_dmabuf_get_pinned);
+/**
+ * ib_umem_dmabuf_get_pinned_revocable - Get a pinned but revocable umem dmabuf.
+ * @device: IB device.
+ * @offset: Start offset.
+ * @size: Length.
+ * @fd: dmabuf fd.
+ * @access: Access flags.
+ * @revoke: Driver revoke callback.
+ * @revoke_priv: Driver revoke callback private data.
+ *
+ * Obtains a umem from a dmabuf for drivers/devices that can support revocation.
+ *
+ * When a revocation occurs, the revoke callback will be called. The driver must
+ * ensure that the region is no longer accessed when the callback returns. Any
+ * subsequent access attempt should raise an asynchronous error (AE).
+ *
+ * If the umem is used for an MR, the driver must ensure that the key remains in
+ * use such that it cannot be obtained by a new region until this region is
+ * fully deregistered (i.e., ibv_dereg_mr).
+ *
+ * If a driver needs to serialize with revoke calls, it can use dma_resv_lock to
+ * avoid needing to embed a lock into every MR.
+ *
+ * If successful, then the revoke callback may be called at any time and will
+ * also be called automatically upon ib_umem_release (serialized). The revoke
+ * callback will be called one time at most.
+ *
+ * If unsuccessful, then the revoke callback will never be called.
+ */
+struct ib_umem_dmabuf *
+ib_umem_dmabuf_get_pinned_revocable(struct ib_device *device,
+ unsigned long offset,
+ size_t size, int fd,
+ int access,
+ void (*revoke)(void *priv),
+ void *revoke_priv)
+{
+ return ib_umem_dmabuf_get_pinned_with_dma_device(device, device->dma_device,
+ offset, size, fd, access,
+ revoke, revoke_priv);
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_get_pinned_revocable);
+
void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf)
{
struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf;
dma_resv_lock(dmabuf->resv, NULL);
- if (umem_dmabuf->revoked)
- goto end;
- ib_umem_dmabuf_unmap_pages(umem_dmabuf);
- if (umem_dmabuf->pinned) {
- dma_buf_unpin(umem_dmabuf->attach);
- umem_dmabuf->pinned = 0;
- }
- umem_dmabuf->revoked = 1;
-end:
+ __ib_umem_dmabuf_revoke(umem_dmabuf->attach);
dma_resv_unlock(dmabuf->resv);
}
EXPORT_SYMBOL(ib_umem_dmabuf_revoke);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 665323b90b64..ad8b5bcf1b41 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1648,7 +1648,7 @@ reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
else if (dma_device)
umem_dmabuf = ib_umem_dmabuf_get_pinned_with_dma_device(&dev->ib_dev,
dma_device, offset, length,
- fd, access_flags);
+ fd, access_flags, NULL, NULL);
else
umem_dmabuf = ib_umem_dmabuf_get_pinned(
&dev->ib_dev, offset, length, fd, access_flags);
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 0a8e092c0ea8..3d37d5b79dd4 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -36,6 +36,8 @@ struct ib_umem_dmabuf {
struct scatterlist *last_sg;
unsigned long first_sg_offset;
unsigned long last_sg_trim;
+ void (*revoke)(void *priv);
+ void *revoke_priv;
void *private;
u8 pinned : 1;
u8 revoked : 1;
@@ -169,10 +171,19 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device,
size_t size, int fd,
int access);
struct ib_umem_dmabuf *
+ib_umem_dmabuf_get_pinned_revocable(struct ib_device *device,
+ unsigned long offset,
+ size_t size, int fd,
+ int access,
+ void (*revoke)(void *priv),
+ void *revoke_priv);
+struct ib_umem_dmabuf *
ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device,
struct device *dma_device,
unsigned long offset, size_t size,
- int fd, int access);
+ int fd, int access,
+ void (*revoke)(void *priv),
+ void *revoke_priv);
int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf);
void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf);
void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf);
@@ -220,12 +231,22 @@ ib_umem_dmabuf_get_pinned(struct ib_device *device, unsigned long offset,
{
return ERR_PTR(-EOPNOTSUPP);
}
-
+static inline struct ib_umem_dmabuf *
+ib_umem_dmabuf_get_pinned_revocable(struct ib_device *device,
+ unsigned long offset,
+ size_t size, int fd, int access,
+ void (*revoke)(void *priv),
+ void *revoke_priv)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
static inline struct ib_umem_dmabuf *
ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device,
struct device *dma_device,
unsigned long offset, size_t size,
- int fd, int access)
+ int fd, int access,
+ void (*revoke)(void *priv),
+ void *revoke_priv)
{
return ERR_PTR(-EOPNOTSUPP);
}
--
2.53.0.371.g1d285c8824-goog
next reply other threads:[~2026-02-23 19:53 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-23 19:53 Jacob Moroni [this message]
2026-02-23 19:53 ` [RFC 2/2] RDMA/irdma: Add pinned revocable dmabuf support Jacob Moroni
2026-02-24 18:51 ` Jason Gunthorpe
2026-02-25 21:02 ` Jacob Moroni
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260223195333.438492-1-jmoroni@google.com \
--to=jmoroni@google.com \
--cc=jgg@ziepe.ca \
--cc=krzysztof.czurylo@intel.com \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=tatyana.e.nikolova@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox