From: Jacob Moroni <jmoroni@google.com>
To: tatyana.e.nikolova@intel.com, krzysztof.czurylo@intel.com,
jgg@ziepe.ca, leon@kernel.org
Cc: linux-rdma@vger.kernel.org, Jacob Moroni <jmoroni@google.com>
Subject: [RFC 2/2] RDMA/irdma: Add pinned revocable dmabuf support
Date: Mon, 23 Feb 2026 19:53:33 +0000 [thread overview]
Message-ID: <20260223195333.438492-2-jmoroni@google.com> (raw)
In-Reply-To: <20260223195333.438492-1-jmoroni@google.com>
Some dmabuf exporters (like VFIO) will require that pinned
importers support revocation. In order to support this, the new
ib_umem_dmabuf_get_pinned_revocable method can be used, which
allows the driver to provide a revoke callback for the umem.
Upon revocation, the driver will invalidate the region in HW
so that it is no longer accessed.
It is worth noting that the irdma driver handles MR key allocation
in software; the command submitted to hardware during the revoke
invalidates the key, but the key is not available for reuse until
the region is fully deregistered (i.e., ibv_dereg_mr).
Tested with lockdep+kasan and a modified VFIO that allows pinned
importers: a VFIO_DEVICE_RESET was triggered while the region was
registered to verify that the revoke callback is executed properly.
Signed-off-by: Jacob Moroni <jmoroni@google.com>
---
drivers/infiniband/hw/irdma/main.h | 1 +
drivers/infiniband/hw/irdma/verbs.c | 125 ++++++++++++++++++++--------
drivers/infiniband/hw/irdma/verbs.h | 1 +
3 files changed, 94 insertions(+), 33 deletions(-)
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index d320d1a228b3..240c7977903d 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -20,6 +20,7 @@
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
+#include <linux/dma-resv.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/io.h>
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 15af53237217..c269f0954f82 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -3359,19 +3359,14 @@ static int irdma_reg_user_mr_type_mem(struct irdma_mr *iwmr, int access,
return err;
}
-static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region,
- struct ib_pd *pd, u64 virt,
- enum irdma_memreg_type reg_type)
+static int irdma_init_iwmr(struct irdma_mr *iwmr, struct ib_umem *region,
+ struct ib_pd *pd, u64 virt,
+ enum irdma_memreg_type reg_type)
{
struct irdma_device *iwdev = to_iwdev(pd->device);
struct irdma_pbl *iwpbl;
- struct irdma_mr *iwmr;
unsigned long pgsz_bitmap;
- iwmr = kzalloc_obj(*iwmr);
- if (!iwmr)
- return ERR_PTR(-ENOMEM);
-
iwpbl = &iwmr->iwpbl;
iwpbl->iwmr = iwmr;
iwmr->region = region;
@@ -3384,21 +3379,14 @@ static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region,
iwdev->rf->sc_dev.hw_attrs.page_size_cap : SZ_4K;
iwmr->page_size = ib_umem_find_best_pgsz(region, pgsz_bitmap, virt);
- if (unlikely(!iwmr->page_size)) {
- kfree(iwmr);
- return ERR_PTR(-EOPNOTSUPP);
- }
+ if (unlikely(!iwmr->page_size))
+ return -EOPNOTSUPP;
iwmr->len = region->length;
iwpbl->user_base = virt;
iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size);
- return iwmr;
-}
-
-static void irdma_free_iwmr(struct irdma_mr *iwmr)
-{
- kfree(iwmr);
+ return 0;
}
static int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req,
@@ -3547,12 +3535,16 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
return ERR_PTR(-EFAULT);
}
- iwmr = irdma_alloc_iwmr(region, pd, virt, req.reg_type);
- if (IS_ERR(iwmr)) {
+ iwmr = kzalloc_obj(*iwmr);
+ if (!iwmr) {
ib_umem_release(region);
- return (struct ib_mr *)iwmr;
+ return ERR_PTR(-ENOMEM);
}
+ err = irdma_init_iwmr(iwmr, region, pd, virt, req.reg_type);
+ if (err)
+ goto error;
+
switch (req.reg_type) {
case IRDMA_MEMREG_TYPE_QP:
err = irdma_reg_user_mr_type_qp(req, udata, iwmr);
@@ -3585,11 +3577,39 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
return &iwmr->ibmr;
error:
ib_umem_release(region);
- irdma_free_iwmr(iwmr);
+ kfree(iwmr);
return ERR_PTR(err);
}
+static int irdma_hwdereg_mr(struct ib_mr *ib_mr);
+
+/*
+ * irdma_umem_dmabuf_revoke - revoke callback for a pinned revocable dmabuf umem
+ * @priv: the struct irdma_mr registered as callback data for the umem
+ *
+ * Marks the MR as revoked and, if it is registered with HW, invalidates it
+ * via irdma_hwdereg_mr(). If that fails, a device reset is requested unless
+ * the reset flag is already set.
+ */
+static void irdma_umem_dmabuf_revoke(void *priv)
+{
+ struct irdma_mr *iwmr = priv;
+ int err;
+
+ /* irdma_reg_user_mr_dmabuf() checks this flag before registering with HW. */
+ iwmr->revoked = true;
+
+ /* Nothing to invalidate if the MR was not registered with HW. */
+ if (!iwmr->is_hwreg)
+ return;
+
+ /*
+ * Invalidate the key in hardware. This does not actually release the
+ * key for potential reuse - that only occurs when the region is fully
+ * deregistered.
+ */
+ err = irdma_hwdereg_mr(&iwmr->ibmr);
+ if (err) {
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+
+ ibdev_err(&iwdev->ibdev,
+ "dmabuf mr invalidate failed %d\n", err);
+ /*
+ * NOTE(review): presumably a reset is requested because HW may
+ * still be able to access the revoked memory - confirm.
+ */
+ if (!iwdev->rf->reset) {
+ iwdev->rf->reset = true;
+ iwdev->rf->gen_ops.request_reset(iwdev->rf);
+ }
+ }
+}
+
static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
u64 len, u64 virt,
int fd, int access,
@@ -3607,31 +3627,45 @@ static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
return ERR_PTR(-EINVAL);
- umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd, access);
+ iwmr = kzalloc_obj(*iwmr);
+ if (!iwmr)
+ return ERR_PTR(-ENOMEM);
+
+ umem_dmabuf =
+ ib_umem_dmabuf_get_pinned_revocable(pd->device, start, len, fd,
+ access,
+ irdma_umem_dmabuf_revoke,
+ iwmr);
if (IS_ERR(umem_dmabuf)) {
ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%pe]\n",
umem_dmabuf);
+ kfree(iwmr);
return ERR_CAST(umem_dmabuf);
}
- iwmr = irdma_alloc_iwmr(&umem_dmabuf->umem, pd, virt, IRDMA_MEMREG_TYPE_MEM);
- if (IS_ERR(iwmr)) {
- err = PTR_ERR(iwmr);
+ err = irdma_init_iwmr(iwmr, &umem_dmabuf->umem, pd, virt,
+ IRDMA_MEMREG_TYPE_MEM);
+ if (err)
goto err_release;
- }
- err = irdma_reg_user_mr_type_mem(iwmr, access, true);
+ dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+ /* Catch revocations that occur before grabbing dma_resv_lock. */
+ err = iwmr->revoked ?
+ -ENODEV : irdma_reg_user_mr_type_mem(iwmr, access, true);
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+
if (err)
- goto err_iwmr;
+ goto err_release;
return &iwmr->ibmr;
-err_iwmr:
- irdma_free_iwmr(iwmr);
-
err_release:
+ /* ib_umem_release will result in the irdma_umem_dmabuf_revoke callback
+ * being called, but it ends up being a no-op if the region has not been
+ * successfully registered with HW because iwmr->is_hwreg is false.
+ */
ib_umem_release(&umem_dmabuf->umem);
-
+ kfree(iwmr);
return ERR_PTR(err);
}
@@ -3899,6 +3933,28 @@ static void irdma_del_memlist(struct irdma_mr *iwmr,
}
}
+/**
+ * irdma_dereg_mr_dmabuf - deregister a dmabuf mr
+ * @iwdev: iwarp device
+ * @iwmr: dmabuf-backed mr to deregister; freed before this function returns
+ *
+ * Releases the umem, the stag, any PBLE allocation and finally the mr
+ * itself, in that order.
+ *
+ * Return: always 0
+ */
+static int irdma_dereg_mr_dmabuf(struct irdma_device *iwdev,
+ struct irdma_mr *iwmr)
+{
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+
+ /*
+ * Causes a synchronous revoke which then causes HW invalidation.
+ * The revoke path (irdma_umem_dmabuf_revoke) only invalidates the key
+ * in HW; the stag itself is released below, after the umem is gone.
+ * iwmr is freed last because the revoke callback receives it as its
+ * private data.
+ */
+ ib_umem_release(iwmr->region);
+
+ irdma_free_stag(iwdev, iwmr->stag);
+
+ if (iwpbl->pbl_allocated)
+ irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
+
+ kfree(iwmr);
+ return 0;
+}
+
/**
* irdma_dereg_mr - deregister mr
* @ib_mr: mr ptr for dereg
@@ -3911,6 +3967,9 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
struct irdma_pbl *iwpbl = &iwmr->iwpbl;
int ret;
+ if (iwmr->region && iwmr->region->is_dmabuf)
+ return irdma_dereg_mr_dmabuf(iwdev, iwmr);
+
if (iwmr->type != IRDMA_MEMREG_TYPE_MEM) {
if (iwmr->region) {
struct irdma_ucontext *ucontext;
diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h
index aabbb3442098..612c66c91db4 100644
--- a/drivers/infiniband/hw/irdma/verbs.h
+++ b/drivers/infiniband/hw/irdma/verbs.h
@@ -113,6 +113,7 @@ struct irdma_mr {
int access;
bool is_hwreg:1;
bool dma_mr:1;
+ bool revoked:1;
u16 type;
u32 page_cnt;
u64 page_size;
--
2.53.0.371.g1d285c8824-goog
next prev parent reply other threads:[~2026-02-23 19:53 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-23 19:53 [RFC 1/2] RDMA/umem: Add support for pinned revocable dmabuf import Jacob Moroni
2026-02-23 19:53 ` Jacob Moroni [this message]
2026-02-24 18:51 ` [RFC 2/2] RDMA/irdma: Add pinned revocable dmabuf support Jason Gunthorpe
2026-02-25 21:02 ` Jacob Moroni
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260223195333.438492-2-jmoroni@google.com \
--to=jmoroni@google.com \
--cc=jgg@ziepe.ca \
--cc=krzysztof.czurylo@intel.com \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=tatyana.e.nikolova@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox