From: Jason Gunthorpe <jgg@nvidia.com>
To: "Alex Williamson" <alex@shazbot.org>,
"Christian König" <christian.koenig@amd.com>,
dri-devel@lists.freedesktop.org, iommu@lists.linux.dev,
"Joerg Roedel" <joro@8bytes.org>,
"Kevin Tian" <kevin.tian@intel.com>,
kvm@vger.kernel.org, linaro-mm-sig@lists.linaro.org,
linux-kselftest@vger.kernel.org, linux-media@vger.kernel.org,
"Robin Murphy" <robin.murphy@arm.com>,
"Shuah Khan" <shuah@kernel.org>,
"Sumit Semwal" <sumit.semwal@linaro.org>,
"Will Deacon" <will@kernel.org>
Cc: Krishnakant Jaju <kjaju@nvidia.com>,
Leon Romanovsky <leon@kernel.org>, Matt Ochs <mochs@nvidia.com>,
Nicolin Chen <nicolinc@nvidia.com>,
patches@lists.linux.dev, Simona Vetter <simona.vetter@ffwll.ch>,
Vivek Kasireddy <vivek.kasireddy@intel.com>,
Xu Yilun <yilun.xu@linux.intel.com>
Subject: [PATCH 2/9] iommufd: Add DMABUF to iopt_pages
Date: Fri, 7 Nov 2025 12:49:34 -0400 [thread overview]
Message-ID: <2-v1-af84a3ab44f5+f68-iommufd_buf_jgg@nvidia.com> (raw)
In-Reply-To: <0-v1-af84a3ab44f5+f68-iommufd_buf_jgg@nvidia.com>
Add IOPT_ADDRESS_DMABUF to the iopt_pages and the basic infrastructure to
create an iopt_pages from a struct dma_buf *.
DMABUF pages are not supported for accesses, and for now can only be used
with the VFIO DMABUF exporter.
The overall flow will be similar to memfd where the user can pass in a
DMABUF file descriptor to IOMMU_IOAS_MAP_FILE and create an area and
pages. Like other areas it can be copied and otherwise manipulated, though
there is little point in doing so.
There is no pinned page accounting done for DMABUF maps.
The DMABUF attachment exists so long as the dmabuf is mapped into an IOAS,
even if the IOAS is not mapped to any domains.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/iommufd/io_pagetable.c | 3 +
drivers/iommu/iommufd/io_pagetable.h | 24 ++++-
drivers/iommu/iommufd/iommufd_private.h | 2 +
drivers/iommu/iommufd/main.c | 10 ++
drivers/iommu/iommufd/pages.c | 133 +++++++++++++++++++++++-
5 files changed, 168 insertions(+), 4 deletions(-)
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index c0360c450880b8..b3cf3825a88c7f 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -284,6 +284,9 @@ static int iopt_alloc_area_pages(struct io_pagetable *iopt,
case IOPT_ADDRESS_FILE:
start = elm->start_byte + elm->pages->start;
break;
+ case IOPT_ADDRESS_DMABUF:
+ start = elm->start_byte + elm->pages->dmabuf.start;
+ break;
}
rc = iopt_alloc_iova(iopt, dst_iova, start, length);
if (rc)
diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h
index b6064f4ce4af91..389e3c3c80f335 100644
--- a/drivers/iommu/iommufd/io_pagetable.h
+++ b/drivers/iommu/iommufd/io_pagetable.h
@@ -5,6 +5,7 @@
#ifndef __IO_PAGETABLE_H
#define __IO_PAGETABLE_H
+#include <linux/dma-buf.h>
#include <linux/interval_tree.h>
#include <linux/kref.h>
#include <linux/mutex.h>
@@ -179,7 +180,14 @@ enum {
enum iopt_address_type {
IOPT_ADDRESS_USER = 0,
- IOPT_ADDRESS_FILE = 1,
+ IOPT_ADDRESS_FILE,
+ IOPT_ADDRESS_DMABUF,
+};
+
+/* Per-pages state used when the iopt_pages type is IOPT_ADDRESS_DMABUF. */
+struct iopt_pages_dmabuf {
+ /* Importer attachment; set by iopt_map_dmabuf(), detached on release */
+ struct dma_buf_attachment *attach;
+ /*
+ * Physical range filled in by the exporter when the DMABUF is mapped.
+ * iopt_revoke_notify() sets phys.len to 0.
+ */
+ struct dma_buf_phys_vec phys;
+ /* start - start_byte as given to iopt_alloc_dmabuf_pages() */
+ unsigned long start;
};
/*
@@ -209,6 +217,8 @@ struct iopt_pages {
struct file *file;
unsigned long start;
};
+ /* IOPT_ADDRESS_DMABUF */
+ struct iopt_pages_dmabuf dmabuf;
};
bool writable:1;
u8 account_mode;
@@ -220,10 +230,22 @@ struct iopt_pages {
struct rb_root_cached domains_itree;
};
+/*
+ * True when @pages is of type IOPT_ADDRESS_DMABUF. Always false when the
+ * kernel is built without CONFIG_DMA_SHARED_BUFFER.
+ */
+static inline bool iopt_is_dmabuf(struct iopt_pages *pages)
+{
+	return IS_ENABLED(CONFIG_DMA_SHARED_BUFFER) &&
+	       pages->type == IOPT_ADDRESS_DMABUF;
+}
+
struct iopt_pages *iopt_alloc_user_pages(void __user *uptr,
unsigned long length, bool writable);
struct iopt_pages *iopt_alloc_file_pages(struct file *file, unsigned long start,
unsigned long length, bool writable);
+struct iopt_pages *iopt_alloc_dmabuf_pages(struct iommufd_ctx *ictx,
+ struct dma_buf *dmabuf,
+ unsigned long start_byte,
+ unsigned long start,
+ unsigned long length, bool writable);
void iopt_release_pages(struct kref *kref);
static inline void iopt_put_pages(struct iopt_pages *pages)
{
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 627f9b78483a0e..10ee3510312132 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -504,6 +504,8 @@ void iommufd_device_pre_destroy(struct iommufd_object *obj);
void iommufd_device_destroy(struct iommufd_object *obj);
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);
+struct device *iommufd_global_device(void);
+
struct iommufd_access {
struct iommufd_object obj;
struct iommufd_ctx *ictx;
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index ce775fbbae94e7..5cc4b08c25f585 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -751,6 +751,15 @@ static struct miscdevice vfio_misc_dev = {
.mode = 0666,
};
+/*
+ * Return the struct device behind iommu_misc_dev. DMABUF attachment needs a
+ * struct device for the importer; iopt_map_dmabuf() passes this one to
+ * dma_buf_dynamic_attach() as a dummy. Used only by the DMABUF code.
+ */
+struct device *iommufd_global_device(void)
+{
+ return iommu_misc_dev.this_device;
+}
+
static int __init iommufd_init(void)
{
int ret;
@@ -794,5 +803,6 @@ MODULE_ALIAS("devname:vfio/vfio");
#endif
MODULE_IMPORT_NS("IOMMUFD_INTERNAL");
MODULE_IMPORT_NS("IOMMUFD");
+MODULE_IMPORT_NS("DMA_BUF");
MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
MODULE_LICENSE("GPL");
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index c3433b84556172..7bc4731501cc21 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -45,6 +45,8 @@
* last_iova + 1 can overflow. An iopt_pages index will always be much less than
* ULONG_MAX so last_index + 1 cannot overflow.
*/
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
@@ -53,6 +55,7 @@
#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
+#include <linux/vfio_pci_core.h>
#include "double_span.h"
#include "io_pagetable.h"
@@ -272,6 +275,7 @@ struct pfn_batch {
unsigned int end;
unsigned int total_pfns;
};
+/* Largest value that fits in a single element of pfn_batch::npfns */
+enum { MAX_NPFNS = type_max(typeof(((struct pfn_batch *)0)->npfns[0])) };
static void batch_clear(struct pfn_batch *batch)
{
@@ -350,7 +354,6 @@ static void batch_destroy(struct pfn_batch *batch, void *backup)
static bool batch_add_pfn_num(struct pfn_batch *batch, unsigned long pfn,
u32 nr)
{
- const unsigned int MAX_NPFNS = type_max(typeof(*batch->npfns));
unsigned int end = batch->end;
if (end && pfn == batch->pfns[end - 1] + batch->npfns[end - 1] &&
@@ -1360,6 +1363,121 @@ struct iopt_pages *iopt_alloc_file_pages(struct file *file, unsigned long start,
return pages;
}
+/*
+ * move_notify callback of the iommufd importer. The iopt_pages is recovered
+ * from the attachment's importer_priv and the length of its cached physical
+ * range is set to zero while holding the pages mutex.
+ */
+static void iopt_revoke_notify(struct dma_buf_attachment *attach)
+{
+	struct iopt_pages *dmabuf_pages = attach->importer_priv;
+
+	scoped_guard(mutex, &dmabuf_pages->mutex)
+		dmabuf_pages->dmabuf.phys.len = 0;
+}
+
+/*
+ * Importer callbacks handed to dma_buf_dynamic_attach(). The table is never
+ * modified at runtime, so keep it const.
+ */
+static const struct dma_buf_attach_ops iopt_dmabuf_attach_revoke_ops = {
+	.allow_peer2peer = true,
+	.move_notify = iopt_revoke_notify,
+};
+
+/*
+ * iommufd and vfio have a circular dependency, so the vfio entry point is
+ * looked up with symbol_get() instead of being called directly. Future work
+ * for a phys based private interconnect will remove this.
+ *
+ * Returns -EOPNOTSUPP when CONFIG_VFIO_PCI_DMABUF is disabled or the symbol
+ * cannot be obtained, otherwise the return value of
+ * vfio_pci_dma_buf_iommufd_map().
+ */
+static int
+sym_vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
+				 struct dma_buf_phys_vec *phys)
+{
+	typeof(&vfio_pci_dma_buf_iommufd_map) map_fn;
+	int ret = -EOPNOTSUPP;
+
+	if (!IS_ENABLED(CONFIG_VFIO_PCI_DMABUF))
+		return ret;
+
+	map_fn = symbol_get(vfio_pci_dma_buf_iommufd_map);
+	if (map_fn) {
+		ret = map_fn(attachment, phys);
+		/* Drop the symbol reference only after the call has returned */
+		symbol_put(vfio_pci_dma_buf_iommufd_map);
+	}
+	return ret;
+}
+
+/*
+ * Attach iommufd to @dmabuf as a dynamic importer and fill pages->dmabuf.phys
+ * through sym_vfio_pci_dma_buf_iommufd_map() while the reservation lock is
+ * held.
+ *
+ * Returns 0 with pages->dmabuf.attach set, or a negative errno with the
+ * attachment already detached again. @ictx is not used by this function.
+ */
+static int iopt_map_dmabuf(struct iommufd_ctx *ictx, struct iopt_pages *pages,
+ struct dma_buf *dmabuf)
+{
+ struct dma_buf_attachment *attach;
+ int rc;
+
+ /* @pages becomes importer_priv, which iopt_revoke_notify() reads back */
+ attach = dma_buf_dynamic_attach(dmabuf, iommufd_global_device(),
+ &iopt_dmabuf_attach_revoke_ops, pages);
+ if (IS_ERR(attach))
+ return PTR_ERR(attach);
+
+ dma_resv_lock(dmabuf->resv, NULL);
+ /*
+ * Lock ordering requires the mutex to be taken inside the reservation,
+ * make sure lockdep sees this.
+ */
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ mutex_lock(&pages->mutex);
+ mutex_unlock(&pages->mutex);
+ }
+
+ rc = sym_vfio_pci_dma_buf_iommufd_map(attach, &pages->dmabuf.phys);
+ if (rc)
+ goto err_detach;
+
+ dma_resv_unlock(dmabuf->resv);
+
+ /* On success iopt_release_pages() will detach and put the dmabuf. */
+ pages->dmabuf.attach = attach;
+ return 0;
+
+err_detach:
+ /* Error path: release the reservation lock, then undo the attach */
+ dma_resv_unlock(dmabuf->resv);
+ dma_buf_detach(dmabuf, attach);
+ return rc;
+}
+
+/*
+ * Create an iopt_pages of type IOPT_ADDRESS_DMABUF covering
+ * [start, start + length) of @dmabuf.
+ *
+ * @ictx: passed through to iopt_map_dmabuf()
+ * @dmabuf: DMABUF to attach to
+ * @start_byte: passed to iopt_alloc_pages(); also subtracted from @start to
+ *              form pages->dmabuf.start (assumes start_byte <= start -
+ *              TODO confirm against the caller)
+ * @start: byte offset into @dmabuf
+ * @length: number of bytes, must be non-zero
+ * @writable: passed to iopt_alloc_pages()
+ *
+ * Returns the new iopt_pages or an ERR_PTR: -EOPNOTSUPP without
+ * CONFIG_DMA_SHARED_BUFFER, -EINVAL when the range is empty, wraps, does not
+ * fit in the DMABUF or is too long for a pfn batch entry, or the error from
+ * iopt_alloc_pages() / iopt_map_dmabuf().
+ *
+ * On success the dmabuf is detached and put by iopt_release_pages().
+ */
+struct iopt_pages *iopt_alloc_dmabuf_pages(struct iommufd_ctx *ictx,
+					   struct dma_buf *dmabuf,
+					   unsigned long start_byte,
+					   unsigned long start,
+					   unsigned long length, bool writable)
+{
+	static struct lock_class_key pages_dmabuf_mutex_key;
+	struct iopt_pages *pages;
+	unsigned long last;
+	int rc;
+
+	if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	/*
+	 * Compute the last byte with an overflow check. A plain
+	 * start + length - 1 can wrap around and then compare as being inside
+	 * the DMABUF even though the requested range is not.
+	 */
+	if (!length || check_add_overflow(start, length - 1, &last))
+		return ERR_PTR(-EINVAL);
+
+	if (dmabuf->size <= last || length / PAGE_SIZE >= MAX_NPFNS)
+		return ERR_PTR(-EINVAL);
+
+	pages = iopt_alloc_pages(start_byte, length, writable);
+	if (IS_ERR(pages))
+		return pages;
+
+	/*
+	 * The mmap_lock can be held when obtaining the dmabuf reservation lock
+	 * which creates a locking cycle with the pages mutex which is held
+	 * while obtaining the mmap_lock. This locking path is not present for
+	 * IOPT_ADDRESS_DMABUF so split the lock class.
+	 */
+	lockdep_set_class(&pages->mutex, &pages_dmabuf_mutex_key);
+
+	/* dmabuf does not use pinned page accounting. */
+	pages->account_mode = IOPT_PAGES_ACCOUNT_NONE;
+	pages->type = IOPT_ADDRESS_DMABUF;
+	pages->dmabuf.start = start - start_byte;
+
+	rc = iopt_map_dmabuf(ictx, pages, dmabuf);
+	if (rc) {
+		/* Drops the only reference, freeing the pages again */
+		iopt_put_pages(pages);
+		return ERR_PTR(rc);
+	}
+
+	return pages;
+}
+
void iopt_release_pages(struct kref *kref)
{
struct iopt_pages *pages = container_of(kref, struct iopt_pages, kref);
@@ -1372,8 +1490,14 @@ void iopt_release_pages(struct kref *kref)
mutex_destroy(&pages->mutex);
put_task_struct(pages->source_task);
free_uid(pages->source_user);
- if (pages->type == IOPT_ADDRESS_FILE)
+ if (iopt_is_dmabuf(pages) && pages->dmabuf.attach) {
+ struct dma_buf *dmabuf = pages->dmabuf.attach->dmabuf;
+
+ dma_buf_detach(dmabuf, pages->dmabuf.attach);
+ dma_buf_put(dmabuf);
+ } else if (pages->type == IOPT_ADDRESS_FILE) {
fput(pages->file);
+ }
kfree(pages);
}
@@ -2031,7 +2155,10 @@ int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte,
if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable)
return -EPERM;
- if (pages->type == IOPT_ADDRESS_FILE)
+ if (iopt_is_dmabuf(pages))
+ return -EINVAL;
+
+ if (pages->type != IOPT_ADDRESS_USER)
return iopt_pages_rw_slow(pages, start_index, last_index,
start_byte % PAGE_SIZE, data, length,
flags);
--
2.43.0
next prev parent reply other threads:[~2025-11-07 16:49 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-07 16:49 [PATCH 0/9] Initial DMABUF support for iommufd Jason Gunthorpe
2025-11-07 16:49 ` [PATCH 1/9] vfio/pci: Add vfio_pci_dma_buf_iommufd_map() Jason Gunthorpe
2025-11-20 7:49 ` Tian, Kevin
2025-11-20 17:34 ` Nicolin Chen
2025-11-07 16:49 ` Jason Gunthorpe [this message]
2025-11-07 18:02 ` [PATCH 2/9] iommufd: Add DMABUF to iopt_pages Nicolin Chen
2025-11-20 7:55 ` Tian, Kevin
2025-11-21 14:27 ` Jason Gunthorpe
2025-11-07 16:49 ` [PATCH 3/9] iommufd: Do not map/unmap revoked DMABUFs Jason Gunthorpe
2025-11-07 18:30 ` Nicolin Chen
2025-11-20 7:56 ` Tian, Kevin
2025-11-07 16:49 ` [PATCH 4/9] iommufd: Allow a DMABUF to be revoked Jason Gunthorpe
2025-11-13 23:26 ` Nicolin Chen
2025-11-20 7:58 ` Tian, Kevin
2025-11-07 16:49 ` [PATCH 5/9] iommufd: Allow MMIO pages in a batch Jason Gunthorpe
2025-11-13 23:28 ` Nicolin Chen
2025-11-20 7:59 ` Tian, Kevin
2025-11-20 14:59 ` Jason Gunthorpe
2025-11-07 16:49 ` [PATCH 6/9] iommufd: Have pfn_reader process DMABUF iopt_pages Jason Gunthorpe
2025-11-13 23:39 ` Nicolin Chen
2025-11-18 19:38 ` Jason Gunthorpe
2025-11-20 8:04 ` Tian, Kevin
2025-11-21 0:47 ` Jason Gunthorpe
2025-11-21 14:33 ` Jason Gunthorpe
2025-11-07 16:49 ` [PATCH 7/9] iommufd: Have iopt_map_file_pages convert the fd to a file Jason Gunthorpe
2025-11-13 23:43 ` Nicolin Chen
2025-11-20 8:05 ` Tian, Kevin
2025-11-07 16:49 ` [PATCH 8/9] iommufd: Accept a DMABUF through IOMMU_IOAS_MAP_FILE Jason Gunthorpe
2025-11-14 0:05 ` Nicolin Chen
2025-11-18 19:44 ` Jason Gunthorpe
2025-11-18 19:57 ` Nicolin Chen
2025-11-20 8:06 ` Tian, Kevin
2025-11-07 16:49 ` [PATCH 9/9] iommufd/selftest: Add some tests for the dmabuf flow Jason Gunthorpe
2025-11-07 19:43 ` Nicolin Chen
2025-11-18 19:25 ` Jason Gunthorpe
2025-11-20 8:06 ` Tian, Kevin
2025-11-07 17:52 ` [PATCH 0/9] Initial DMABUF support for iommufd Nicolin Chen
2025-11-13 6:33 ` Shuai Xue
2025-11-13 7:34 ` Nicolin Chen
2025-11-13 11:32 ` Shuai Xue
2025-11-13 17:44 ` Nicolin Chen
2025-11-13 18:37 ` Alex Williamson
2025-11-17 15:50 ` Jason Gunthorpe
2025-11-18 5:37 ` Kasireddy, Vivek
2025-11-18 14:59 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2-v1-af84a3ab44f5+f68-iommufd_buf_jgg@nvidia.com \
--to=jgg@nvidia.com \
--cc=alex@shazbot.org \
--cc=christian.koenig@amd.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=iommu@lists.linux.dev \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=kjaju@nvidia.com \
--cc=kvm@vger.kernel.org \
--cc=leon@kernel.org \
--cc=linaro-mm-sig@lists.linaro.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=linux-media@vger.kernel.org \
--cc=mochs@nvidia.com \
--cc=nicolinc@nvidia.com \
--cc=patches@lists.linux.dev \
--cc=robin.murphy@arm.com \
--cc=shuah@kernel.org \
--cc=simona.vetter@ffwll.ch \
--cc=sumit.semwal@linaro.org \
--cc=vivek.kasireddy@intel.com \
--cc=will@kernel.org \
--cc=yilun.xu@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).