From: Nicolin Chen <nicolinc@nvidia.com>
To: <jgg@nvidia.com>, <kevin.tian@intel.com>, <corbet@lwn.net>,
<will@kernel.org>
Cc: <bagasdotme@gmail.com>, <robin.murphy@arm.com>, <joro@8bytes.org>,
<thierry.reding@gmail.com>, <vdumpa@nvidia.com>,
<jonathanh@nvidia.com>, <shuah@kernel.org>, <jsnitsel@redhat.com>,
<nathan@kernel.org>, <peterz@infradead.org>, <yi.l.liu@intel.com>,
<mshavit@google.com>, <praan@google.com>,
<zhangzekun11@huawei.com>, <iommu@lists.linux.dev>,
<linux-doc@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
<linux-arm-kernel@lists.infradead.org>,
<linux-tegra@vger.kernel.org>, <linux-kselftest@vger.kernel.org>,
<patches@lists.linux.dev>, <mochs@nvidia.com>,
<alok.a.tiwari@oracle.com>, <vasant.hegde@amd.com>,
<dwmw2@infradead.org>, <baolu.lu@linux.intel.com>
Subject: [PATCH v6 13/25] iommufd: Add mmap interface
Date: Sat, 14 Jun 2025 00:14:38 -0700 [thread overview]
Message-ID: <c9929e0c9ec6f3f6348cd0c399d6fdfa9f35f973.1749884998.git.nicolinc@nvidia.com> (raw)
In-Reply-To: <cover.1749884998.git.nicolinc@nvidia.com>
For vIOMMU passing through HW resources to user space (VMs), allowing a VM
to control the passed through HW directly by accessing hardware registers,
add an mmap infrastructure to map the physical MMIO pages to user space.
Maintain a maple tree per ictx as a translation table managing mmappable
regions, from an allocated for-user mmap offset to an iommufd_mmap struct,
where it stores the real PFN range for an io_remap_pfn_range call.
Keep track of the lifecycle of the mmappable region by taking refcount of
its owner, so as to enforce user space to unmap the region first before it
can destroy its owner object.
To allow an IOMMU driver to add and delete mmappable regions onto/from the
maple tree, add iommufd_viommu_alloc/destroy_mmap helpers.
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/iommufd/iommufd_private.h | 14 ++++++
include/linux/iommufd.h | 42 ++++++++++++++++
drivers/iommu/iommufd/driver.c | 51 ++++++++++++++++++++
drivers/iommu/iommufd/main.c | 64 +++++++++++++++++++++++++
4 files changed, 171 insertions(+)
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 1bb1c0764bc2..e8192f79fe42 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -7,6 +7,7 @@
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/iova_bitmap.h>
+#include <linux/maple_tree.h>
#include <linux/rwsem.h>
#include <linux/uaccess.h>
#include <linux/xarray.h>
@@ -44,6 +45,7 @@ struct iommufd_ctx {
struct xarray groups;
wait_queue_head_t destroy_wait;
struct rw_semaphore ioas_creation_lock;
+ struct maple_tree mt_mmap;
struct mutex sw_msi_lock;
struct list_head sw_msi_list;
@@ -55,6 +57,18 @@ struct iommufd_ctx {
struct iommufd_ioas *vfio_ioas;
};
+/* Entry for iommufd_ctx::mt_mmap */
+struct iommufd_mmap {
+ struct iommufd_object *owner;
+
+ /* Allocated start position in mt_mmap tree */
+ unsigned long startp;
+
+ /* Physical range for io_remap_pfn_range() */
+ unsigned long mmio_pfn;
+ unsigned long num_pfns;
+};
+
/*
* The IOVA to PFN map. The map automatically copies the PFNs into multiple
* domains and permits sharing of PFNs between io_pagetable instances. This
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index acf0e8f0c630..0da9bc8f94f3 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -251,6 +251,11 @@ int _iommufd_object_depend(struct iommufd_object *obj_dependent,
struct iommufd_object *obj_depended);
void _iommufd_object_undepend(struct iommufd_object *obj_dependent,
struct iommufd_object *obj_depended);
+int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner,
+ phys_addr_t mmio_addr, size_t length,
+ unsigned long *offset);
+void _iommufd_destroy_mmap(struct iommufd_ctx *ictx,
+ struct iommufd_object *owner, unsigned long offset);
struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
unsigned long vdev_id);
int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu,
@@ -271,6 +276,20 @@ _iommufd_object_undepend(struct iommufd_object *obj_dependent,
{
}
+static inline int _iommufd_alloc_mmap(struct iommufd_ctx *ictx,
+ struct iommufd_object *owner,
+ phys_addr_t mmio_addr, size_t length,
+ unsigned long *offset)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void _iommufd_destroy_mmap(struct iommufd_ctx *ictx,
+ struct iommufd_object *owner,
+ unsigned long offset)
+{
+}
+
static inline struct device *
iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id)
{
@@ -338,4 +357,27 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
_iommufd_object_undepend(&dependent->member.obj, \
&depended->member.obj); \
})
+
+/*
+ * Helpers for IOMMU driver to alloc/destroy an mmapable area for a structure.
+ *
+ * To support an mmappable MMIO region, kernel driver must first register it to
+ * iommufd core to allocate an @offset, during a driver-structure initialization
+ * (e.g. viommu_init op). Then, it should report to user space this @offset and
+ * the @length of the MMIO region for mmap syscall.
+ */
+static inline int iommufd_viommu_alloc_mmap(struct iommufd_viommu *viommu,
+ phys_addr_t mmio_addr,
+ size_t length,
+ unsigned long *offset)
+{
+ return _iommufd_alloc_mmap(viommu->ictx, &viommu->obj, mmio_addr,
+ length, offset);
+}
+
+static inline void iommufd_viommu_destroy_mmap(struct iommufd_viommu *viommu,
+ unsigned long offset)
+{
+ _iommufd_destroy_mmap(viommu->ictx, &viommu->obj, offset);
+}
#endif
diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
index 70b7917da0cb..8220b61d8c8d 100644
--- a/drivers/iommu/iommufd/driver.c
+++ b/drivers/iommu/iommufd/driver.c
@@ -31,6 +31,57 @@ void _iommufd_object_undepend(struct iommufd_object *obj_dependent,
}
EXPORT_SYMBOL_NS_GPL(_iommufd_object_undepend, "IOMMUFD");
+/*
+ * Allocate an @offset to return to user space to use for an mmap() syscall
+ *
+ * Driver should use a per-structure helper in include/linux/iommufd.h
+ */
+int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner,
+ phys_addr_t mmio_addr, size_t length,
+ unsigned long *offset)
+{
+ struct iommufd_mmap *immap;
+ unsigned long startp;
+ int rc;
+
+ if (!PAGE_ALIGNED(mmio_addr))
+ return -EINVAL;
+ if (!length || !PAGE_ALIGNED(length))
+ return -EINVAL;
+
+ immap = kzalloc(sizeof(*immap), GFP_KERNEL);
+ if (!immap)
+ return -ENOMEM;
+ immap->owner = owner;
+ immap->num_pfns = length >> PAGE_SHIFT;
+ immap->mmio_pfn = mmio_addr >> PAGE_SHIFT;
+
+ rc = mtree_alloc_range(&ictx->mt_mmap, &startp, immap, immap->num_pfns,
+ 0, U32_MAX >> PAGE_SHIFT, GFP_KERNEL);
+ if (rc < 0) {
+ kfree(immap);
+ return rc;
+ }
+
+ immap->startp = startp;
+ /* mmap() syscall will right-shift the offset in vma->vm_pgoff */
+ *offset = startp << PAGE_SHIFT;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(_iommufd_alloc_mmap, "IOMMUFD");
+
+/* Driver should use a per-structure helper in include/linux/iommufd.h */
+void _iommufd_destroy_mmap(struct iommufd_ctx *ictx,
+ struct iommufd_object *owner, unsigned long offset)
+{
+ struct iommufd_mmap *immap;
+
+ immap = mtree_erase(&ictx->mt_mmap, offset >> PAGE_SHIFT);
+ WARN_ON_ONCE(!immap || immap->owner != owner);
+ kfree(immap);
+}
+EXPORT_SYMBOL_NS_GPL(_iommufd_destroy_mmap, "IOMMUFD");
+
/* Caller should xa_lock(&viommu->vdevs) to protect the return value */
struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
unsigned long vdev_id)
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 4e8dbbfac890..339a269ebbc8 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -275,6 +275,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
xa_init(&ictx->groups);
ictx->file = filp;
+ mt_init_flags(&ictx->mt_mmap, MT_FLAGS_ALLOC_RANGE);
init_waitqueue_head(&ictx->destroy_wait);
mutex_init(&ictx->sw_msi_lock);
INIT_LIST_HEAD(&ictx->sw_msi_list);
@@ -479,11 +480,74 @@ static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
return ret;
}
+static void iommufd_fops_vma_open(struct vm_area_struct *vma)
+{
+ struct iommufd_mmap *immap = vma->vm_private_data;
+
+ refcount_inc(&immap->owner->users);
+}
+
+static void iommufd_fops_vma_close(struct vm_area_struct *vma)
+{
+ struct iommufd_mmap *immap = vma->vm_private_data;
+
+ refcount_dec(&immap->owner->users);
+}
+
+static const struct vm_operations_struct iommufd_vma_ops = {
+ .open = iommufd_fops_vma_open,
+ .close = iommufd_fops_vma_close,
+};
+
+/* The vm_pgoff must be pre-allocated from mt_mmap, and given to user space */
+static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct iommufd_ctx *ictx = filp->private_data;
+ size_t length = vma->vm_end - vma->vm_start;
+ struct iommufd_mmap *immap;
+ int rc;
+
+ if (!PAGE_ALIGNED(length))
+ return -EINVAL;
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+ if (vma->vm_flags & VM_EXEC)
+ return -EPERM;
+
+ /* vma->vm_pgoff carries an index to an mtree entry (immap) */
+ immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff);
+ if (!immap)
+ return -ENXIO;
+ /*
+ * mtree_load() returns the immap for any contained pgoff, only allow
+ * the immap thing to be mapped
+ */
+ if (vma->vm_pgoff != immap->startp)
+ return -ENXIO;
+ if (length != immap->num_pfns << PAGE_SHIFT)
+ return -ENXIO;
+
+ vma->vm_pgoff = 0;
+ vma->vm_private_data = immap;
+ vma->vm_ops = &iommufd_vma_ops;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ rc = io_remap_pfn_range(vma, vma->vm_start, immap->mmio_pfn, length,
+ vma->vm_page_prot);
+ if (rc)
+ return rc;
+
+ /* vm_ops.open won't be called for mmap itself. */
+ refcount_inc(&immap->owner->users);
+ return rc;
+}
+
static const struct file_operations iommufd_fops = {
.owner = THIS_MODULE,
.open = iommufd_fops_open,
.release = iommufd_fops_release,
.unlocked_ioctl = iommufd_fops_ioctl,
+ .mmap = iommufd_fops_mmap,
};
/**
--
2.43.0
next prev parent reply other threads:[~2025-06-14 7:15 UTC|newest]
Thread overview: 77+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-14 7:14 [PATCH v6 00/25] iommufd: Add vIOMMU infrastructure (Part-4 HW QUEUE) Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 01/25] iommu: Add iommu_copy_struct_to_user helper Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 02/25] iommu: Pass in a driver-level user data structure to viommu_init op Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 03/25] iommufd/viommu: Allow driver-specific user data for a vIOMMU object Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 04/25] iommufd/selftest: Support user_data in mock_viommu_alloc Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 05/25] iommufd/selftest: Add coverage for viommu data Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 06/25] iommufd/access: Allow access->ops to be NULL for internal use Nicolin Chen
2025-06-16 6:25 ` Baolu Lu
2025-06-16 13:33 ` Jason Gunthorpe
2025-06-17 2:21 ` Nicolin Chen
2025-06-19 9:14 ` Pranjal Shrivastava
2025-06-25 3:38 ` Tian, Kevin
2025-06-25 16:37 ` Nicolin Chen
2025-06-25 17:33 ` Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 07/25] iommufd/access: Add internal APIs for HW queue to use Nicolin Chen
2025-06-16 13:37 ` Jason Gunthorpe
2025-06-17 2:25 ` Nicolin Chen
2025-06-17 4:23 ` Baolu Lu
2025-06-17 11:55 ` Jason Gunthorpe
2025-06-19 9:49 ` Pranjal Shrivastava
2025-06-19 9:42 ` Pranjal Shrivastava
2025-06-14 7:14 ` [PATCH v6 08/25] iommufd/viommu: Add driver-defined vDEVICE support Nicolin Chen
2025-06-16 6:26 ` Baolu Lu
2025-06-19 10:26 ` Pranjal Shrivastava
2025-06-19 11:44 ` Jason Gunthorpe
2025-06-21 4:51 ` Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 09/25] iommufd/viommu: Introduce IOMMUFD_OBJ_HW_QUEUE and its related struct Nicolin Chen
2025-06-16 13:47 ` Jason Gunthorpe
2025-06-17 2:29 ` Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 10/25] iommufd/viommu: Add IOMMUFD_CMD_HW_QUEUE_ALLOC ioctl Nicolin Chen
2025-06-16 6:12 ` Baolu Lu
2025-06-16 6:47 ` Nicolin Chen
2025-06-16 6:54 ` Baolu Lu
2025-06-16 7:04 ` Nicolin Chen
2025-06-16 7:09 ` Baolu Lu
2025-06-25 3:43 ` Tian, Kevin
2025-06-25 16:06 ` Nicolin Chen
2025-06-16 7:11 ` Baolu Lu
2025-06-16 13:58 ` Jason Gunthorpe
2025-06-25 3:45 ` Tian, Kevin
2025-06-25 23:06 ` Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 11/25] iommufd/driver: Add iommufd_hw_queue_depend/undepend() helpers Nicolin Chen
2025-06-16 14:06 ` Jason Gunthorpe
2025-06-14 7:14 ` [PATCH v6 12/25] iommufd/selftest: Add coverage for IOMMUFD_CMD_HW_QUEUE_ALLOC Nicolin Chen
2025-06-14 7:14 ` Nicolin Chen [this message]
2025-06-16 11:33 ` [PATCH v6 13/25] iommufd: Add mmap interface Baolu Lu
2025-06-16 14:13 ` Jason Gunthorpe
2025-06-17 2:37 ` Nicolin Chen
2025-06-17 11:55 ` Jason Gunthorpe
2025-06-25 21:18 ` Nicolin Chen
2025-06-19 11:15 ` Pranjal Shrivastava
2025-06-14 7:14 ` [PATCH v6 14/25] iommufd/selftest: Add coverage for the new " Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 15/25] Documentation: userspace-api: iommufd: Update HW QUEUE Nicolin Chen
2025-06-16 11:34 ` Baolu Lu
2025-06-14 7:14 ` [PATCH v6 16/25] iommu: Allow an input type in hw_info op Nicolin Chen
2025-06-16 11:53 ` Baolu Lu
2025-06-14 7:14 ` [PATCH v6 17/25] iommufd: Allow an input data_type via iommu_hw_info Nicolin Chen
2025-06-16 11:54 ` Baolu Lu
2025-06-16 14:14 ` Jason Gunthorpe
2025-06-14 7:14 ` [PATCH v6 18/25] iommufd/selftest: Update hw_info coverage for an input data_type Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 19/25] iommu/arm-smmu-v3-iommufd: Add vsmmu_size/type and vsmmu_init impl ops Nicolin Chen
2025-06-16 14:19 ` Jason Gunthorpe
2025-06-14 7:14 ` [PATCH v6 20/25] iommu/arm-smmu-v3-iommufd: Add hw_info to impl_ops Nicolin Chen
2025-06-16 14:20 ` Jason Gunthorpe
2025-06-19 11:47 ` Pranjal Shrivastava
2025-06-19 18:53 ` Jason Gunthorpe
2025-06-20 3:32 ` Pranjal Shrivastava
2025-06-21 5:36 ` Nicolin Chen
2025-06-23 15:13 ` Pranjal Shrivastava
2025-06-14 7:14 ` [PATCH v6 21/25] iommu/tegra241-cmdqv: Use request_threaded_irq Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 22/25] iommu/tegra241-cmdqv: Simplify deinit flow in tegra241_cmdqv_remove_vintf() Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 23/25] iommu/tegra241-cmdqv: Do not statically map LVCMDQs Nicolin Chen
2025-06-16 15:44 ` Jason Gunthorpe
2025-06-14 7:14 ` [PATCH v6 24/25] iommu/tegra241-cmdqv: Add user-space use support Nicolin Chen
2025-06-16 16:03 ` Jason Gunthorpe
2025-06-26 18:51 ` Nicolin Chen
2025-06-14 7:14 ` [PATCH v6 25/25] iommu/tegra241-cmdqv: Add IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV support Nicolin Chen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=c9929e0c9ec6f3f6348cd0c399d6fdfa9f35f973.1749884998.git.nicolinc@nvidia.com \
--to=nicolinc@nvidia.com \
--cc=alok.a.tiwari@oracle.com \
--cc=bagasdotme@gmail.com \
--cc=baolu.lu@linux.intel.com \
--cc=corbet@lwn.net \
--cc=dwmw2@infradead.org \
--cc=iommu@lists.linux.dev \
--cc=jgg@nvidia.com \
--cc=jonathanh@nvidia.com \
--cc=joro@8bytes.org \
--cc=jsnitsel@redhat.com \
--cc=kevin.tian@intel.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=linux-tegra@vger.kernel.org \
--cc=mochs@nvidia.com \
--cc=mshavit@google.com \
--cc=nathan@kernel.org \
--cc=patches@lists.linux.dev \
--cc=peterz@infradead.org \
--cc=praan@google.com \
--cc=robin.murphy@arm.com \
--cc=shuah@kernel.org \
--cc=thierry.reding@gmail.com \
--cc=vasant.hegde@amd.com \
--cc=vdumpa@nvidia.com \
--cc=will@kernel.org \
--cc=yi.l.liu@intel.com \
--cc=zhangzekun11@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).