From: Jacob Pan <jacob.pan@linux.microsoft.com>
To: linux-kernel@vger.kernel.org,
"iommu@lists.linux.dev" <iommu@lists.linux.dev>,
Jason Gunthorpe <jgg@nvidia.com>,
Alex Williamson <alex@shazbot.org>,
Joerg Roedel <joro@8bytes.org>,
Mostafa Saleh <smostafa@google.com>,
David Matlack <dmatlack@google.com>,
Robin Murphy <robin.murphy@arm.com>,
Nicolin Chen <nicolinc@nvidia.com>,
"Tian, Kevin" <kevin.tian@intel.com>, Yi Liu <yi.l.liu@intel.com>,
Baolu Lu <baolu.lu@linux.intel.com>
Cc: Saurabh Sengar <ssengar@linux.microsoft.com>,
skhawaja@google.com, pasha.tatashin@soleen.com,
Will Deacon <will@kernel.org>,
Jacob Pan <jacob.pan@linux.microsoft.com>
Subject: [PATCH v8 4/6] iommufd: Add an ioctl to query PA from IOVA for noiommu mode
Date: Wed, 3 Jun 2026 15:02:09 -0700 [thread overview]
Message-ID: <20260603220211.2584590-5-jacob.pan@linux.microsoft.com> (raw)
In-Reply-To: <20260603220211.2584590-1-jacob.pan@linux.microsoft.com>
To support no-IOMMU mode where userspace drivers perform unsafe DMA
using physical addresses, introduce a new API to retrieve the
physical address of a user-allocated DMA buffer that has been mapped to
an IOVA via IOMMU_IOAS_MAP. The mapping is backed by SW-only I/O page
tables maintained by the GENERIC_PT framework.
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jacob Pan <jacob.pan@linux.microsoft.com>
---
v8:
- Fix comment on start IOVA range (Kevin)
v7:
- Fix commit message (Yi)
- Avoid duplicated tmp_length settting (yi)
- Handle race with dma-buf revoke pages (Sashiko)
v6:
- Limit search length (Baolu, Jason)
v5:
- Fix next_iova exceeds iopt_area_last_iova (Alex)
- Rename IOCTL more specific to NOIOMMU, i.e.
IOMMUFD_CMD_IOAS_NOIOMMU_GET_PA (Kevin)
- Add header stubs for iopt_get_phys()
v4:
- Fix ioctl return type (Yi Liu)
fix comment get_pa
Signed-off-by: Jacob Pan <jacob.pan@linux.microsoft.com>
---
drivers/iommu/iommufd/io_pagetable.c | 80 +++++++++++++++++++++++++
drivers/iommu/iommufd/ioas.c | 33 ++++++++++
drivers/iommu/iommufd/iommufd_private.h | 18 ++++++
drivers/iommu/iommufd/main.c | 3 +
include/uapi/linux/iommufd.h | 27 +++++++++
5 files changed, 161 insertions(+)
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 24d4917105d9..667c2d07e08b 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -859,6 +859,86 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
}
+#ifdef CONFIG_IOMMUFD_NOIOMMU
+int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova, u64 *paddr,
+ u64 *length)
+{
+ struct iopt_area *area;
+ struct iopt_pages *pages;
+ u64 max_length = *length;
+ u64 tmp_length = 0;
+ u64 tmp_paddr = 0;
+ int rc = 0;
+
+ down_read(&iopt->iova_rwsem);
+ area = iopt_area_iter_first(iopt, iova, iova);
+ if (!area || !area->pages) {
+ rc = -ENOENT;
+ goto unlock_exit;
+ }
+
+ pages = area->pages;
+ mutex_lock(&pages->mutex);
+ if (iopt_dmabuf_revoked(pages)) {
+ rc = -EINVAL;
+ goto unlock_pages;
+ }
+
+ if (!area->storage_domain ||
+ area->storage_domain->owner != &iommufd_noiommu_ops) {
+ rc = -EOPNOTSUPP;
+ goto unlock_pages;
+ }
+
+ *paddr = iommu_iova_to_phys(area->storage_domain, iova);
+ if (!*paddr) {
+ rc = -EINVAL;
+ goto unlock_pages;
+ }
+
+ tmp_length = PAGE_SIZE - offset_in_page(iova);
+ tmp_paddr = *paddr;
+ /*
+ * Scan the domain for the contiguous physical address length so that
+ * userspace search can be optimized for fewer ioctls. A max_length of
+ * 0 means no limit.
+ */
+ while (iova < iopt_area_last_iova(area)) {
+ unsigned long next_iova;
+ u64 next_paddr;
+
+ if (max_length && tmp_length >= max_length)
+ break;
+
+ if (check_add_overflow(iova, PAGE_SIZE, &next_iova))
+ break;
+
+ if (next_iova > iopt_area_last_iova(area))
+ break;
+
+ next_paddr = iommu_iova_to_phys(area->storage_domain, next_iova);
+
+ if (!next_paddr || next_paddr != tmp_paddr + PAGE_SIZE)
+ break;
+
+ iova = next_iova;
+ tmp_paddr += PAGE_SIZE;
+ tmp_length += PAGE_SIZE;
+ }
+
+ if (max_length && tmp_length > max_length)
+ tmp_length = max_length;
+ *length = tmp_length;
+
+unlock_pages:
+ mutex_unlock(&pages->mutex);
+unlock_exit:
+ up_read(&iopt->iova_rwsem);
+
+ return rc;
+}
+#endif
+
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
{
/* If the IOVAs are empty then unmap all succeeds */
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
index fed06c2b728e..ad1c3031f6a9 100644
--- a/drivers/iommu/iommufd/ioas.c
+++ b/drivers/iommu/iommufd/ioas.c
@@ -375,6 +375,39 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
return rc;
}
+#ifdef CONFIG_IOMMUFD_NOIOMMU
+int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_ioas_noiommu_get_pa *cmd = ucmd->cmd;
+ struct iommufd_ioas *ioas;
+ int rc;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ if (cmd->flags || cmd->__reserved)
+ return -EOPNOTSUPP;
+
+ if (cmd->iova >= ULONG_MAX)
+ return -EOVERFLOW;
+
+ ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id);
+ if (IS_ERR(ioas))
+ return PTR_ERR(ioas);
+
+ rc = iopt_get_phys(&ioas->iopt, cmd->iova, &cmd->out_phys,
+ &cmd->length);
+ if (rc)
+ goto out_put;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+out_put:
+ iommufd_put_object(ucmd->ictx, &ioas->obj);
+
+ return rc;
+}
+#endif
+
static void iommufd_release_all_iova_rwsem(struct iommufd_ctx *ictx,
struct xarray *ioas_list)
{
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index c8ed612e896a..15909ba75c18 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -118,6 +118,16 @@ int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
+#ifdef CONFIG_IOMMUFD_NOIOMMU
+int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova, u64 *paddr,
+ u64 *length);
+#else
+static inline int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova,
+ u64 *paddr, u64 *length)
+{
+ return -EOPNOTSUPP;
+}
+#endif
int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
struct iommu_domain *domain,
@@ -346,6 +356,14 @@ int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd);
int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd);
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
+#ifdef CONFIG_IOMMUFD_NOIOMMU
+int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd);
+#else
+static inline int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd)
+{
+ return -EOPNOTSUPP;
+}
+#endif
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
struct iommufd_ctx *ictx);
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index f6ae60bd3f70..a4668995269c 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -424,6 +424,7 @@ union ucmd_buffer {
struct iommu_ioas_alloc alloc;
struct iommu_ioas_allow_iovas allow_iovas;
struct iommu_ioas_copy ioas_copy;
+ struct iommu_ioas_noiommu_get_pa noiommu_get_pa;
struct iommu_ioas_iova_ranges iova_ranges;
struct iommu_ioas_map map;
struct iommu_ioas_unmap unmap;
@@ -482,6 +483,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, iova),
IOCTL_OP(IOMMU_IOAS_MAP_FILE, iommufd_ioas_map_file,
struct iommu_ioas_map_file, iova),
+ IOCTL_OP(IOMMU_IOAS_NOIOMMU_GET_PA, iommufd_ioas_noiommu_get_pa, struct iommu_ioas_noiommu_get_pa,
+ out_phys),
IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap,
length),
IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64),
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index e998dfbd6960..552bc5c096b4 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -57,6 +57,7 @@ enum {
IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93,
IOMMUFD_CMD_HW_QUEUE_ALLOC = 0x94,
+ IOMMUFD_CMD_IOAS_NOIOMMU_GET_PA = 0x95,
};
/**
@@ -219,6 +220,32 @@ struct iommu_ioas_map {
};
#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
+/**
+ * struct iommu_ioas_noiommu_get_pa - ioctl(IOMMU_IOAS_NOIOMMU_GET_PA)
+ * @size: sizeof(struct iommu_ioas_noiommu_get_pa)
+ * @flags: Reserved, must be 0 for now
+ * @ioas_id: IOAS ID to query IOVA to PA mapping from
+ * @__reserved: Must be 0
+ * @iova: IOVA to query
+ * @length: On input, maximum number of bytes to scan for contiguity (0 means
+ * no limit). On output, actual number of contiguous bytes starting
+ * from out_phys.
+ * @out_phys: Output physical address the IOVA maps to
+ *
+ * Query the physical address backing an IOVA range. The beginning of the
+ * range must be mapped already. For noiommu devices doing unsafe DMA only.
+ */
+struct iommu_ioas_noiommu_get_pa {
+ __u32 size;
+ __u32 flags;
+ __u32 ioas_id;
+ __u32 __reserved;
+ __aligned_u64 iova;
+ __aligned_u64 length;
+ __aligned_u64 out_phys;
+};
+#define IOMMU_IOAS_NOIOMMU_GET_PA _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_NOIOMMU_GET_PA)
+
/**
* struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
* @size: sizeof(struct iommu_ioas_map_file)
--
2.43.0
next prev parent reply other threads:[~2026-06-03 22:02 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-03 22:02 [PATCH v8 0/6] iommufd: Enable noiommu mode for cdev Jacob Pan
2026-06-03 22:02 ` [PATCH v8 1/6] iommufd: Support a HWPT without an iommu driver for noiommu Jacob Pan
2026-06-03 22:02 ` [PATCH v8 2/6] iommufd: Move igroup allocation to a function Jacob Pan
2026-06-03 22:02 ` [PATCH v8 3/6] iommufd: Allow binding to a noiommu device Jacob Pan
2026-06-03 22:02 ` Jacob Pan [this message]
2026-06-03 22:02 ` [PATCH v8 5/6] vfio: Enable cdev noiommu mode under iommufd Jacob Pan
2026-06-08 23:19 ` Alex Williamson
2026-06-09 18:50 ` Jacob Pan
2026-06-09 20:07 ` Alex Williamson
2026-06-09 21:11 ` Jacob Pan
2026-06-03 22:02 ` [PATCH v8 6/6] Documentation: Update VFIO NOIOMMU mode Jacob Pan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260603220211.2584590-5-jacob.pan@linux.microsoft.com \
--to=jacob.pan@linux.microsoft.com \
--cc=alex@shazbot.org \
--cc=baolu.lu@linux.intel.com \
--cc=dmatlack@google.com \
--cc=iommu@lists.linux.dev \
--cc=jgg@nvidia.com \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=nicolinc@nvidia.com \
--cc=pasha.tatashin@soleen.com \
--cc=robin.murphy@arm.com \
--cc=skhawaja@google.com \
--cc=smostafa@google.com \
--cc=ssengar@linux.microsoft.com \
--cc=will@kernel.org \
--cc=yi.l.liu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.