From: Baolu Lu <baolu.lu@linux.intel.com>
To: Jacob Pan <jacob.pan@linux.microsoft.com>,
linux-kernel@vger.kernel.org,
"iommu@lists.linux.dev" <iommu@lists.linux.dev>,
Jason Gunthorpe <jgg@nvidia.com>,
Alex Williamson <alex@shazbot.org>,
Joerg Roedel <joro@8bytes.org>,
Mostafa Saleh <smostafa@google.com>,
David Matlack <dmatlack@google.com>,
Robin Murphy <robin.murphy@arm.com>,
Nicolin Chen <nicolinc@nvidia.com>,
"Tian, Kevin" <kevin.tian@intel.com>, Yi Liu <yi.l.liu@intel.com>
Cc: Saurabh Sengar <ssengar@linux.microsoft.com>,
skhawaja@google.com, pasha.tatashin@soleen.com,
Will Deacon <will@kernel.org>
Subject: Re: [PATCH v5 5/9] iommufd: Add an ioctl to query PA from IOVA for noiommu mode
Date: Wed, 13 May 2026 15:53:43 +0800 [thread overview]
Message-ID: <a8db8d35-e7de-4e74-886b-d978e1ecff06@linux.intel.com> (raw)
In-Reply-To: <20260511184116.3687392-6-jacob.pan@linux.microsoft.com>
On 5/12/26 02:41, Jacob Pan wrote:
> To support no-IOMMU mode where userspace drivers perform unsafe DMA
> using physical addresses, introduce a new API to retrieve the
> physical address of a user-allocated DMA buffer that has been mapped to
> an IOVA via IOAS. The mapping is backed by SW-only I/O page tables
> maintained by the generic IOMMUPT framework.
>
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Jacob Pan <jacob.pan@linux.microsoft.com>
> ---
> v5:
> - Add header stubs for iopt_get_phys() and
> iommufd_ioas_noiommu_get_pa() to avoid ifdef at call sites (Kevin)
> v4:
> - Fix ioctl return type (Yi Liu)
> v2:
> - New patch
> ---
> drivers/iommu/iommufd/io_pagetable.c | 62 +++++++++++++++++++++++++
> drivers/iommu/iommufd/ioas.c | 30 ++++++++++++
> drivers/iommu/iommufd/iommufd_private.h | 18 +++++++
> drivers/iommu/iommufd/main.c | 3 ++
> include/uapi/linux/iommufd.h | 25 ++++++++++
> 5 files changed, 138 insertions(+)
>
> diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
> index 24d4917105d9..1ee7c8e6408c 100644
> --- a/drivers/iommu/iommufd/io_pagetable.c
> +++ b/drivers/iommu/iommufd/io_pagetable.c
> @@ -859,6 +859,68 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
> return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
> }
>
> +#ifdef CONFIG_IOMMUFD_NOIOMMU
> +int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova, u64 *paddr,
> + u64 *length)
> +{
> + struct iopt_area *area;
> + u64 tmp_length = 0;
> + u64 tmp_paddr = 0;
> + int rc = 0;
> +
> + down_read(&iopt->iova_rwsem);
> + area = iopt_area_iter_first(iopt, iova, iova);
> + if (!area || !area->pages) {
> + rc = -ENOENT;
> + goto unlock_exit;
> + }
> +
> + if (!area->storage_domain ||
> + area->storage_domain->owner != &iommufd_noiommu_ops) {
> + rc = -EOPNOTSUPP;
> + goto unlock_exit;
> + }
> +
> + *paddr = iommu_iova_to_phys(area->storage_domain, iova);
> + if (!*paddr) {
> + rc = -EINVAL;
> + goto unlock_exit;
> + }
> +
> + tmp_length = PAGE_SIZE - offset_in_page(iova);
> + tmp_paddr = *paddr;
> + /*
> + * Scan the domain for the contiguous physical address length so that
> + * userspace search can be optimized for fewer ioctls.
> + */
> + while (iova < iopt_area_last_iova(area)) {
> + unsigned long next_iova;
> + u64 next_paddr;
> +
> + if (check_add_overflow(iova, PAGE_SIZE, &next_iova))
> + break;
> +
> + if (next_iova > iopt_area_last_iova(area))
> + break;
> +
> + next_paddr = iommu_iova_to_phys(area->storage_domain, next_iova);
> +
> + if (!next_paddr || next_paddr != tmp_paddr + PAGE_SIZE)
> + break;
> +
> + iova = next_iova;
> + tmp_paddr += PAGE_SIZE;
> + tmp_length += PAGE_SIZE;
> + }
> + *length = tmp_length;
> +
> +unlock_exit:
> + up_read(&iopt->iova_rwsem);
> +
> + return rc;
> +}
> +#endif
> +
> int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
> {
> /* If the IOVAs are empty then unmap all succeeds */
> diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
> index fed06c2b728e..666440e32c9e 100644
> --- a/drivers/iommu/iommufd/ioas.c
> +++ b/drivers/iommu/iommufd/ioas.c
> @@ -375,6 +375,36 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
> return rc;
> }
>
> +#ifdef CONFIG_IOMMUFD_NOIOMMU
> +int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd)
> +{
> + struct iommu_ioas_noiommu_get_pa *cmd = ucmd->cmd;
> + struct iommufd_ioas *ioas;
> + int rc;
> +
> + if (!capable(CAP_SYS_RAWIO))
> + return -EPERM;
> +
> + if (cmd->flags || cmd->__reserved)
> + return -EOPNOTSUPP;
> +
> + ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id);
> + if (IS_ERR(ioas))
> + return PTR_ERR(ioas);
> +
> + rc = iopt_get_phys(&ioas->iopt, cmd->iova, &cmd->out_phys,
> + &cmd->out_length);
> + if (rc)
> + goto out_put;
> +
> + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
> +out_put:
> + iommufd_put_object(ucmd->ictx, &ioas->obj);
> +
> + return rc;
> +}
> +#endif
> +
> static void iommufd_release_all_iova_rwsem(struct iommufd_ctx *ictx,
> struct xarray *ioas_list)
> {
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index 2682b5baa6e9..13f1506d8066 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -118,6 +118,16 @@ int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
> int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
> unsigned long length, unsigned long *unmapped);
> int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
> +#ifdef CONFIG_IOMMUFD_NOIOMMU
> +int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova, u64 *paddr,
> + u64 *length);
> +#else
> +static inline int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova,
> + u64 *paddr, u64 *length)
> +{
> + return -EOPNOTSUPP;
> +}
> +#endif
>
> int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
> struct iommu_domain *domain,
> @@ -346,6 +356,14 @@ int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd);
> int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd);
> int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
> int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
> +#ifdef CONFIG_IOMMUFD_NOIOMMU
> +int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd);
> +#else
> +static inline int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd)
> +{
> + return -EOPNOTSUPP;
> +}
> +#endif
> int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
> int iommufd_option_rlimit_mode(struct iommu_option *cmd,
> struct iommufd_ctx *ictx);
> diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
> index 8c6d43601afb..3b4192d70570 100644
> --- a/drivers/iommu/iommufd/main.c
> +++ b/drivers/iommu/iommufd/main.c
> @@ -424,6 +424,7 @@ union ucmd_buffer {
> struct iommu_ioas_alloc alloc;
> struct iommu_ioas_allow_iovas allow_iovas;
> struct iommu_ioas_copy ioas_copy;
> + struct iommu_ioas_noiommu_get_pa noiommu_get_pa;
> struct iommu_ioas_iova_ranges iova_ranges;
> struct iommu_ioas_map map;
> struct iommu_ioas_unmap unmap;
> @@ -482,6 +483,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
> IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, iova),
> IOCTL_OP(IOMMU_IOAS_MAP_FILE, iommufd_ioas_map_file,
> struct iommu_ioas_map_file, iova),
> + IOCTL_OP(IOMMU_IOAS_NOIOMMU_GET_PA, iommufd_ioas_noiommu_get_pa, struct iommu_ioas_noiommu_get_pa,
> + out_phys),
> IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap,
> length),
> IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64),
> diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
> index e998dfbd6960..7df366d161f1 100644
> --- a/include/uapi/linux/iommufd.h
> +++ b/include/uapi/linux/iommufd.h
> @@ -57,6 +57,7 @@ enum {
> IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
> IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93,
> IOMMUFD_CMD_HW_QUEUE_ALLOC = 0x94,
> + IOMMUFD_CMD_IOAS_NOIOMMU_GET_PA = 0x95,
> };
>
> /**
> @@ -219,6 +220,30 @@ struct iommu_ioas_map {
> };
> #define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
>
> +/**
> + * struct iommu_ioas_noiommu_get_pa - ioctl(IOMMU_IOAS_NOIOMMU_GET_PA)
> + * @size: sizeof(struct iommu_ioas_noiommu_get_pa)
> + * @flags: Reserved, must be 0 for now
> + * @ioas_id: IOAS ID to query IOVA to PA mapping from
> + * @__reserved: Must be 0
> + * @iova: IOVA to query
> + * @out_length: Number of bytes of contiguous physical address space starting from @out_phys
Nit: Instead of making this behavior mandatory, would it be valuable to
allocate a bit in @flags to toggle this behavior? For extremely large
mappings (e.g., several GBs of contiguous hugepages), the loop to
determine the contiguous physical addresses might take a long time. A
very long scan could theoretically delay userspace DMA setup.
> + * @out_phys: Output physical address the IOVA maps to
> + *
> + * Query the physical address backing an IOVA range. The entire range must be
> + * mapped already. For noiommu devices doing unsafe DMA only.
> + */
> +struct iommu_ioas_noiommu_get_pa {
> + __u32 size;
> + __u32 flags;
> + __u32 ioas_id;
> + __u32 __reserved;
> + __aligned_u64 iova;
> + __aligned_u64 out_length;
> + __aligned_u64 out_phys;
> +};
> +#define IOMMU_IOAS_NOIOMMU_GET_PA _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_NOIOMMU_GET_PA)
> +
> /**
> * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
> * @size: sizeof(struct iommu_ioas_map_file)
Otherwise, this looks good to me,
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
next prev parent reply other threads:[~2026-05-13 7:54 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-11 18:41 [PATCH v5 0/9] iommufd: Enable noiommu mode for cdev Jacob Pan
2026-05-11 18:41 ` [PATCH v5 1/9] vfio: Rename VFIO_NOIOMMU to VFIO_GROUP_NOIOMMU Jacob Pan
2026-05-11 18:41 ` [PATCH v5 2/9] iommufd: Support a HWPT without an iommu driver for noiommu Jacob Pan
2026-05-13 6:58 ` Baolu Lu
2026-05-13 21:30 ` Jacob Pan
2026-05-13 19:18 ` Samiullah Khawaja
2026-05-11 18:41 ` [PATCH v5 3/9] iommufd: Move igroup allocation to a function Jacob Pan
2026-05-13 7:18 ` Baolu Lu
2026-05-11 18:41 ` [PATCH v5 4/9] iommufd: Allow binding to a noiommu device Jacob Pan
2026-05-13 7:37 ` Baolu Lu
2026-05-13 22:08 ` Jacob Pan
2026-05-14 6:51 ` Baolu Lu
2026-05-11 18:41 ` [PATCH v5 5/9] iommufd: Add an ioctl to query PA from IOVA for noiommu mode Jacob Pan
2026-05-11 18:58 ` Jacob Pan
2026-05-13 7:53 ` Baolu Lu [this message]
2026-05-13 12:22 ` Jason Gunthorpe
2026-05-13 22:20 ` Jacob Pan
2026-05-13 23:26 ` Jason Gunthorpe
2026-05-11 18:41 ` [PATCH v5 6/9] vfio/group: Add VFIO_CDEV_NOIOMMU Kconfig and tolerate NULL group Jacob Pan
2026-05-11 18:41 ` [PATCH v5 7/9] vfio: Enable cdev noiommu mode under iommufd Jacob Pan
2026-05-11 18:41 ` [PATCH v5 8/9] selftests/vfio: Add iommufd noiommu mode selftest for cdev Jacob Pan
2026-05-11 18:41 ` [PATCH v5 9/9] Documentation: Update VFIO NOIOMMU mode Jacob Pan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a8db8d35-e7de-4e74-886b-d978e1ecff06@linux.intel.com \
--to=baolu.lu@linux.intel.com \
--cc=alex@shazbot.org \
--cc=dmatlack@google.com \
--cc=iommu@lists.linux.dev \
--cc=jacob.pan@linux.microsoft.com \
--cc=jgg@nvidia.com \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=nicolinc@nvidia.com \
--cc=pasha.tatashin@soleen.com \
--cc=robin.murphy@arm.com \
--cc=skhawaja@google.com \
--cc=smostafa@google.com \
--cc=ssengar@linux.microsoft.com \
--cc=will@kernel.org \
--cc=yi.l.liu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.