The Linux Kernel Mailing List
 help / color / mirror / Atom feed
From: Baolu Lu <baolu.lu@linux.intel.com>
To: Jacob Pan <jacob.pan@linux.microsoft.com>,
	linux-kernel@vger.kernel.org,
	"iommu@lists.linux.dev" <iommu@lists.linux.dev>,
	Jason Gunthorpe <jgg@nvidia.com>,
	Alex Williamson <alex@shazbot.org>,
	Joerg Roedel <joro@8bytes.org>,
	Mostafa Saleh <smostafa@google.com>,
	David Matlack <dmatlack@google.com>,
	Robin Murphy <robin.murphy@arm.com>,
	Nicolin Chen <nicolinc@nvidia.com>,
	"Tian, Kevin" <kevin.tian@intel.com>, Yi Liu <yi.l.liu@intel.com>
Cc: Saurabh Sengar <ssengar@linux.microsoft.com>,
	skhawaja@google.com, pasha.tatashin@soleen.com,
	Will Deacon <will@kernel.org>
Subject: Re: [PATCH v5 5/9] iommufd: Add an ioctl to query PA from IOVA for noiommu mode
Date: Wed, 13 May 2026 15:53:43 +0800	[thread overview]
Message-ID: <a8db8d35-e7de-4e74-886b-d978e1ecff06@linux.intel.com> (raw)
In-Reply-To: <20260511184116.3687392-6-jacob.pan@linux.microsoft.com>

On 5/12/26 02:41, Jacob Pan wrote:
> To support no-IOMMU mode where userspace drivers perform unsafe DMA
> using physical addresses, introduce a new API to retrieve the
> physical address of a user-allocated DMA buffer that has been mapped to
> an IOVA via IOAS. The mapping is backed by SW-only I/O page tables
> maintained by the generic IOMMUPT framework.
> 
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Jacob Pan <jacob.pan@linux.microsoft.com>
> ---
> v5:
>     - Add header stubs for iopt_get_phys() and
>       iommufd_ioas_noiommu_get_pa() to avoid ifdef at call sites (Kevin)
> v4:
>     - Fix ioctl return type (Yi Liu)
> v2:
>     - New patch
> ---
>   drivers/iommu/iommufd/io_pagetable.c    | 62 +++++++++++++++++++++++++
>   drivers/iommu/iommufd/ioas.c            | 30 ++++++++++++
>   drivers/iommu/iommufd/iommufd_private.h | 18 +++++++
>   drivers/iommu/iommufd/main.c            |  3 ++
>   include/uapi/linux/iommufd.h            | 25 ++++++++++
>   5 files changed, 138 insertions(+)
> 
> diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
> index 24d4917105d9..1ee7c8e6408c 100644
> --- a/drivers/iommu/iommufd/io_pagetable.c
> +++ b/drivers/iommu/iommufd/io_pagetable.c
> @@ -859,6 +859,68 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
>   	return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
>   }
>   
> +#ifdef CONFIG_IOMMUFD_NOIOMMU
> +int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova, u64 *paddr,
> +		  u64 *length)
> +{
> +	struct iopt_area *area;
> +	u64 tmp_length = 0;
> +	u64 tmp_paddr = 0;
> +	int rc = 0;
> +
> +	down_read(&iopt->iova_rwsem);
> +	area = iopt_area_iter_first(iopt, iova, iova);
> +	if (!area || !area->pages) {
> +		rc = -ENOENT;
> +		goto unlock_exit;
> +	}
> +
> +	if (!area->storage_domain ||
> +	    area->storage_domain->owner != &iommufd_noiommu_ops) {
> +		rc = -EOPNOTSUPP;
> +		goto unlock_exit;
> +	}
> +
> +	*paddr = iommu_iova_to_phys(area->storage_domain, iova);
> +	if (!*paddr) {
> +		rc = -EINVAL;
> +		goto unlock_exit;
> +	}
> +
> +	tmp_length = PAGE_SIZE - offset_in_page(iova);
> +	tmp_paddr = *paddr;
> +	/*
> +	 * Scan the domain for the contiguous physical address length so that
> +	 * userspace search can be optimized for fewer ioctls.
> +	 */
> +	while (iova < iopt_area_last_iova(area)) {
> +		unsigned long next_iova;
> +		u64 next_paddr;
> +
> +		if (check_add_overflow(iova, PAGE_SIZE, &next_iova))
> +			break;
> +
> +		if (next_iova > iopt_area_last_iova(area))
> +			break;
> +
> +		next_paddr = iommu_iova_to_phys(area->storage_domain, next_iova);
> +
> +		if (!next_paddr || next_paddr != tmp_paddr + PAGE_SIZE)
> +			break;
> +
> +		iova = next_iova;
> +		tmp_paddr += PAGE_SIZE;
> +		tmp_length += PAGE_SIZE;
> +	}
> +	*length = tmp_length;
> +
> +unlock_exit:
> +	up_read(&iopt->iova_rwsem);
> +
> +	return rc;
> +}
> +#endif
> +
>   int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
>   {
>   	/* If the IOVAs are empty then unmap all succeeds */
> diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
> index fed06c2b728e..666440e32c9e 100644
> --- a/drivers/iommu/iommufd/ioas.c
> +++ b/drivers/iommu/iommufd/ioas.c
> @@ -375,6 +375,36 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
>   	return rc;
>   }
>   
> +#ifdef CONFIG_IOMMUFD_NOIOMMU
> +int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd)
> +{
> +	struct iommu_ioas_noiommu_get_pa *cmd = ucmd->cmd;
> +	struct iommufd_ioas *ioas;
> +	int rc;
> +
> +	if (!capable(CAP_SYS_RAWIO))
> +		return -EPERM;
> +
> +	if (cmd->flags || cmd->__reserved)
> +		return -EOPNOTSUPP;
> +
> +	ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id);
> +	if (IS_ERR(ioas))
> +		return PTR_ERR(ioas);
> +
> +	rc = iopt_get_phys(&ioas->iopt, cmd->iova, &cmd->out_phys,
> +			   &cmd->out_length);
> +	if (rc)
> +		goto out_put;
> +
> +	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
> +out_put:
> +	iommufd_put_object(ucmd->ictx, &ioas->obj);
> +
> +	return rc;
> +}
> +#endif
> +
>   static void iommufd_release_all_iova_rwsem(struct iommufd_ctx *ictx,
>   					   struct xarray *ioas_list)
>   {
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index 2682b5baa6e9..13f1506d8066 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -118,6 +118,16 @@ int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
>   int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
>   		    unsigned long length, unsigned long *unmapped);
>   int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
> +#ifdef CONFIG_IOMMUFD_NOIOMMU
> +int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova, u64 *paddr,
> +		  u64 *length);
> +#else
> +static inline int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova,
> +				u64 *paddr, u64 *length)
> +{
> +	return -EOPNOTSUPP;
> +}
> +#endif
>   
>   int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
>   				   struct iommu_domain *domain,
> @@ -346,6 +356,14 @@ int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd);
>   int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd);
>   int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
>   int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
> +#ifdef CONFIG_IOMMUFD_NOIOMMU
> +int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd);
> +#else
> +static inline int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd)
> +{
> +	return -EOPNOTSUPP;
> +}
> +#endif
>   int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
>   int iommufd_option_rlimit_mode(struct iommu_option *cmd,
>   			       struct iommufd_ctx *ictx);
> diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
> index 8c6d43601afb..3b4192d70570 100644
> --- a/drivers/iommu/iommufd/main.c
> +++ b/drivers/iommu/iommufd/main.c
> @@ -424,6 +424,7 @@ union ucmd_buffer {
>   	struct iommu_ioas_alloc alloc;
>   	struct iommu_ioas_allow_iovas allow_iovas;
>   	struct iommu_ioas_copy ioas_copy;
> +	struct iommu_ioas_noiommu_get_pa noiommu_get_pa;
>   	struct iommu_ioas_iova_ranges iova_ranges;
>   	struct iommu_ioas_map map;
>   	struct iommu_ioas_unmap unmap;
> @@ -482,6 +483,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
>   	IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, iova),
>   	IOCTL_OP(IOMMU_IOAS_MAP_FILE, iommufd_ioas_map_file,
>   		 struct iommu_ioas_map_file, iova),
> +	IOCTL_OP(IOMMU_IOAS_NOIOMMU_GET_PA, iommufd_ioas_noiommu_get_pa, struct iommu_ioas_noiommu_get_pa,
> +		 out_phys),
>   	IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap,
>   		 length),
>   	IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64),
> diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
> index e998dfbd6960..7df366d161f1 100644
> --- a/include/uapi/linux/iommufd.h
> +++ b/include/uapi/linux/iommufd.h
> @@ -57,6 +57,7 @@ enum {
>   	IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
>   	IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93,
>   	IOMMUFD_CMD_HW_QUEUE_ALLOC = 0x94,
> +	IOMMUFD_CMD_IOAS_NOIOMMU_GET_PA = 0x95,
>   };
>   
>   /**
> @@ -219,6 +220,30 @@ struct iommu_ioas_map {
>   };
>   #define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
>   
> +/**
> + * struct iommu_ioas_noiommu_get_pa - ioctl(IOMMU_IOAS_NOIOMMU_GET_PA)
> + * @size: sizeof(struct iommu_ioas_noiommu_get_pa)
> + * @flags: Reserved, must be 0 for now
> + * @ioas_id: IOAS ID to query IOVA to PA mapping from
> + * @__reserved: Must be 0
> + * @iova: IOVA to query
> + * @out_length: Number of bytes of contiguous physical memory starting at @out_phys

Nit: Instead of making the contiguous-length scan mandatory, would it be
valuable to allocate a bit in @flags to toggle it? For extremely large
mappings (e.g., several GBs of contiguous hugepages), the page-by-page
loop that determines the contiguous physical length might take a long
time. A very long scan could theoretically delay userspace DMA setup.

> + * @out_phys: Output physical address the IOVA maps to
> + *
> + * Query the physical address backing an IOVA range. The entire range must be
> + * mapped already. For noiommu devices doing unsafe DMA only.
> + */
> +struct iommu_ioas_noiommu_get_pa {
> +	__u32 size;
> +	__u32 flags;
> +	__u32 ioas_id;
> +	__u32 __reserved;
> +	__aligned_u64 iova;
> +	__aligned_u64 out_length;
> +	__aligned_u64 out_phys;
> +};
> +#define IOMMU_IOAS_NOIOMMU_GET_PA _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_NOIOMMU_GET_PA)
> +
>   /**
>    * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
>    * @size: sizeof(struct iommu_ioas_map_file)

Otherwise, this looks good to me,

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>

  parent reply	other threads:[~2026-05-13  7:54 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-11 18:41 [PATCH v5 0/9] iommufd: Enable noiommu mode for cdev Jacob Pan
2026-05-11 18:41 ` [PATCH v5 1/9] vfio: Rename VFIO_NOIOMMU to VFIO_GROUP_NOIOMMU Jacob Pan
2026-05-11 18:41 ` [PATCH v5 2/9] iommufd: Support a HWPT without an iommu driver for noiommu Jacob Pan
2026-05-13  6:58   ` Baolu Lu
2026-05-13 21:30     ` Jacob Pan
2026-05-13 19:18   ` Samiullah Khawaja
2026-05-11 18:41 ` [PATCH v5 3/9] iommufd: Move igroup allocation to a function Jacob Pan
2026-05-13  7:18   ` Baolu Lu
2026-05-11 18:41 ` [PATCH v5 4/9] iommufd: Allow binding to a noiommu device Jacob Pan
2026-05-13  7:37   ` Baolu Lu
2026-05-13 22:08     ` Jacob Pan
2026-05-14  6:51       ` Baolu Lu
2026-05-11 18:41 ` [PATCH v5 5/9] iommufd: Add an ioctl to query PA from IOVA for noiommu mode Jacob Pan
2026-05-11 18:58   ` Jacob Pan
2026-05-13  7:53   ` Baolu Lu [this message]
2026-05-13 12:22     ` Jason Gunthorpe
2026-05-13 22:20       ` Jacob Pan
2026-05-13 23:26         ` Jason Gunthorpe
2026-05-11 18:41 ` [PATCH v5 6/9] vfio/group: Add VFIO_CDEV_NOIOMMU Kconfig and tolerate NULL group Jacob Pan
2026-05-11 18:41 ` [PATCH v5 7/9] vfio: Enable cdev noiommu mode under iommufd Jacob Pan
2026-05-11 18:41 ` [PATCH v5 8/9] selftests/vfio: Add iommufd noiommu mode selftest for cdev Jacob Pan
2026-05-11 18:41 ` [PATCH v5 9/9] Documentation: Update VFIO NOIOMMU mode Jacob Pan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a8db8d35-e7de-4e74-886b-d978e1ecff06@linux.intel.com \
    --to=baolu.lu@linux.intel.com \
    --cc=alex@shazbot.org \
    --cc=dmatlack@google.com \
    --cc=iommu@lists.linux.dev \
    --cc=jacob.pan@linux.microsoft.com \
    --cc=jgg@nvidia.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nicolinc@nvidia.com \
    --cc=pasha.tatashin@soleen.com \
    --cc=robin.murphy@arm.com \
    --cc=skhawaja@google.com \
    --cc=smostafa@google.com \
    --cc=ssengar@linux.microsoft.com \
    --cc=will@kernel.org \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox