Archive-only list for patches
 help / color / mirror / Atom feed
From: Robin Murphy <robin.murphy@arm.com>
To: Nicolin Chen <nicolinc@nvidia.com>,
	jgg@nvidia.com, kevin.tian@intel.com, tglx@linutronix.de,
	maz@kernel.org
Cc: joro@8bytes.org, will@kernel.org, shuah@kernel.org,
	iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kselftest@vger.kernel.org, eric.auger@redhat.com,
	baolu.lu@linux.intel.com, yi.l.liu@intel.com,
	yury.norov@gmail.com, jacob.pan@linux.microsoft.com,
	patches@lists.linux.dev
Subject: Re: [PATCH v2 3/7] iommu: Make iommu_dma_prepare_msi() into a generic operation
Date: Fri, 21 Feb 2025 15:39:45 +0000	[thread overview]
Message-ID: <5b9e15e1-8081-46ef-b9db-3872e98a6f35@arm.com> (raw)
In-Reply-To: <4ca696150d2baee03af27c4ddefdb7b0b0280e7b.1740014950.git.nicolinc@nvidia.com>

On 2025-02-20 1:31 am, Nicolin Chen wrote:
> From: Jason Gunthorpe <jgg@nvidia.com>
> 
> SW_MSI allows the IOMMU to translate an MSI message before the MSI message
> is delivered to the interrupt controller. On such systems, an iommu_domain
> must have a translation for the MSI message for interrupts to work.
> 
> The IRQ subsystem will call into the IOMMU to request that a physical page be
> set up to receive MSI messages, and the IOMMU then sets an IOVA that maps
> to that physical page. Ultimately the IOVA is programmed into the device
> via the msi_msg.
> 
> Generalize this by allowing iommu_domain owners to provide implementations
> of this mapping. Add a function pointer in struct iommu_domain to allow a
> domain owner to provide its own implementation.
> 
> Have dma-iommu supply its implementation for IOMMU_DOMAIN_DMA types during
> the iommu_get_dma_cookie() path. For IOMMU_DOMAIN_UNMANAGED types used by
> VFIO (and iommufd for now), have the same iommu_dma_sw_msi set as well in
> the iommu_get_msi_cookie() path.
> 
> Hold the group mutex while in iommu_dma_prepare_msi() to ensure the domain
> doesn't change or become freed while running. Races with IRQ operations
> from VFIO and domain changes from iommufd are possible here.
> 
> Replace the msi_prepare_lock with a lockdep assertion for the group mutex
> as documentation. For dma-iommu.c, each iommu_domain is unique to a
> group.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
>   include/linux/iommu.h     | 44 ++++++++++++++++++++++++++-------------
>   drivers/iommu/dma-iommu.c | 33 +++++++++++++----------------
>   drivers/iommu/iommu.c     | 29 ++++++++++++++++++++++++++
>   3 files changed, 73 insertions(+), 33 deletions(-)
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index caee952febd4..761c5e186de9 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -44,6 +44,8 @@ struct iommu_dma_cookie;
>   struct iommu_fault_param;
>   struct iommufd_ctx;
>   struct iommufd_viommu;
> +struct msi_desc;
> +struct msi_msg;
>   
>   #define IOMMU_FAULT_PERM_READ	(1 << 0) /* read */
>   #define IOMMU_FAULT_PERM_WRITE	(1 << 1) /* write */
> @@ -216,6 +218,12 @@ struct iommu_domain {
>   	struct iommu_domain_geometry geometry;
>   	struct iommu_dma_cookie *iova_cookie;
>   	int (*iopf_handler)(struct iopf_group *group);
> +
> +#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
> +	int (*sw_msi)(struct iommu_domain *domain, struct msi_desc *desc,
> +		      phys_addr_t msi_addr);
> +#endif
> +
>   	void *fault_data;
>   	union {
>   		struct {
> @@ -234,6 +242,16 @@ struct iommu_domain {
>   	};
>   };
>   
> +static inline void iommu_domain_set_sw_msi(
> +	struct iommu_domain *domain,
> +	int (*sw_msi)(struct iommu_domain *domain, struct msi_desc *desc,
> +		      phys_addr_t msi_addr))
> +{
> +#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
> +	domain->sw_msi = sw_msi;
> +#endif
> +}

Yuck. Realistically we are going to have no more than two different 
implementations of this; a fiddly callback interface seems overkill. All 
we should need in the domain is a simple indicator of *which* MSI 
translation scheme is in use (if it can't be squeezed into the domain 
type itself), then iommu_dma_prepare_msi() can simply dispatch between 
iommu-dma and IOMMUFD based on that, and then it's easy to solve all the 
other fragility issues too.

Thanks,
Robin.

> +
>   static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
>   {
>   	return domain->type & __IOMMU_DOMAIN_DMA_API;
> @@ -1470,6 +1488,18 @@ static inline ioasid_t iommu_alloc_global_pasid(struct device *dev)
>   static inline void iommu_free_global_pasid(ioasid_t pasid) {}
>   #endif /* CONFIG_IOMMU_API */
>   
> +#ifdef CONFIG_IRQ_MSI_IOMMU
> +#ifdef CONFIG_IOMMU_API
> +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr);
> +#else
> +static inline int iommu_dma_prepare_msi(struct msi_desc *desc,
> +					phys_addr_t msi_addr)
> +{
> +	return 0;
> +}
> +#endif /* CONFIG_IOMMU_API */
> +#endif /* CONFIG_IRQ_MSI_IOMMU */
> +
>   #if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API)
>   void iommu_group_mutex_assert(struct device *dev);
>   #else
> @@ -1503,26 +1533,12 @@ static inline void iommu_debugfs_setup(void) {}
>   #endif
>   
>   #ifdef CONFIG_IOMMU_DMA
> -#include <linux/msi.h>
> -
>   int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
> -
> -int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr);
> -
>   #else /* CONFIG_IOMMU_DMA */
> -
> -struct msi_desc;
> -struct msi_msg;
> -
>   static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
>   {
>   	return -ENODEV;
>   }
> -
> -static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> -{
> -	return 0;
> -}
>   #endif	/* CONFIG_IOMMU_DMA */
>   
>   /*
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index bf91e014d179..3b58244e6344 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -24,6 +24,7 @@
>   #include <linux/memremap.h>
>   #include <linux/mm.h>
>   #include <linux/mutex.h>
> +#include <linux/msi.h>
>   #include <linux/of_iommu.h>
>   #include <linux/pci.h>
>   #include <linux/scatterlist.h>
> @@ -102,6 +103,9 @@ static int __init iommu_dma_forcedac_setup(char *str)
>   }
>   early_param("iommu.forcedac", iommu_dma_forcedac_setup);
>   
> +static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
> +			    phys_addr_t msi_addr);
> +
>   /* Number of entries per flush queue */
>   #define IOVA_DEFAULT_FQ_SIZE	256
>   #define IOVA_SINGLE_FQ_SIZE	32768
> @@ -398,6 +402,7 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
>   		return -ENOMEM;
>   
>   	mutex_init(&domain->iova_cookie->mutex);
> +	iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi);
>   	return 0;
>   }
>   
> @@ -429,6 +434,7 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
>   
>   	cookie->msi_iova = base;
>   	domain->iova_cookie = cookie;
> +	iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi);
>   	return 0;
>   }
>   EXPORT_SYMBOL(iommu_get_msi_cookie);
> @@ -443,6 +449,9 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
>   	struct iommu_dma_cookie *cookie = domain->iova_cookie;
>   	struct iommu_dma_msi_page *msi, *tmp;
>   
> +	if (domain->sw_msi != iommu_dma_sw_msi)
> +		return;
> +
>   	if (!cookie)
>   		return;
>   
> @@ -1800,33 +1809,19 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
>   	return NULL;
>   }
>   
> -/**
> - * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain
> - * @desc: MSI descriptor, will store the MSI page
> - * @msi_addr: MSI target address to be mapped
> - *
> - * Return: 0 on success or negative error code if the mapping failed.
> - */
> -int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> +static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
> +			    phys_addr_t msi_addr)
>   {
>   	struct device *dev = msi_desc_to_dev(desc);
> -	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> -	struct iommu_dma_msi_page *msi_page;
> -	static DEFINE_MUTEX(msi_prepare_lock); /* see below */
> +	const struct iommu_dma_msi_page *msi_page;
>   
> -	if (!domain || !domain->iova_cookie) {
> +	if (!domain->iova_cookie) {
>   		msi_desc_set_iommu_msi_iova(desc, 0, 0);
>   		return 0;
>   	}
>   
> -	/*
> -	 * In fact the whole prepare operation should already be serialised by
> -	 * irq_domain_mutex further up the callchain, but that's pretty subtle
> -	 * on its own, so consider this locking as failsafe documentation...
> -	 */
> -	mutex_lock(&msi_prepare_lock);
> +	iommu_group_mutex_assert(dev);
>   	msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
> -	mutex_unlock(&msi_prepare_lock);
>   	if (!msi_page)
>   		return -ENOMEM;
>   
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 870c3cdbd0f6..022bf96a18c5 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -3596,3 +3596,32 @@ int iommu_replace_group_handle(struct iommu_group *group,
>   	return ret;
>   }
>   EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL");
> +
> +#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
> +/**
> + * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain
> + * @desc: MSI descriptor, will store the MSI page
> + * @msi_addr: MSI target address to be mapped
> + *
> + * The implementation of sw_msi() should take msi_addr and map it to
> + * an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the
> + * mapping information.
> + *
> + * Return: 0 on success or negative error code if the mapping failed.
> + */
> +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> +{
> +	struct device *dev = msi_desc_to_dev(desc);
> +	struct iommu_group *group = dev->iommu_group;
> +	int ret = 0;
> +
> +	if (!group)
> +		return 0;
> +
> +	mutex_lock(&group->mutex);
> +	if (group->domain && group->domain->sw_msi)
> +		ret = group->domain->sw_msi(group->domain, desc, msi_addr);
> +	mutex_unlock(&group->mutex);
> +	return ret;
> +}
> +#endif /* CONFIG_IRQ_MSI_IOMMU */


  reply	other threads:[~2025-02-21 15:39 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-20  1:31 [PATCH v2 0/7] iommu: Add MSI mapping support with nested SMMU (Part-1 core) Nicolin Chen
2025-02-20  1:31 ` [PATCH v2 1/7] genirq/msi: Store the IOMMU IOVA directly in msi_desc instead of iommu_cookie Nicolin Chen
2025-02-21  9:28   ` Thomas Gleixner
2025-02-21 11:10     ` Joerg Roedel
2025-02-21 13:41       ` Jason Gunthorpe
2025-02-21 14:00         ` Joerg Roedel
2025-02-21 14:05     ` Jason Gunthorpe
2025-02-20  1:31 ` [PATCH v2 2/7] genirq/msi: Refactor iommu_dma_compose_msi_msg() Nicolin Chen
2025-02-21  9:28   ` Thomas Gleixner
2025-02-20  1:31 ` [PATCH v2 3/7] iommu: Make iommu_dma_prepare_msi() into a generic operation Nicolin Chen
2025-02-21 15:39   ` Robin Murphy [this message]
2025-02-21 16:44     ` Jason Gunthorpe
2025-02-27 11:21       ` Robin Murphy
2025-02-27 15:32         ` Jason Gunthorpe
2025-02-27 17:46           ` Nicolin Chen
2025-02-27 19:47             ` Jason Gunthorpe
2025-02-20  1:31 ` [PATCH v2 4/7] irqchip: Have CONFIG_IRQ_MSI_IOMMU be selected by irqchips that need it Nicolin Chen
2025-02-21  9:30   ` Thomas Gleixner
2025-02-21 14:48   ` Jason Gunthorpe
2025-02-20  1:31 ` [PATCH v2 5/7] iommu: Turn fault_data to iommufd private pointer Nicolin Chen
2025-02-20 17:50   ` Jason Gunthorpe
2025-02-20  1:31 ` [PATCH v2 6/7] iommufd: Implement sw_msi support natively Nicolin Chen
2025-02-21 14:51   ` Jason Gunthorpe
2025-02-27 19:33   ` Jason Gunthorpe
2025-02-20  1:31 ` [PATCH v2 7/7] iommu: Turn iova_cookie to dma-iommu private pointer Nicolin Chen
2025-02-20 17:50   ` Jason Gunthorpe
2025-02-21 14:39   ` Jason Gunthorpe
2025-02-21 15:23     ` Robin Murphy
2025-02-21 16:48       ` Jason Gunthorpe
2025-02-26  2:25     ` Nicolin Chen
2025-02-26 17:36       ` Jason Gunthorpe
2025-02-26 18:57         ` Nicolin Chen
2025-02-26 19:18           ` Jason Gunthorpe
2025-02-21 14:59 ` [PATCH v2 0/7] iommu: Add MSI mapping support with nested SMMU (Part-1 core) Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5b9e15e1-8081-46ef-b9db-3872e98a6f35@arm.com \
    --to=robin.murphy@arm.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=eric.auger@redhat.com \
    --cc=iommu@lists.linux.dev \
    --cc=jacob.pan@linux.microsoft.com \
    --cc=jgg@nvidia.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=nicolinc@nvidia.com \
    --cc=patches@lists.linux.dev \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    --cc=yi.l.liu@intel.com \
    --cc=yury.norov@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox