From: Alex Williamson <alex.williamson@redhat.com>
To: Bharat Bhushan <Bharat.Bhushan@freescale.com>
Cc: kvm@vger.kernel.org, marc.zyngier@arm.com, will.deacon@arm.com,
kvmarm@lists.cs.columbia.edu
Subject: Re: [RFC PATCH 4/6] vfio: Add interface to iommu-map/unmap MSI pages
Date: Fri, 02 Oct 2015 16:46:20 -0600 [thread overview]
Message-ID: <1443825980.26107.189.camel@redhat.com> (raw)
In-Reply-To: <1443624989-24346-4-git-send-email-Bharat.Bhushan@freescale.com>
On Wed, 2015-09-30 at 20:26 +0530, Bharat Bhushan wrote:
> For MSI interrupts to work for a pass-through devices we need
> to have mapping of msi-pages in iommu. Now on some platforms
> (like x86) does this msi-pages mapping happens magically and in these
> case they chooses an iova which they somehow know that it will never
> overlap with guest memory. But this magic iova selection
> may not be always true for all platform (like PowerPC and ARM64).
>
> Also on x86 platform, there is no problem as long as running a x86-guest
> on x86-host but there can be issues when running a non-x86 guest on
> x86 host or other userspace applications like (I think ODP/DPDK).
> As in these cases there can be chances that it overlaps with guest
> memory mapping.
Wow, it's amazing anything works... smoke and mirrors.
> This patch add interface to iommu-map and iommu-unmap msi-pages at
> reserved iova chosen by userspace.
>
> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
> ---
> drivers/vfio/vfio.c | 52 +++++++++++++++++++
> drivers/vfio/vfio_iommu_type1.c | 111 ++++++++++++++++++++++++++++++++++++++++
> include/linux/vfio.h | 9 +++-
> 3 files changed, 171 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 2fb29df..a817d2d 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -605,6 +605,58 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
> return NOTIFY_OK;
> }
>
> +int vfio_device_map_msi(struct vfio_device *device, uint64_t msi_addr,
> + uint32_t size, uint64_t *msi_iova)
> +{
> + struct vfio_container *container = device->group->container;
> + struct vfio_iommu_driver *driver;
> + int ret;
> +
> + /* Validate address and size */
> + if (!msi_addr || !size || !msi_iova)
> + return -EINVAL;
> +
> + down_read(&container->group_lock);
> +
> + driver = container->iommu_driver;
> + if (!driver || !driver->ops || !driver->ops->msi_map) {
> + up_read(&container->group_lock);
> + return -EINVAL;
> + }
> +
> + ret = driver->ops->msi_map(container->iommu_data,
> + msi_addr, size, msi_iova);
> +
> + up_read(&container->group_lock);
> + return ret;
> +}
> +
> +int vfio_device_unmap_msi(struct vfio_device *device, uint64_t msi_iova,
> + uint64_t size)
> +{
> + struct vfio_container *container = device->group->container;
> + struct vfio_iommu_driver *driver;
> + int ret;
> +
> + /* Validate address and size */
> + if (!msi_iova || !size)
> + return -EINVAL;
> +
> + down_read(&container->group_lock);
> +
> + driver = container->iommu_driver;
> + if (!driver || !driver->ops || !driver->ops->msi_unmap) {
> + up_read(&container->group_lock);
> + return -EINVAL;
> + }
> +
> + ret = driver->ops->msi_unmap(container->iommu_data,
> + msi_iova, size);
> +
> + up_read(&container->group_lock);
> + return ret;
> +}
> +
> /**
> * VFIO driver API
> */
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 3315fb6..ab376c2 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -1003,12 +1003,34 @@ out_free:
> return ret;
> }
>
> +static void vfio_iommu_unmap_all_reserved_regions(struct vfio_iommu *iommu)
> +{
> + struct vfio_resvd_region *region;
> + struct vfio_domain *d;
> +
> + list_for_each_entry(region, &iommu->reserved_iova_list, next) {
> + list_for_each_entry(d, &iommu->domain_list, next) {
> + if (!region->map_paddr)
> + continue;
> +
> + if (!iommu_iova_to_phys(d->domain, region->iova))
> + continue;
> +
> + iommu_unmap(d->domain, region->iova, PAGE_SIZE);
PAGE_SIZE? Why not region->size?
> + region->map_paddr = 0;
> + cond_resched();
> + }
> + }
> +}
> +
> static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
> {
> struct rb_node *node;
>
> while ((node = rb_first(&iommu->dma_list)))
> vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
> +
> + vfio_iommu_unmap_all_reserved_regions(iommu);
> }
>
> static void vfio_iommu_type1_detach_group(void *iommu_data,
> @@ -1048,6 +1070,93 @@ done:
> mutex_unlock(&iommu->lock);
> }
>
> +static int vfio_iommu_type1_msi_map(void *iommu_data, uint64_t msi_addr,
> + uint64_t size, uint64_t *msi_iova)
> +{
> + struct vfio_iommu *iommu = iommu_data;
> + struct vfio_resvd_region *region;
> + int ret;
> +
> + mutex_lock(&iommu->lock);
> +
> + /* Do not try ceate iommu-mapping if msi reconfig not allowed */
> + if (!iommu->allow_msi_reconfig) {
> + mutex_unlock(&iommu->lock);
> + return 0;
> + }
> +
> + /* Check if there is already region mapping the msi page */
> + list_for_each_entry(region, &iommu->reserved_iova_list, next) {
> + if (region->map_paddr == msi_addr) {
> + *msi_iova = region->iova;
> + region->refcount++;
> + mutex_unlock(&iommu->lock);
> + return 0;
> + }
> + }
> +
> + /* Get a unmapped reserved region */
> + list_for_each_entry(region, &iommu->reserved_iova_list, next) {
> + if (!region->map_paddr)
> + break;
> + }
> +
> + if (region == NULL) {
> + mutex_unlock(&iommu->lock);
> + return -ENODEV;
> + }
> +
> + ret = vfio_iommu_map(iommu, region->iova, msi_addr >> PAGE_SHIFT,
> + size >> PAGE_SHIFT, region->prot);
So the reserved region has a size and the msi mapping has a size and we
arbitrarily decide to use the msi mapping size here? The overlap checks
we've done for the reserved region are meaningless then. No wonder
you're unmapping with PAGE_SIZE, we have no idea.
> + if (ret) {
> + mutex_unlock(&iommu->lock);
> + return ret;
> + }
> +
> + region->map_paddr = msi_addr;
Is there some sort of implied page alignment with msi_addr? I could
pass 0x0 for one call, 0x1 for another and due to the mapping shift, get
two reserved IOVAs pointing at the same msi page.
> + *msi_iova = region->iova;
> + region->refcount++;
> +
> + mutex_unlock(&iommu->lock);
> +
> + return 0;
> +}
> +
> +static int vfio_iommu_type1_msi_unmap(void *iommu_data, uint64_t iova,
> + uint64_t size)
> +{
> + struct vfio_iommu *iommu = iommu_data;
> + struct vfio_resvd_region *region;
> + struct vfio_domain *d;
> +
> + mutex_lock(&iommu->lock);
> +
> + /* find the region mapping the msi page */
> + list_for_each_entry(region, &iommu->reserved_iova_list, next)
> + if (region->iova == iova)
> + break;
> +
> + if (region == NULL || region->refcount <= 0) {
> + mutex_unlock(&iommu->lock);
> + return -EINVAL;
> + }
> +
> + region->refcount--;
> + if (!region->refcount) {
> + list_for_each_entry(d, &iommu->domain_list, next) {
> + if (!iommu_iova_to_phys(d->domain, iova))
> + continue;
> +
> + iommu_unmap(d->domain, iova, size);
And here we're just trusting that the unmap was the same size as the
map?
> + cond_resched();
> + }
> + }
> + region->map_paddr = 0;
> +
> + mutex_unlock(&iommu->lock);
> + return 0;
> +}
> +
> static void *vfio_iommu_type1_open(unsigned long arg)
> {
> struct vfio_iommu *iommu;
> @@ -1264,6 +1373,8 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
> .ioctl = vfio_iommu_type1_ioctl,
> .attach_group = vfio_iommu_type1_attach_group,
> .detach_group = vfio_iommu_type1_detach_group,
> + .msi_map = vfio_iommu_type1_msi_map,
> + .msi_unmap = vfio_iommu_type1_msi_unmap,
> };
>
> static int __init vfio_iommu_type1_init(void)
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index ddb4409..b91085d 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -52,6 +52,10 @@ extern void *vfio_del_group_dev(struct device *dev);
> extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
> extern void vfio_device_put(struct vfio_device *device);
> extern void *vfio_device_data(struct vfio_device *device);
> +extern int vfio_device_map_msi(struct vfio_device *device, uint64_t msi_addr,
> + uint32_t size, uint64_t *msi_iova);
> +int vfio_device_unmap_msi(struct vfio_device *device, uint64_t msi_iova,
> + uint64_t size);
>
> /**
> * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
> @@ -72,7 +76,10 @@ struct vfio_iommu_driver_ops {
> struct iommu_group *group);
> void (*detach_group)(void *iommu_data,
> struct iommu_group *group);
> -
> + int (*msi_map)(void *iommu_data, uint64_t msi_addr,
> + uint64_t size, uint64_t *msi_iova);
> + int (*msi_unmap)(void *iommu_data, uint64_t msi_iova,
> + uint64_t size);
> };
>
> extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
How did this patch solve any of the problems outlined in the commit log?
next prev parent reply other threads:[~2015-10-02 22:44 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-09-30 14:56 [RFC PATCH 1/6] vfio: Add interface for add/del reserved iova region Bharat Bhushan
2015-09-30 14:56 ` [RFC PATCH 2/6] iommu: Add interface to get msi-pages mapping attributes Bharat Bhushan
2015-09-30 14:56 ` Bharat Bhushan
2015-10-02 22:45 ` Alex Williamson
2015-10-05 5:17 ` Bhushan Bharat
2015-10-05 5:56 ` Bhushan Bharat
2015-09-30 14:56 ` [RFC PATCH 3/6] vfio: Extend iommu-info to return MSIs automap state Bharat Bhushan
2015-09-30 14:56 ` Bharat Bhushan
2015-10-02 22:46 ` Alex Williamson
2015-10-05 6:00 ` Bhushan Bharat
2015-10-05 22:45 ` Alex Williamson
2015-10-06 8:53 ` Bhushan Bharat
2015-10-06 15:11 ` Alex Williamson
2015-09-30 14:56 ` [RFC PATCH 4/6] vfio: Add interface to iommu-map/unmap MSI pages Bharat Bhushan
2015-09-30 14:56 ` Bharat Bhushan
2015-10-02 22:46 ` Alex Williamson [this message]
2015-10-05 6:27 ` Bhushan Bharat
2015-10-05 22:45 ` Alex Williamson
2015-10-06 9:05 ` Bhushan Bharat
2015-10-06 15:12 ` Alex Williamson
2015-09-30 14:56 ` [RFC PATCH 5/6] vfio-pci: Create iommu mapping for msi interrupt Bharat Bhushan
2015-09-30 14:56 ` Bharat Bhushan
2015-09-30 11:02 ` kbuild test robot
2015-09-30 11:02 ` kbuild test robot
2015-09-30 11:32 ` Bhushan Bharat
2015-09-30 11:34 ` kbuild test robot
2015-09-30 11:34 ` kbuild test robot
2015-10-02 22:46 ` Alex Williamson
2015-10-05 7:20 ` Bhushan Bharat
2015-10-05 22:44 ` Alex Williamson
2015-10-06 8:32 ` Bhushan Bharat
2015-10-06 15:06 ` Alex Williamson
2015-09-30 14:56 ` [RFC PATCH 6/6] arm-smmu: Allow to set iommu mapping for MSI Bharat Bhushan
2015-09-30 14:56 ` Bharat Bhushan
2015-10-02 22:46 ` Alex Williamson
2015-10-05 8:33 ` Bhushan Bharat
2015-10-05 22:54 ` Alex Williamson
2015-10-06 10:26 ` Bhushan Bharat
2015-10-26 15:40 ` Christoffer Dall
2015-11-02 2:53 ` Pranavkumar Sawargaonkar
2015-10-02 22:45 ` [RFC PATCH 1/6] vfio: Add interface for add/del reserved iova region Alex Williamson
2015-10-05 4:55 ` Bhushan Bharat
2015-10-05 22:45 ` Alex Williamson
2015-10-06 9:39 ` Bhushan Bharat
2015-10-06 15:21 ` Alex Williamson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1443825980.26107.189.camel@redhat.com \
--to=alex.williamson@redhat.com \
--cc=Bharat.Bhushan@freescale.com \
--cc=kvm@vger.kernel.org \
--cc=kvmarm@lists.cs.columbia.edu \
--cc=marc.zyngier@arm.com \
--cc=will.deacon@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.